From cbb70ce070d220642b038ea101d9c0f9fbf860d6 Mon Sep 17 00:00:00 2001
From: dim <dim@FreeBSD.org>
Date: Sun, 20 Feb 2011 12:57:14 +0000
Subject: Vendor import of llvm trunk r126079:
 http://llvm.org/svn/llvm-project/llvm/trunk@126079

---
 lib/Target/ARM/ARM.h | 114 +-
 lib/Target/ARM/ARM.td | 58 +-
 lib/Target/ARM/ARMAddressingModes.h | 12 +
 lib/Target/ARM/ARMAsmBackend.cpp | 512 +
 lib/Target/ARM/ARMAsmPrinter.cpp | 2225 ++--
 lib/Target/ARM/ARMAsmPrinter.h | 112 +
 lib/Target/ARM/ARMBaseInfo.h | 249 +
 lib/Target/ARM/ARMBaseInstrInfo.cpp | 1305 +-
 lib/Target/ARM/ARMBaseInstrInfo.h | 171 +-
 lib/Target/ARM/ARMBaseRegisterInfo.cpp | 948 +-
 lib/Target/ARM/ARMBaseRegisterInfo.h | 65 +-
 lib/Target/ARM/ARMBuildAttrs.h | 73 +-
 lib/Target/ARM/ARMCallingConv.h | 160 +
 lib/Target/ARM/ARMCallingConv.td | 29 +
 lib/Target/ARM/ARMCodeEmitter.cpp | 368 +-
 lib/Target/ARM/ARMConstantIslandPass.cpp | 27 +-
 lib/Target/ARM/ARMConstantPoolValue.cpp | 26 +-
 lib/Target/ARM/ARMConstantPoolValue.h | 43 +-
 lib/Target/ARM/ARMELFWriterInfo.cpp | 83 +
 lib/Target/ARM/ARMELFWriterInfo.h | 58 +
 lib/Target/ARM/ARMExpandPseudoInsts.cpp | 1227 +-
 lib/Target/ARM/ARMFastISel.cpp | 1670 ++-
 lib/Target/ARM/ARMFixupKinds.h | 97 +
 lib/Target/ARM/ARMFrameInfo.h | 32 -
 lib/Target/ARM/ARMFrameLowering.cpp | 1021 ++
 lib/Target/ARM/ARMFrameLowering.h | 74 +
 lib/Target/ARM/ARMGlobalMerge.cpp | 69 +-
 lib/Target/ARM/ARMHazardRecognizer.cpp | 121 +
 lib/Target/ARM/ARMHazardRecognizer.h | 54 +
 lib/Target/ARM/ARMISelDAGToDAG.cpp | 1823 ++-
 lib/Target/ARM/ARMISelLowering.cpp | 2278 +++-
 lib/Target/ARM/ARMISelLowering.h | 88 +-
 lib/Target/ARM/ARMInstrFormats.td | 1191 +-
 lib/Target/ARM/ARMInstrInfo.cpp | 33 +-
 lib/Target/ARM/ARMInstrInfo.h | 5 -
 lib/Target/ARM/ARMInstrInfo.td | 3554 +++--
 lib/Target/ARM/ARMInstrNEON.td | 3650 ++++--
 lib/Target/ARM/ARMInstrThumb.td | 1661 ++-
 lib/Target/ARM/ARMInstrThumb2.td | 2725 ++--
 lib/Target/ARM/ARMInstrVFP.td | 1146 +-
 lib/Target/ARM/ARMJITInfo.cpp | 13 +-
 lib/Target/ARM/ARMJITInfo.h | 2 +-
 lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 519 +-
 lib/Target/ARM/ARMMCCodeEmitter.cpp | 1230 ++
 lib/Target/ARM/ARMMCExpr.cpp | 73 +
 lib/Target/ARM/ARMMCExpr.h | 73 +
 lib/Target/ARM/ARMMCInstLower.cpp | 147 +-
 lib/Target/ARM/ARMMCInstLower.h | 56 -
 lib/Target/ARM/ARMMachineFunctionInfo.h | 60 +-
 lib/Target/ARM/ARMPerfectShuffle.h | 13122 +++++++++----------
 lib/Target/ARM/ARMRegisterInfo.cpp | 1 -
 lib/Target/ARM/ARMRegisterInfo.td | 90 +-
 lib/Target/ARM/ARMSchedule.td | 140 +-
 lib/Target/ARM/ARMScheduleA8.td | 862 +-
 lib/Target/ARM/ARMScheduleA9.td | 1799 ++-
 lib/Target/ARM/ARMScheduleV6.td | 130 +-
 lib/Target/ARM/ARMSelectionDAGInfo.cpp | 16 +-
 lib/Target/ARM/ARMSelectionDAGInfo.h | 6 +-
 lib/Target/ARM/ARMSubtarget.cpp | 119 +-
 lib/Target/ARM/ARMSubtarget.h | 48 +-
 lib/Target/ARM/ARMTargetMachine.cpp | 62 +-
 lib/Target/ARM/ARMTargetMachine.h | 36 +-
 lib/Target/ARM/ARMTargetObjectFile.cpp | 19 +-
 lib/Target/ARM/ARMTargetObjectFile.h | 11 +-
 lib/Target/ARM/AsmParser/ARMAsmLexer.cpp | 192 +-
 lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 1530 ++-
 lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 800 --
 lib/Target/ARM/AsmPrinter/ARMInstPrinter.h | 118 -
 lib/Target/ARM/AsmPrinter/CMakeLists.txt | 6 -
 lib/Target/ARM/AsmPrinter/Makefile | 15 -
 lib/Target/ARM/CMakeLists.txt | 25 +-
 lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 49 +-
 .../ARM/Disassembler/ARMDisassemblerCore.cpp | 259 +-
 lib/Target/ARM/Disassembler/CMakeLists.txt | 14 +
 .../ARM/Disassembler/ThumbDisassemblerCore.h | 298 +-
 lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 711 +
 lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 111 +
 lib/Target/ARM/InstPrinter/CMakeLists.txt | 6 +
 lib/Target/ARM/InstPrinter/Makefile | 15 +
 lib/Target/ARM/MLxExpansionPass.cpp | 321 +
 lib/Target/ARM/Makefile | 4 +-
 lib/Target/ARM/NEONPreAllocPass.cpp | 406 -
 lib/Target/ARM/README-Thumb.txt | 21 +-
 lib/Target/ARM/Thumb1FrameLowering.cpp | 352 +
 lib/Target/ARM/Thumb1FrameLowering.h | 52 +
 lib/Target/ARM/Thumb1InstrInfo.cpp | 84 +-
 lib/Target/ARM/Thumb1InstrInfo.h | 17 +-
 lib/Target/ARM/Thumb1RegisterInfo.cpp | 332 +-
 lib/Target/ARM/Thumb1RegisterInfo.h | 5 -
 lib/Target/ARM/Thumb2HazardRecognizer.cpp | 53 -
 lib/Target/ARM/Thumb2HazardRecognizer.h | 40 -
 lib/Target/ARM/Thumb2InstrInfo.cpp | 44 +-
 lib/Target/ARM/Thumb2InstrInfo.h | 8 -
 lib/Target/ARM/Thumb2RegisterInfo.cpp | 1 -
 lib/Target/ARM/Thumb2SizeReduction.cpp | 133 +-
 lib/Target/Alpha/Alpha.h | 7 +
 lib/Target/Alpha/AlphaAsmPrinter.cpp | 166 +
 lib/Target/Alpha/AlphaCodeEmitter.cpp | 222 -
 lib/Target/Alpha/AlphaFrameLowering.cpp | 143 +
 lib/Target/Alpha/AlphaFrameLowering.h | 43 +
 lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 19 +-
 lib/Target/Alpha/AlphaISelLowering.cpp | 87 +-
 lib/Target/Alpha/AlphaISelLowering.h | 5 +
 lib/Target/Alpha/AlphaInstrInfo.td | 6 +-
 lib/Target/Alpha/AlphaJITInfo.cpp | 310 -
 lib/Target/Alpha/AlphaJITInfo.h | 53 -
 lib/Target/Alpha/AlphaRegisterInfo.cpp | 152 +-
 lib/Target/Alpha/AlphaRegisterInfo.h | 10 -
 lib/Target/Alpha/AlphaSchedule.td | 4 +-
 lib/Target/Alpha/AlphaTargetMachine.cpp | 10 +-
 lib/Target/Alpha/AlphaTargetMachine.h | 20 +-
 lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp | 166 -
 lib/Target/Alpha/AsmPrinter/CMakeLists.txt | 6 -
 lib/Target/Alpha/AsmPrinter/Makefile | 15 -
 lib/Target/Alpha/CMakeLists.txt | 7 +-
 lib/Target/Alpha/Makefile | 4 +-
 .../Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp | 156 -
 lib/Target/Blackfin/AsmPrinter/CMakeLists.txt | 6 -
 lib/Target/Blackfin/AsmPrinter/Makefile | 16 -
 lib/Target/Blackfin/BlackfinAsmPrinter.cpp | 156 +
 lib/Target/Blackfin/BlackfinFrameLowering.cpp | 124 +
 lib/Target/Blackfin/BlackfinFrameLowering.h | 46 +
 lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp | 6 +-
 lib/Target/Blackfin/BlackfinISelLowering.cpp | 61 +-
 lib/Target/Blackfin/BlackfinISelLowering.h | 6 +
 lib/Target/Blackfin/BlackfinInstrInfo.td | 8 +-
 lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 106 +-
 lib/Target/Blackfin/BlackfinRegisterInfo.h | 8 -
 lib/Target/Blackfin/BlackfinRegisterInfo.td | 20 +-
 lib/Target/Blackfin/BlackfinTargetMachine.cpp | 2 +-
 lib/Target/Blackfin/BlackfinTargetMachine.h | 17 +-
 lib/Target/Blackfin/CMakeLists.txt | 4 +
 lib/Target/Blackfin/Makefile | 2 +-
 lib/Target/CBackend/CBackend.cpp | 337 +-
 lib/Target/CBackend/CMakeLists.txt | 2 +
 lib/Target/CMakeLists.txt | 44 +-
 lib/Target/CellSPU/AsmPrinter/CMakeLists.txt | 9 -
 lib/Target/CellSPU/AsmPrinter/Makefile | 17 -
 lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp | 364 -
 lib/Target/CellSPU/CMakeLists.txt | 6 +-
 lib/Target/CellSPU/Makefile | 2 +-
 lib/Target/CellSPU/README.txt | 2 +-
 lib/Target/CellSPU/SPU.h | 1 +
 lib/Target/CellSPU/SPU64InstrInfo.td | 79 +-
 lib/Target/CellSPU/SPUAsmPrinter.cpp | 327 +
 lib/Target/CellSPU/SPUFrameInfo.cpp | 29 -
 lib/Target/CellSPU/SPUFrameInfo.h | 75 -
 lib/Target/CellSPU/SPUFrameLowering.cpp | 276 +
 lib/Target/CellSPU/SPUFrameLowering.h | 94 +
 lib/Target/CellSPU/SPUHazardRecognizers.cpp | 4 +-
 lib/Target/CellSPU/SPUHazardRecognizers.h | 4 +-
 lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 223 +-
 lib/Target/CellSPU/SPUISelLowering.cpp | 787 +-
 lib/Target/CellSPU/SPUISelLowering.h | 23 +-
 lib/Target/CellSPU/SPUInstrInfo.cpp | 15 +-
 lib/Target/CellSPU/SPUInstrInfo.h | 4 +
 lib/Target/CellSPU/SPUInstrInfo.td | 396 +-
 lib/Target/CellSPU/SPUMCAsmInfo.cpp | 3 +-
 lib/Target/CellSPU/SPUNodes.td | 18 +-
 lib/Target/CellSPU/SPUNopFiller.cpp | 153 +
 lib/Target/CellSPU/SPUOperands.td | 18 +-
 lib/Target/CellSPU/SPURegisterInfo.cpp | 264 +-
 lib/Target/CellSPU/SPURegisterInfo.h | 16 +-
 lib/Target/CellSPU/SPUSchedule.td | 8 +-
 lib/Target/CellSPU/SPUSubtarget.cpp | 21 +
 lib/Target/CellSPU/SPUSubtarget.h | 6 +-
 lib/Target/CellSPU/SPUTargetMachine.cpp | 13 +-
 lib/Target/CellSPU/SPUTargetMachine.h | 15 +-
 lib/Target/CppBackend/CMakeLists.txt | 2 +
 lib/Target/CppBackend/CPPBackend.cpp | 37 +-
 lib/Target/MBlaze/AsmParser/CMakeLists.txt | 8 +
 lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp | 127 +
 lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 568 +
 lib/Target/MBlaze/AsmParser/Makefile | 15 +
 lib/Target/MBlaze/AsmPrinter/CMakeLists.txt | 9 -
 lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp | 295 -
 lib/Target/MBlaze/AsmPrinter/Makefile | 17 -
 lib/Target/MBlaze/CMakeLists.txt | 14 +-
 lib/Target/MBlaze/Disassembler/CMakeLists.txt | 16 +
 .../MBlaze/Disassembler/MBlazeDisassembler.cpp | 647 +
 .../MBlaze/Disassembler/MBlazeDisassembler.h | 55 +
 lib/Target/MBlaze/Disassembler/Makefile | 16 +
 lib/Target/MBlaze/InstPrinter/CMakeLists.txt | 8 +
 .../MBlaze/InstPrinter/MBlazeInstPrinter.cpp | 69 +
 lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h | 43 +
 lib/Target/MBlaze/InstPrinter/Makefile | 16 +
 lib/Target/MBlaze/MBlaze.h | 8 +
 lib/Target/MBlaze/MBlaze.td | 41 +-
 lib/Target/MBlaze/MBlazeAsmBackend.cpp | 163 +
 lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 335 +
 lib/Target/MBlaze/MBlazeCallingConv.td | 14 +-
 lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp | 191 +-
 lib/Target/MBlaze/MBlazeELFWriterInfo.cpp | 111 +
 lib/Target/MBlaze/MBlazeELFWriterInfo.h | 58 +
 lib/Target/MBlaze/MBlazeFrameLowering.cpp | 450 +
 lib/Target/MBlaze/MBlazeFrameLowering.h | 53 +
 lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp | 87 +-
 lib/Target/MBlaze/MBlazeISelLowering.cpp | 720 +-
 lib/Target/MBlaze/MBlazeISelLowering.h | 46 +-
 lib/Target/MBlaze/MBlazeInstrFPU.td | 253 +-
 lib/Target/MBlaze/MBlazeInstrFSL.td | 326 +-
 lib/Target/MBlaze/MBlazeInstrFormats.td | 272 +-
 lib/Target/MBlaze/MBlazeInstrInfo.cpp | 179 +-
 lib/Target/MBlaze/MBlazeInstrInfo.h | 166 +-
 lib/Target/MBlaze/MBlazeInstrInfo.td | 927 +-
 lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp | 6 +-
 lib/Target/MBlaze/MBlazeIntrinsics.td | 6 +-
 lib/Target/MBlaze/MBlazeMCAsmInfo.cpp | 9 +-
 lib/Target/MBlaze/MBlazeMCAsmInfo.h | 4 +-
 lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp | 223 +
 lib/Target/MBlaze/MBlazeMCInstLower.cpp | 166 +
 lib/Target/MBlaze/MBlazeMCInstLower.h | 50 +
 lib/Target/MBlaze/MBlazeMachineFunction.h | 86 +-
 lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 343 +-
 lib/Target/MBlaze/MBlazeRegisterInfo.h | 20 +-
 lib/Target/MBlaze/MBlazeRegisterInfo.td | 140 +-
 lib/Target/MBlaze/MBlazeRelocations.h | 47 +
 lib/Target/MBlaze/MBlazeSchedule.td | 4 +-
 lib/Target/MBlaze/MBlazeTargetMachine.cpp | 66 +-
 lib/Target/MBlaze/MBlazeTargetMachine.h | 33 +-
 lib/Target/MBlaze/MBlazeTargetObjectFile.cpp | 9 +-
 lib/Target/MBlaze/MBlazeTargetObjectFile.h | 7 +-
 lib/Target/MBlaze/Makefile | 12 +-
 lib/Target/MBlaze/TODO | 26 +
 lib/Target/MBlaze/TargetInfo/CMakeLists.txt | 3 +-
 lib/Target/MSP430/AsmPrinter/CMakeLists.txt | 8 -
 lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp | 179 -
 lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp | 116 -
 lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h | 43 -
 lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp | 150 -
 lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h | 50 -
 lib/Target/MSP430/AsmPrinter/Makefile | 15 -
 lib/Target/MSP430/CMakeLists.txt | 6 +-
 lib/Target/MSP430/InstPrinter/CMakeLists.txt | 6 +
 .../MSP430/InstPrinter/MSP430InstPrinter.cpp | 113 +
 lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h | 43 +
 lib/Target/MSP430/InstPrinter/Makefile | 15 +
 lib/Target/MSP430/MSP430.td | 1 +
 lib/Target/MSP430/MSP430AsmPrinter.cpp | 179 +
 lib/Target/MSP430/MSP430FrameLowering.cpp | 223 +
 lib/Target/MSP430/MSP430FrameLowering.h | 53 +
 lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 17 +-
 lib/Target/MSP430/MSP430ISelLowering.cpp | 22 +-
 lib/Target/MSP430/MSP430InstrInfo.cpp | 52 +-
 lib/Target/MSP430/MSP430InstrInfo.h | 9 -
 lib/Target/MSP430/MSP430InstrInfo.td | 16 +-
 lib/Target/MSP430/MSP430MCInstLower.cpp | 150 +
 lib/Target/MSP430/MSP430MCInstLower.h | 50 +
 lib/Target/MSP430/MSP430RegisterInfo.cpp | 170 +-
 lib/Target/MSP430/MSP430RegisterInfo.h | 6 -
 lib/Target/MSP430/MSP430RegisterInfo.td | 8 +-
 lib/Target/MSP430/MSP430TargetMachine.cpp | 14 +-
 lib/Target/MSP430/MSP430TargetMachine.h | 12 +-
 lib/Target/MSP430/Makefile | 2 +-
 lib/Target/MSP430/TargetInfo/CMakeLists.txt | 2 +-
 lib/Target/Mangler.cpp | 10 -
 lib/Target/Mips/AsmPrinter/CMakeLists.txt | 9 -
 lib/Target/Mips/AsmPrinter/Makefile | 17 -
 lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 386 -
 lib/Target/Mips/CMakeLists.txt | 4 +-
 lib/Target/Mips/Makefile | 2 +-
 lib/Target/Mips/Mips.td | 30 +-
 lib/Target/Mips/MipsAsmPrinter.cpp | 393 +
 lib/Target/Mips/MipsDelaySlotFiller.cpp | 13 +-
 lib/Target/Mips/MipsFrameLowering.cpp | 314 +
 lib/Target/Mips/MipsFrameLowering.h | 48 +
 lib/Target/Mips/MipsISelDAGToDAG.cpp | 28 +-
 lib/Target/Mips/MipsISelLowering.cpp | 620 +-
 lib/Target/Mips/MipsISelLowering.h | 18 +-
 lib/Target/Mips/MipsInstrFPU.td | 2 +-
 lib/Target/Mips/MipsInstrInfo.td | 355 +-
 lib/Target/Mips/MipsMachineFunction.h | 34 +-
 lib/Target/Mips/MipsRegisterInfo.cpp | 287 +-
 lib/Target/Mips/MipsRegisterInfo.h | 5 -
 lib/Target/Mips/MipsSchedule.td | 2 +-
 lib/Target/Mips/MipsSubtarget.h | 4 +-
 lib/Target/Mips/MipsTargetMachine.cpp | 20 +-
 lib/Target/Mips/MipsTargetMachine.h | 21 +-
 lib/Target/Mips/MipsTargetObjectFile.cpp | 29 +-
 lib/Target/PIC16/AsmPrinter/CMakeLists.txt | 9 -
 lib/Target/PIC16/AsmPrinter/Makefile | 15 -
 lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp | 512 -
 lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h | 88 -
 lib/Target/PIC16/CMakeLists.txt | 26 -
 lib/Target/PIC16/Makefile | 24 -
 lib/Target/PIC16/PIC16.h | 134 -
 lib/Target/PIC16/PIC16.td | 40 -
 lib/Target/PIC16/PIC16ABINames.h | 399 -
 lib/Target/PIC16/PIC16DebugInfo.cpp | 490 -
 lib/Target/PIC16/PIC16DebugInfo.h | 156 -
 lib/Target/PIC16/PIC16ISelDAGToDAG.cpp | 50 -
 lib/Target/PIC16/PIC16ISelDAGToDAG.h | 60 -
 lib/Target/PIC16/PIC16ISelLowering.cpp | 2000 ---
 lib/Target/PIC16/PIC16ISelLowering.h | 253 -
 lib/Target/PIC16/PIC16InstrFormats.td | 117 -
 lib/Target/PIC16/PIC16InstrInfo.cpp | 224 -
 lib/Target/PIC16/PIC16InstrInfo.h | 76 -
 lib/Target/PIC16/PIC16InstrInfo.td | 540 -
 lib/Target/PIC16/PIC16MCAsmInfo.cpp | 59 -
 lib/Target/PIC16/PIC16MCAsmInfo.h | 35 -
 lib/Target/PIC16/PIC16MachineFunctionInfo.h | 52 -
 lib/Target/PIC16/PIC16MemSelOpt.cpp | 254 -
 lib/Target/PIC16/PIC16Passes/Makefile | 15 -
 lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp | 299 -
 lib/Target/PIC16/PIC16Passes/PIC16Cloner.h | 83 -
 lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp | 182 -
 lib/Target/PIC16/PIC16Passes/PIC16Overlay.h | 60 -
 lib/Target/PIC16/PIC16RegisterInfo.cpp | 84 -
 lib/Target/PIC16/PIC16RegisterInfo.h | 64 -
 lib/Target/PIC16/PIC16RegisterInfo.td | 33 -
 lib/Target/PIC16/PIC16Section.cpp | 104 -
 lib/Target/PIC16/PIC16Section.h | 99 -
 lib/Target/PIC16/PIC16SelectionDAGInfo.cpp | 23 -
 lib/Target/PIC16/PIC16SelectionDAGInfo.h | 31 -
 lib/Target/PIC16/PIC16Subtarget.cpp | 27 -
 lib/Target/PIC16/PIC16Subtarget.h | 44 -
 lib/Target/PIC16/PIC16TargetMachine.cpp | 55 -
 lib/Target/PIC16/PIC16TargetMachine.h | 70 -
 lib/Target/PIC16/PIC16TargetObjectFile.cpp | 384 -
 lib/Target/PIC16/PIC16TargetObjectFile.h | 168 -
 lib/Target/PIC16/TargetInfo/CMakeLists.txt | 7 -
 lib/Target/PIC16/TargetInfo/Makefile | 15 -
 lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp | 22 -
 lib/Target/PTX/CMakeLists.txt | 26 +
 lib/Target/PTX/Makefile | 26 +
 lib/Target/PTX/PTX.h | 49 +
 lib/Target/PTX/PTX.td | 54 +
 lib/Target/PTX/PTXAsmPrinter.cpp | 347 +
 lib/Target/PTX/PTXFrameLowering.cpp | 24 +
 lib/Target/PTX/PTXFrameLowering.h | 43 +
 lib/Target/PTX/PTXISelDAGToDAG.cpp | 151 +
 lib/Target/PTX/PTXISelLowering.cpp | 210 +
 lib/Target/PTX/PTXISelLowering.h | 67 +
 lib/Target/PTX/PTXInstrFormats.td | 24 +
 lib/Target/PTX/PTXInstrInfo.cpp | 87 +
 lib/Target/PTX/PTXInstrInfo.h | 75 +
 lib/Target/PTX/PTXInstrInfo.td | 257 +
 lib/Target/PTX/PTXMCAsmInfo.cpp | 30 +
 lib/Target/PTX/PTXMCAsmInfo.h | 28 +
 lib/Target/PTX/PTXMCAsmStreamer.cpp | 542 +
 lib/Target/PTX/PTXMFInfoExtract.cpp | 96 +
 lib/Target/PTX/PTXMachineFunctionInfo.h | 79 +
 lib/Target/PTX/PTXRegisterInfo.cpp | 19 +
 lib/Target/PTX/PTXRegisterInfo.h | 63 +
 lib/Target/PTX/PTXRegisterInfo.td | 102 +
 lib/Target/PTX/PTXSubtarget.cpp | 23 +
 lib/Target/PTX/PTXSubtarget.h | 32 +
 lib/Target/PTX/PTXTargetMachine.cpp | 60 +
 lib/Target/PTX/PTXTargetMachine.h | 60 +
 lib/Target/PTX/TargetInfo/CMakeLists.txt | 7 +
 lib/Target/PTX/TargetInfo/Makefile | 15 +
 lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp | 21 +
 lib/Target/PowerPC/AsmPrinter/CMakeLists.txt | 6 -
 lib/Target/PowerPC/AsmPrinter/Makefile | 15 -
 lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 922 --
 lib/Target/PowerPC/CMakeLists.txt | 9 +-
 lib/Target/PowerPC/InstPrinter/CMakeLists.txt | 6 +
 lib/Target/PowerPC/InstPrinter/Makefile | 16 +
 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 292 +
 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 69 +
 lib/Target/PowerPC/Makefile | 5 +-
 lib/Target/PowerPC/PPC.h | 62 +-
 lib/Target/PowerPC/PPC.td | 6 +
 lib/Target/PowerPC/PPCAsmBackend.cpp | 119 +
 lib/Target/PowerPC/PPCAsmPrinter.cpp | 696 +
 lib/Target/PowerPC/PPCCodeEmitter.cpp | 253 +-
 lib/Target/PowerPC/PPCFixupKinds.h | 45 +
 lib/Target/PowerPC/PPCFrameInfo.h | 300 -
 lib/Target/PowerPC/PPCFrameLowering.cpp | 971 ++
 lib/Target/PowerPC/PPCFrameLowering.h | 322 +
 lib/Target/PowerPC/PPCHazardRecognizers.cpp | 56 +-
 lib/Target/PowerPC/PPCHazardRecognizers.h | 20 +-
 lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 210 +-
 lib/Target/PowerPC/PPCISelLowering.cpp | 731 +-
 lib/Target/PowerPC/PPCISelLowering.h | 7 +-
 lib/Target/PowerPC/PPCInstr64Bit.td | 57 +-
 lib/Target/PowerPC/PPCInstrFormats.td | 39 +-
 lib/Target/PowerPC/PPCInstrInfo.cpp | 81 +-
 lib/Target/PowerPC/PPCInstrInfo.h | 26 +-
 lib/Target/PowerPC/PPCInstrInfo.td | 177 +-
 lib/Target/PowerPC/PPCJITInfo.cpp | 2 +-
 lib/Target/PowerPC/PPCMCAsmInfo.cpp | 5 +-
 lib/Target/PowerPC/PPCMCCodeEmitter.cpp | 195 +
 lib/Target/PowerPC/PPCMCInstLower.cpp | 172 +
 lib/Target/PowerPC/PPCRegisterInfo.cpp | 975 +-
 lib/Target/PowerPC/PPCRegisterInfo.h | 19 -
 lib/Target/PowerPC/PPCRegisterInfo.td | 13 +-
 lib/Target/PowerPC/PPCScheduleG3.td | 2 +-
 lib/Target/PowerPC/PPCScheduleG4.td | 2 +-
 lib/Target/PowerPC/PPCScheduleG4Plus.td | 2 +-
 lib/Target/PowerPC/PPCScheduleG5.td | 2 +-
 lib/Target/PowerPC/PPCSubtarget.cpp | 2 +-
 lib/Target/PowerPC/PPCTargetMachine.cpp | 31 +-
 lib/Target/PowerPC/PPCTargetMachine.h | 18 +-
 lib/Target/PowerPC/README.txt | 29 +-
 lib/Target/README.txt | 979 +-
 lib/Target/Sparc/AsmPrinter/CMakeLists.txt | 6 -
 lib/Target/Sparc/AsmPrinter/Makefile | 15 -
 lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp | 249 -
 lib/Target/Sparc/CMakeLists.txt | 4 +-
 lib/Target/Sparc/DelaySlotFiller.cpp | 230 +-
 lib/Target/Sparc/Makefile | 2 +-
 lib/Target/Sparc/SparcAsmPrinter.cpp | 251 +
 lib/Target/Sparc/SparcCallingConv.td | 10 +-
 lib/Target/Sparc/SparcFrameLowering.cpp | 80 +
 lib/Target/Sparc/SparcFrameLowering.h | 41 +
 lib/Target/Sparc/SparcISelDAGToDAG.cpp | 18 +-
 lib/Target/Sparc/SparcISelLowering.cpp | 721 +-
 lib/Target/Sparc/SparcISelLowering.h | 3 +-
 lib/Target/Sparc/SparcInstrInfo.cpp | 195 +-
 lib/Target/Sparc/SparcInstrInfo.h | 11 +-
 lib/Target/Sparc/SparcInstrInfo.td | 221 +-
 lib/Target/Sparc/SparcMachineFunctionInfo.h | 11 +-
 lib/Target/Sparc/SparcRegisterInfo.cpp | 53 -
 lib/Target/Sparc/SparcRegisterInfo.h | 9 +-
 lib/Target/Sparc/SparcRegisterInfo.td | 3 +
 lib/Target/Sparc/SparcTargetMachine.cpp | 6 +-
 lib/Target/Sparc/SparcTargetMachine.h | 15 +-
 lib/Target/SubtargetFeature.cpp | 3 +-
 lib/Target/SystemZ/AsmPrinter/CMakeLists.txt | 6 -
 lib/Target/SystemZ/AsmPrinter/Makefile | 15 -
 .../SystemZ/AsmPrinter/SystemZAsmPrinter.cpp | 217 -
 lib/Target/SystemZ/CMakeLists.txt | 4 +-
 lib/Target/SystemZ/Makefile | 2 +-
 lib/Target/SystemZ/SystemZAsmPrinter.cpp | 223 +
 lib/Target/SystemZ/SystemZFrameLowering.cpp | 386 +
 lib/Target/SystemZ/SystemZFrameLowering.h | 57 +
 lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 31 +-
 lib/Target/SystemZ/SystemZISelLowering.cpp | 18 +-
 lib/Target/SystemZ/SystemZInstrBuilder.h | 6 +-
 lib/Target/SystemZ/SystemZInstrInfo.cpp | 150 -
 lib/Target/SystemZ/SystemZInstrInfo.h | 10 -
 lib/Target/SystemZ/SystemZInstrInfo.td | 56 +-
 lib/Target/SystemZ/SystemZMCAsmInfo.cpp | 5 +-
 lib/Target/SystemZ/SystemZOperands.td | 15 +
 lib/Target/SystemZ/SystemZRegisterInfo.cpp | 214 +-
 lib/Target/SystemZ/SystemZRegisterInfo.h | 12 -
 lib/Target/SystemZ/SystemZRegisterInfo.td | 48 +-
 lib/Target/SystemZ/SystemZTargetMachine.cpp | 2 +-
 lib/Target/SystemZ/SystemZTargetMachine.h | 12 +-
 lib/Target/Target.cpp | 15 +-
 lib/Target/TargetAsmInfo.cpp | 27 +
 lib/Target/TargetData.cpp | 58 +-
 lib/Target/TargetELFWriterInfo.cpp | 5 +-
 lib/Target/TargetFrameInfo.cpp | 19 -
 lib/Target/TargetFrameLowering.cpp | 53 +
 lib/Target/TargetInstrInfo.cpp | 93 +-
 lib/Target/TargetLibraryInfo.cpp | 55 +
 lib/Target/TargetLoweringObjectFile.cpp | 8 +-
 lib/Target/TargetMachine.cpp | 4 +-
 lib/Target/TargetRegisterInfo.cpp | 43 +-
 lib/Target/X86/AsmParser/X86AsmLexer.cpp | 9 +-
 lib/Target/X86/AsmParser/X86AsmParser.cpp | 437 +-
 lib/Target/X86/AsmPrinter/CMakeLists.txt | 8 -
 lib/Target/X86/AsmPrinter/Makefile | 15 -
 lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp | 129 -
 lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h | 81 -
 lib/Target/X86/AsmPrinter/X86InstComments.cpp | 232 -
 lib/Target/X86/AsmPrinter/X86InstComments.h | 25 -
 lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp | 140 -
 lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h | 95 -
 lib/Target/X86/CMakeLists.txt | 14 +-
 lib/Target/X86/Disassembler/CMakeLists.txt | 2 +-
 lib/Target/X86/Disassembler/X86Disassembler.cpp | 15 +-
 lib/Target/X86/Disassembler/X86Disassembler.h | 2 +-
 .../X86/Disassembler/X86DisassemblerDecoder.c | 31 +-
 .../X86/Disassembler/X86DisassemblerDecoder.h | 4 +-
 .../Disassembler/X86DisassemblerDecoderCommon.h | 3 +-
 lib/Target/X86/InstPrinter/CMakeLists.txt | 8 +
 lib/Target/X86/InstPrinter/Makefile | 15 +
 lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 127 +
 lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 81 +
 lib/Target/X86/InstPrinter/X86InstComments.cpp | 232 +
 lib/Target/X86/InstPrinter/X86InstComments.h | 25 +
 lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 139 +
 lib/Target/X86/InstPrinter/X86IntelInstPrinter.h | 95 +
 lib/Target/X86/Makefile | 2 +-
 lib/Target/X86/README-SSE.txt | 50 +-
 lib/Target/X86/README-X86-64.txt | 44 -
 lib/Target/X86/README.txt | 335 +-
 lib/Target/X86/Utils/CMakeLists.txt | 6 +
 lib/Target/X86/Utils/Makefile | 15 +
 lib/Target/X86/Utils/X86ShuffleDecode.cpp | 148 +
 lib/Target/X86/Utils/X86ShuffleDecode.h | 69 +
 lib/Target/X86/X86.h | 10 +
 lib/Target/X86/X86.td | 28 +-
 lib/Target/X86/X86AsmBackend.cpp | 270 +-
 lib/Target/X86/X86AsmPrinter.cpp | 97 +-
 lib/Target/X86/X86AsmPrinter.h | 2 -
 lib/Target/X86/X86CallingConv.td | 67 +-
 lib/Target/X86/X86CodeEmitter.cpp | 21 +-
 lib/Target/X86/X86ELFWriterInfo.cpp | 55 +-
 lib/Target/X86/X86ELFWriterInfo.h | 19 +-
 lib/Target/X86/X86FastISel.cpp | 300 +-
 lib/Target/X86/X86FixupKinds.h | 16 +-
 lib/Target/X86/X86FloatingPoint.cpp | 129 +-
 lib/Target/X86/X86FrameLowering.cpp | 994 ++
 lib/Target/X86/X86FrameLowering.h | 65 +
 lib/Target/X86/X86ISelDAGToDAG.cpp | 200 +-
 lib/Target/X86/X86ISelLowering.cpp | 3194 +++--
 lib/Target/X86/X86ISelLowering.h | 243 +-
 lib/Target/X86/X86Instr3DNow.td | 77 +
 lib/Target/X86/X86Instr64bit.td | 2250 ----
 lib/Target/X86/X86InstrArithmetic.td | 1125 ++
 lib/Target/X86/X86InstrBuilder.h | 37 +-
 lib/Target/X86/X86InstrCMovSetCC.td | 104 +
 lib/Target/X86/X86InstrCompiler.td | 1626 +++
 lib/Target/X86/X86InstrControl.td | 294 +
 lib/Target/X86/X86InstrExtension.td | 172 +
 lib/Target/X86/X86InstrFPStack.td | 82 +-
 lib/Target/X86/X86InstrFormats.td | 24 +-
 lib/Target/X86/X86InstrFragmentsSIMD.td | 107 +-
 lib/Target/X86/X86InstrInfo.cpp | 448 +-
 lib/Target/X86/X86InstrInfo.h | 84 +-
 lib/Target/X86/X86InstrInfo.td | 4842 ++-----
 lib/Target/X86/X86InstrMMX.td | 607 +-
 lib/Target/X86/X86InstrSSE.td | 571 +-
 lib/Target/X86/X86InstrShiftRotate.td | 746 ++
 lib/Target/X86/X86InstrSystem.td | 390 +
 lib/Target/X86/X86InstrVMX.td | 54 +
 lib/Target/X86/X86JITInfo.cpp | 16 +-
 lib/Target/X86/X86MCAsmInfo.cpp | 15 +-
 lib/Target/X86/X86MCCodeEmitter.cpp | 149 +-
 lib/Target/X86/X86MCInstLower.cpp | 117 +-
 lib/Target/X86/X86MCInstLower.h | 2 -
 lib/Target/X86/X86MachObjectWriter.cpp | 32 +
 lib/Target/X86/X86RegisterInfo.cpp | 955 +-
 lib/Target/X86/X86RegisterInfo.h | 17 +-
 lib/Target/X86/X86RegisterInfo.td | 100 +-
 lib/Target/X86/X86SelectionDAGInfo.cpp | 52 +-
 lib/Target/X86/X86SelectionDAGInfo.h | 9 +-
 lib/Target/X86/X86ShuffleDecode.h | 155 -
 lib/Target/X86/X86Subtarget.cpp | 18 +-
 lib/Target/X86/X86Subtarget.h | 36 +-
 lib/Target/X86/X86TargetMachine.cpp | 55 +-
 lib/Target/X86/X86TargetMachine.h | 75 +-
 lib/Target/XCore/AsmPrinter/CMakeLists.txt | 6 -
 lib/Target/XCore/AsmPrinter/Makefile | 16 -
 lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp | 280 -
 lib/Target/XCore/CMakeLists.txt | 5 +-
 lib/Target/XCore/Makefile | 2 +-
 lib/Target/XCore/TargetInfo/CMakeLists.txt | 2 +-
 lib/Target/XCore/XCoreAsmPrinter.cpp | 280 +
 lib/Target/XCore/XCoreCallingConv.td | 3 +
 lib/Target/XCore/XCoreFrameInfo.cpp | 27 -
 lib/Target/XCore/XCoreFrameInfo.h | 34 -
 lib/Target/XCore/XCoreFrameLowering.cpp | 387 +
 lib/Target/XCore/XCoreFrameLowering.h | 59 +
 lib/Target/XCore/XCoreISelDAGToDAG.cpp | 21 +-
 lib/Target/XCore/XCoreISelLowering.cpp | 172 +-
 lib/Target/XCore/XCoreISelLowering.h | 1 +
 lib/Target/XCore/XCoreInstrInfo.cpp | 66 +-
 lib/Target/XCore/XCoreInstrInfo.h | 9 -
 lib/Target/XCore/XCoreInstrInfo.td | 76 +-
 lib/Target/XCore/XCoreRegisterInfo.cpp | 284 +-
 lib/Target/XCore/XCoreRegisterInfo.h | 11 -
 lib/Target/XCore/XCoreRegisterInfo.td | 4 +-
 lib/Target/XCore/XCoreTargetMachine.cpp | 2 +-
 lib/Target/XCore/XCoreTargetMachine.h | 8 +-
 lib/Target/XCore/XCoreTargetObjectFile.cpp | 49 +-
 560 files changed, 68036 insertions(+), 51315 deletions(-)
 create mode 100644 lib/Target/ARM/ARMAsmBackend.cpp
 create mode 100644 lib/Target/ARM/ARMAsmPrinter.h
 create mode 100644 lib/Target/ARM/ARMBaseInfo.h
 create mode 100644 lib/Target/ARM/ARMCallingConv.h
 create mode 100644 lib/Target/ARM/ARMELFWriterInfo.cpp
 create mode 100644 lib/Target/ARM/ARMELFWriterInfo.h
 create mode 100644 lib/Target/ARM/ARMFixupKinds.h
 delete mode 100644 lib/Target/ARM/ARMFrameInfo.h
 create mode 100644 lib/Target/ARM/ARMFrameLowering.cpp
 create mode 100644 lib/Target/ARM/ARMFrameLowering.h
 create mode 100644 lib/Target/ARM/ARMHazardRecognizer.cpp
 create mode 100644 lib/Target/ARM/ARMHazardRecognizer.h
 create mode 100644 lib/Target/ARM/ARMMCCodeEmitter.cpp
 create mode 100644 lib/Target/ARM/ARMMCExpr.cpp
 create mode 100644 lib/Target/ARM/ARMMCExpr.h
 delete mode 100644 lib/Target/ARM/ARMMCInstLower.h
 delete mode 100644 lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
 delete mode 100644 lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
 delete mode 100644 lib/Target/ARM/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/ARM/AsmPrinter/Makefile
 create mode 100644 lib/Target/ARM/Disassembler/CMakeLists.txt
 create mode 100644 lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
 create mode 100644 lib/Target/ARM/InstPrinter/ARMInstPrinter.h
 create mode 100644 lib/Target/ARM/InstPrinter/CMakeLists.txt
 create mode 100644 lib/Target/ARM/InstPrinter/Makefile
 create mode 100644 lib/Target/ARM/MLxExpansionPass.cpp
 delete mode 100644 lib/Target/ARM/NEONPreAllocPass.cpp
 create mode 100644 lib/Target/ARM/Thumb1FrameLowering.cpp
 create mode 100644 lib/Target/ARM/Thumb1FrameLowering.h
 delete mode 100644 lib/Target/ARM/Thumb2HazardRecognizer.cpp
 delete mode 100644 lib/Target/ARM/Thumb2HazardRecognizer.h
 create mode 100644 lib/Target/Alpha/AlphaAsmPrinter.cpp
 delete mode 100644 lib/Target/Alpha/AlphaCodeEmitter.cpp
 create mode 100644 lib/Target/Alpha/AlphaFrameLowering.cpp
 create mode 100644 lib/Target/Alpha/AlphaFrameLowering.h
 delete mode 100644 lib/Target/Alpha/AlphaJITInfo.cpp
 delete mode 100644 lib/Target/Alpha/AlphaJITInfo.h
 delete mode 100644 lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
 delete mode 100644 lib/Target/Alpha/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/Alpha/AsmPrinter/Makefile
 delete mode 100644 lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
 delete mode 100644 lib/Target/Blackfin/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/Blackfin/AsmPrinter/Makefile
 create mode 100644 lib/Target/Blackfin/BlackfinAsmPrinter.cpp
 create mode 100644 lib/Target/Blackfin/BlackfinFrameLowering.cpp
 create mode 100644 lib/Target/Blackfin/BlackfinFrameLowering.h
 delete mode 100644 lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/CellSPU/AsmPrinter/Makefile
 delete mode 100644 lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
 create mode 100644 lib/Target/CellSPU/SPUAsmPrinter.cpp
 delete mode 100644 lib/Target/CellSPU/SPUFrameInfo.cpp
 delete mode 100644 lib/Target/CellSPU/SPUFrameInfo.h
 create mode 100644 lib/Target/CellSPU/SPUFrameLowering.cpp
 create mode 100644 lib/Target/CellSPU/SPUFrameLowering.h
 create mode 100644 lib/Target/CellSPU/SPUNopFiller.cpp
 create mode 100644 lib/Target/MBlaze/AsmParser/CMakeLists.txt
 create mode 100644 lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
 create mode 100644 lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
 create mode 100644 lib/Target/MBlaze/AsmParser/Makefile
 delete mode 100644 lib/Target/MBlaze/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
 delete mode 100644 lib/Target/MBlaze/AsmPrinter/Makefile
 create mode 100644 lib/Target/MBlaze/Disassembler/CMakeLists.txt
 create mode 100644 lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
 create mode 100644 lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
 create mode 100644 lib/Target/MBlaze/Disassembler/Makefile
 create mode 100644 lib/Target/MBlaze/InstPrinter/CMakeLists.txt
 create mode 100644 lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
 create mode 100644 lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
 create mode 100644 lib/Target/MBlaze/InstPrinter/Makefile
 create mode 100644 lib/Target/MBlaze/MBlazeAsmBackend.cpp
 create mode 100644 lib/Target/MBlaze/MBlazeAsmPrinter.cpp
 create mode 100644 lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
 create mode 100644 lib/Target/MBlaze/MBlazeELFWriterInfo.h
 create mode 100644 lib/Target/MBlaze/MBlazeFrameLowering.cpp
 create mode 100644 lib/Target/MBlaze/MBlazeFrameLowering.h
 create mode 100644 lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
 create mode 100644 lib/Target/MBlaze/MBlazeMCInstLower.cpp
 create mode 100644 lib/Target/MBlaze/MBlazeMCInstLower.h
 create mode 100644 lib/Target/MBlaze/MBlazeRelocations.h
 create mode 100644 lib/Target/MBlaze/TODO
 delete mode 100644 lib/Target/MSP430/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
 delete mode 100644 lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
 delete mode 100644 lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h
 delete mode 100644 lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
 delete mode 100644 lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
 delete mode 100644 lib/Target/MSP430/AsmPrinter/Makefile
 create mode 100644 lib/Target/MSP430/InstPrinter/CMakeLists.txt
 create mode 100644 lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
 create mode 100644 lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
 create mode 100644 lib/Target/MSP430/InstPrinter/Makefile
 create mode 100644 lib/Target/MSP430/MSP430AsmPrinter.cpp
 create mode 100644 lib/Target/MSP430/MSP430FrameLowering.cpp
 create mode 100644 lib/Target/MSP430/MSP430FrameLowering.h
 create mode 100644 lib/Target/MSP430/MSP430MCInstLower.cpp
 create mode 100644 lib/Target/MSP430/MSP430MCInstLower.h
 delete mode 100644 lib/Target/Mips/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/Mips/AsmPrinter/Makefile
 delete mode 100644 lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
 create mode 100644 lib/Target/Mips/MipsAsmPrinter.cpp
 create mode 100644 lib/Target/Mips/MipsFrameLowering.cpp
 create mode 100644 lib/Target/Mips/MipsFrameLowering.h
 delete mode 100644 lib/Target/PIC16/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/PIC16/AsmPrinter/Makefile
 delete mode 100644 lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
 delete mode 100644 lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
 delete mode 100644 lib/Target/PIC16/CMakeLists.txt
 delete mode 100644 lib/Target/PIC16/Makefile
 delete mode 100644 lib/Target/PIC16/PIC16.h
 delete mode 100644 lib/Target/PIC16/PIC16.td
 delete mode 100644 lib/Target/PIC16/PIC16ABINames.h
 delete mode 100644 lib/Target/PIC16/PIC16DebugInfo.cpp
 delete mode 100644 lib/Target/PIC16/PIC16DebugInfo.h
 delete mode 100644 lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
 delete mode 100644 lib/Target/PIC16/PIC16ISelDAGToDAG.h
 delete mode 100644 lib/Target/PIC16/PIC16ISelLowering.cpp
 delete mode 100644 lib/Target/PIC16/PIC16ISelLowering.h
 delete mode 100644 lib/Target/PIC16/PIC16InstrFormats.td
 delete mode 100644 lib/Target/PIC16/PIC16InstrInfo.cpp
 delete mode 100644 lib/Target/PIC16/PIC16InstrInfo.h
 delete mode 100644 lib/Target/PIC16/PIC16InstrInfo.td
 delete mode 100644 lib/Target/PIC16/PIC16MCAsmInfo.cpp
 delete mode 100644 lib/Target/PIC16/PIC16MCAsmInfo.h
 delete mode 100644 lib/Target/PIC16/PIC16MachineFunctionInfo.h
 delete mode 100644 lib/Target/PIC16/PIC16MemSelOpt.cpp
 delete mode 100644 lib/Target/PIC16/PIC16Passes/Makefile
 delete mode 100644 lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
 delete mode 100644 lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
 delete mode 100644 lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
 delete mode 100644 lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
 delete mode 100644 lib/Target/PIC16/PIC16RegisterInfo.cpp
 delete mode 100644 lib/Target/PIC16/PIC16RegisterInfo.h
 delete mode 100644 lib/Target/PIC16/PIC16RegisterInfo.td
 delete mode 100644 lib/Target/PIC16/PIC16Section.cpp
 delete mode 100644 lib/Target/PIC16/PIC16Section.h
 delete mode 100644 lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
 delete mode 100644 lib/Target/PIC16/PIC16SelectionDAGInfo.h
 delete mode 100644 lib/Target/PIC16/PIC16Subtarget.cpp
 delete mode 100644 lib/Target/PIC16/PIC16Subtarget.h
 delete mode 100644 lib/Target/PIC16/PIC16TargetMachine.cpp
 delete mode 100644 lib/Target/PIC16/PIC16TargetMachine.h
 delete mode 100644 lib/Target/PIC16/PIC16TargetObjectFile.cpp
 delete mode 100644 lib/Target/PIC16/PIC16TargetObjectFile.h
 delete mode 100644 lib/Target/PIC16/TargetInfo/CMakeLists.txt
 delete mode 100644 lib/Target/PIC16/TargetInfo/Makefile
 delete mode 100644 lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp
 create mode 100644 lib/Target/PTX/CMakeLists.txt
 create mode 100644 lib/Target/PTX/Makefile
 create mode 100644 lib/Target/PTX/PTX.h
 create mode 100644 lib/Target/PTX/PTX.td
 create mode 100644 lib/Target/PTX/PTXAsmPrinter.cpp
 create mode 100644 lib/Target/PTX/PTXFrameLowering.cpp
 create mode 100644 lib/Target/PTX/PTXFrameLowering.h
 create mode 100644 lib/Target/PTX/PTXISelDAGToDAG.cpp
 create mode 100644 lib/Target/PTX/PTXISelLowering.cpp
 create mode 100644 lib/Target/PTX/PTXISelLowering.h
 create mode 100644 lib/Target/PTX/PTXInstrFormats.td
 create mode 100644 lib/Target/PTX/PTXInstrInfo.cpp
 create mode 100644 lib/Target/PTX/PTXInstrInfo.h
 create mode 100644 lib/Target/PTX/PTXInstrInfo.td
 create mode 100644 lib/Target/PTX/PTXMCAsmInfo.cpp
 create mode 100644 lib/Target/PTX/PTXMCAsmInfo.h
 create mode 100644 lib/Target/PTX/PTXMCAsmStreamer.cpp
 create mode 100644 lib/Target/PTX/PTXMFInfoExtract.cpp
 create mode 100644 lib/Target/PTX/PTXMachineFunctionInfo.h
 create mode 100644 lib/Target/PTX/PTXRegisterInfo.cpp
 create mode 100644 lib/Target/PTX/PTXRegisterInfo.h
 create mode 100644 lib/Target/PTX/PTXRegisterInfo.td
 create mode 100644 lib/Target/PTX/PTXSubtarget.cpp
 create mode 100644 lib/Target/PTX/PTXSubtarget.h
 create mode 100644 lib/Target/PTX/PTXTargetMachine.cpp
 create mode 100644 lib/Target/PTX/PTXTargetMachine.h
 create mode 100644 lib/Target/PTX/TargetInfo/CMakeLists.txt
 create mode 100644 lib/Target/PTX/TargetInfo/Makefile
 create mode 100644 lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
 delete mode 100644 lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/PowerPC/AsmPrinter/Makefile
 delete mode 100644 lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
 create mode 100644 lib/Target/PowerPC/InstPrinter/CMakeLists.txt
 create mode 100644 lib/Target/PowerPC/InstPrinter/Makefile
 create mode 100644 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
 create mode 100644 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
 create mode 100644 lib/Target/PowerPC/PPCAsmBackend.cpp
 create mode 100644 lib/Target/PowerPC/PPCAsmPrinter.cpp
 create mode 100644 lib/Target/PowerPC/PPCFixupKinds.h
 delete mode 100644 lib/Target/PowerPC/PPCFrameInfo.h
 create mode 100644 lib/Target/PowerPC/PPCFrameLowering.cpp
 create mode 100644 lib/Target/PowerPC/PPCFrameLowering.h
 create mode 100644 lib/Target/PowerPC/PPCMCCodeEmitter.cpp
 create mode 100644 lib/Target/PowerPC/PPCMCInstLower.cpp
 delete mode 100644 lib/Target/Sparc/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/Sparc/AsmPrinter/Makefile
 delete mode 100644 lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
 create mode 100644 lib/Target/Sparc/SparcAsmPrinter.cpp
 create mode 100644 lib/Target/Sparc/SparcFrameLowering.cpp
 create mode 100644 lib/Target/Sparc/SparcFrameLowering.h
 delete mode 100644 lib/Target/SystemZ/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/SystemZ/AsmPrinter/Makefile
 delete mode 100644 lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
 create mode 100644 lib/Target/SystemZ/SystemZAsmPrinter.cpp
 create mode 100644 lib/Target/SystemZ/SystemZFrameLowering.cpp
 create mode 100644 lib/Target/SystemZ/SystemZFrameLowering.h
 create mode 100644 lib/Target/TargetAsmInfo.cpp
 delete mode 100644 lib/Target/TargetFrameInfo.cpp
 create mode 100644 lib/Target/TargetFrameLowering.cpp
 create mode 100644 lib/Target/TargetLibraryInfo.cpp
 delete mode 100644 lib/Target/X86/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/X86/AsmPrinter/Makefile
 delete mode 100644 lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
 delete mode 100644 lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
 delete mode 100644 lib/Target/X86/AsmPrinter/X86InstComments.cpp
 delete mode 100644 lib/Target/X86/AsmPrinter/X86InstComments.h
 delete mode 100644 lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
 delete mode 100644 lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
 create mode 100644 lib/Target/X86/InstPrinter/CMakeLists.txt
 create mode 100644 lib/Target/X86/InstPrinter/Makefile
 create mode 100644 lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
 create mode 100644 lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
 create mode 100644 lib/Target/X86/InstPrinter/X86InstComments.cpp
 create mode 100644 lib/Target/X86/InstPrinter/X86InstComments.h
 create mode 100644 lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
 create mode 100644 lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
 create mode 100644 lib/Target/X86/Utils/CMakeLists.txt
 create mode 100644 lib/Target/X86/Utils/Makefile
 create mode 100644 lib/Target/X86/Utils/X86ShuffleDecode.cpp
 create mode 100644 lib/Target/X86/Utils/X86ShuffleDecode.h
 create mode 100644 lib/Target/X86/X86FrameLowering.cpp
 create mode 100644 lib/Target/X86/X86FrameLowering.h
 create mode 100644 lib/Target/X86/X86Instr3DNow.td
 delete mode 100644 lib/Target/X86/X86Instr64bit.td
 create mode 100644 lib/Target/X86/X86InstrArithmetic.td
 create mode 100644 lib/Target/X86/X86InstrCMovSetCC.td
 create mode 100644 lib/Target/X86/X86InstrCompiler.td
 create mode 100644 lib/Target/X86/X86InstrControl.td
 create mode 100644 lib/Target/X86/X86InstrExtension.td
 create mode 100644 lib/Target/X86/X86InstrShiftRotate.td
 create mode 100644 lib/Target/X86/X86InstrSystem.td
 create mode 100644 lib/Target/X86/X86InstrVMX.td
 create mode 100644 lib/Target/X86/X86MachObjectWriter.cpp
 delete mode 100644 lib/Target/X86/X86ShuffleDecode.h
 delete mode 100644 lib/Target/XCore/AsmPrinter/CMakeLists.txt
 delete mode 100644 lib/Target/XCore/AsmPrinter/Makefile
 delete mode 100644 lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
 create mode 100644 lib/Target/XCore/XCoreAsmPrinter.cpp
 delete mode 100644 lib/Target/XCore/XCoreFrameInfo.cpp
 delete mode 100644 lib/Target/XCore/XCoreFrameInfo.h
 create mode 100644 lib/Target/XCore/XCoreFrameLowering.cpp
 create mode 100644 lib/Target/XCore/XCoreFrameLowering.h

(limited to 'lib/Target')

diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 271ca44..4679f74 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -15,6 +15,7 @@
 #ifndef TARGET_ARM_H
 #define TARGET_ARM_H
 
+#include "ARMBaseInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cassert>
@@ -25,97 +26,17 @@ class ARMBaseTargetMachine;
 class FunctionPass;
 class JITCodeEmitter;
 class formatted_raw_ostream;
+class MCCodeEmitter;
+class TargetAsmBackend;
+class MachineInstr;
+class ARMAsmPrinter;
+class MCInst;
 
-// Enums corresponding to ARM condition codes
-namespace ARMCC {
-  // The CondCodes constants map directly to the 4-bit encoding of the
-  // condition field for predicated instructions.
-  enum CondCodes { // Meaning (integer)          Meaning (floating-point)
-    EQ,            // Equal                      Equal
-    NE,            // Not equal                  Not equal, or unordered
-    HS,            // Carry set                  >, ==, or unordered
-    LO,            // Carry clear                Less than
-    MI,            // Minus, negative            Less than
-    PL,            // Plus, positive or zero     >, ==, or unordered
-    VS,            // Overflow                   Unordered
-    VC,            // No overflow                Not unordered
-    HI,            // Unsigned higher            Greater than, or unordered
-    LS,            // Unsigned lower or same     Less than or equal
-    GE,            // Greater than or equal      Greater than or equal
-    LT,            // Less than                  Less than, or unordered
-    GT,            // Greater than               Greater than
-    LE,            // Less than or equal         <, ==, or unordered
-    AL             // Always (unconditional)     Always (unconditional)
-  };
+MCCodeEmitter *createARMMCCodeEmitter(const Target &,
+                                      TargetMachine &TM,
+                                      MCContext &Ctx);
 
-  inline static CondCodes getOppositeCondition(CondCodes CC) {
-    switch (CC) {
-    default: llvm_unreachable("Unknown condition code");
-    case EQ: return NE;
-    case NE: return EQ;
-    case HS: return LO;
-    case LO: return HS;
-    case MI: return PL;
-    case PL: return MI;
-    case VS: return VC;
-    case VC: return VS;
-    case HI: return LS;
-    case LS: return HI;
-    case GE: return LT;
-    case LT: return GE;
-    case GT: return LE;
-    case LE: return GT;
-    }
-  }
-} // namespace ARMCC
-
-inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
-  switch (CC) {
-  default: llvm_unreachable("Unknown condition code");
-  case ARMCC::EQ:  return "eq";
-  case ARMCC::NE:  return "ne";
-  case ARMCC::HS:  return "hs";
-  case ARMCC::LO:  return "lo";
-  case ARMCC::MI:  return "mi";
-  case ARMCC::PL:  return "pl";
-  case ARMCC::VS:  return "vs";
-  case ARMCC::VC:  return "vc";
-  case ARMCC::HI:  return "hi";
-  case ARMCC::LS:  return "ls";
-  case ARMCC::GE:  return "ge";
-  case ARMCC::LT:  return "lt";
-  case ARMCC::GT:  return "gt";
-  case ARMCC::LE:  return "le";
-  case ARMCC::AL:  return "al";
-  }
-}
-
-namespace ARM_MB {
-  // The Memory Barrier Option constants map directly to the 4-bit encoding of
-  // the option field for memory barrier operations.
-  enum MemBOpt {
-    ST    = 14,
-    ISH   = 11,
-    ISHST = 10,
-    NSH   = 7,
-    NSHST = 6,
-    OSH   = 3,
-    OSHST = 2
-  };
-
-  inline static const char *MemBOptToString(unsigned val) {
-    switch (val) {
-    default: llvm_unreachable("Unknown memory opetion");
-    case ST:    return "st";
-    case ISH:   return "ish";
-    case ISHST: return "ishst";
-    case NSH:   return "nsh";
-    case NSHST: return "nshst";
-    case OSH:   return "osh";
-    case OSHST: return "oshst";
-    }
-  }
-} // namespace ARM_MB
+TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &);
 
 FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
                                CodeGenOpt::Level OptLevel);
@@ -127,23 +48,16 @@ FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
 FunctionPass *createARMExpandPseudoPass();
 FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
 FunctionPass *createARMConstantIslandPass();
-FunctionPass *createNEONPreAllocPass();
 FunctionPass *createNEONMoveFixPass();
+FunctionPass *createMLxExpansionPass();
 FunctionPass *createThumb2ITBlockPass();
 FunctionPass *createThumb2SizeReductionPass();
 
 extern Target TheARMTarget, TheThumbTarget;
 
-} // end namespace llvm;
-
-// Defines symbolic names for ARM registers.  This defines a mapping from
-// register name to register number.
-//
-#include "ARMGenRegisterNames.inc"
-
-// Defines symbolic names for the ARM instructions.
-//
-#include "ARMGenInstrNames.inc"
+void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+                                  ARMAsmPrinter &AP);
 
 } // end namespace llvm;
 
 #endif
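The ARMCC and ARM_MB helpers deleted above are not lost; this import moves them into the new ARMBaseInfo.h. One property of the deleted table is worth noting: the CondCodes values map directly to the 4-bit ARM condition field, and each condition sits adjacent to its opposite, so the long switch in getOppositeCondition is equivalent to flipping the low encoding bit. A standalone C++ sketch of that equivalence follows; this is an illustration, not LLVM code, and the enum merely mirrors the block deleted above.

// Standalone sketch: ARM condition codes and their opposites.
// Mirrors the ARMCC::CondCodes encoding removed from ARM.h above.
#include <cassert>
#include <cstdio>

enum CondCodes { EQ, NE, HS, LO, MI, PL, VS, VC,
                 HI, LS, GE, LT, GT, LE, AL };

// Equivalent to the big switch in getOppositeCondition(): every pair of
// opposite conditions differs only in the least significant bit.
static CondCodes getOppositeCondition(CondCodes CC) {
  assert(CC != AL && "AL (0b1110) is unconditional and has no opposite");
  return static_cast<CondCodes>(CC ^ 1);
}

int main() {
  // EQ (0b0000) inverts to NE (0b0001), GE (0b1010) to LT (0b1011), etc.
  std::printf("opposite of EQ is %d (NE)\n", getOppositeCondition(EQ));
  std::printf("opposite of GE is %d (LT)\n", getOppositeCondition(GE));
}

This is why the switch treats AL as unreachable: it is the one encoding whose bit-0 neighbour (NV, 0b1111) is not a usable condition.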
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index d6a8f19..bf4315f 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -16,6 +16,7 @@
 
 include "llvm/Target/Target.td"
 
+
 //===----------------------------------------------------------------------===//
 // ARM Subtarget features.
 //
@@ -32,6 +33,8 @@ def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
                                     "Does not support ARM mode execution">;
 def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
                                    "Enable half-precision floating point">;
+def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
+                                  "Restrict VFP3 to 16 double registers">;
 def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
                                     "Enable divide instructions">;
 def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
@@ -43,14 +46,11 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
 def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
                           "Floating point unit supports single precision only">;
 
-// Some processors have multiply-accumulate instructions that don't
-// play nicely with other VFP instructions, and it's generally better
+// Some processors have FP multiply-accumulate instructions that don't
+// play nicely with other VFP / NEON instructions, and it's generally better
 // to just not use them.
-// FIXME: Currently, this is only flagged for Cortex-A8. It may be true for
-// others as well. We should do more benchmarking and confirm one way or
-// the other.
-def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true",
-                                          "Disable VFP MAC instructions">;
+def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
+                                            "Disable VFP / NEON MAC instructions">;
 
 // Some processors benefit from using NEON instructions for scalar
 // single-precision FP operations.
 def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
@@ -61,6 +61,9 @@ def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
 def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
                                              "Prefer 32-bit Thumb instrs">;
 
+// Multiprocessing extension.
+def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
+                                 "Supports Multiprocessing extension">;
 
 // ARM architectures.
 def ArchV4T     : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
@@ -91,6 +94,18 @@ def ArchV7M     : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
 
 include "ARMSchedule.td"
 
+// ARM processor families.
+def ProcOthers  : SubtargetFeature<"others", "ARMProcFamily", "Others",
+                                   "One of the other ARM processor families">;
+def ProcA8      : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+                                   "Cortex-A8 ARM processors",
+                                   [FeatureSlowFPBrcc, FeatureNEONForFP,
+                                    FeatureHasSlowFPVMLx, FeatureT2XtPk]>;
+def ProcA9      : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+                                   "Cortex-A9 ARM processors",
+                                   [FeatureHasSlowFPVMLx, FeatureT2XtPk,
+                                    FeatureFP16]>;
+
 class ProcNoItin<string Name, list<SubtargetFeature> Features>
  : Processor<Name, GenericItineraries, Features>;
 
@@ -135,25 +150,27 @@ def : ProcNoItin<"iwmmxt",        [ArchV5TE]>;
 // V6 Processors.
 def : Processor<"arm1136j-s",       ARMV6Itineraries, [ArchV6]>;
 def : Processor<"arm1136jf-s",      ARMV6Itineraries, [ArchV6, FeatureVFP2,
-                                                       FeatureHasSlowVMLx]>;
+                                                       FeatureHasSlowFPVMLx]>;
 def : Processor<"arm1176jz-s",      ARMV6Itineraries, [ArchV6]>;
-def : Processor<"arm1176jzf-s",     ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1176jzf-s",     ARMV6Itineraries, [ArchV6, FeatureVFP2,
+                                                       FeatureHasSlowFPVMLx]>;
 def : Processor<"mpcorenovfp",      ARMV6Itineraries, [ArchV6]>;
-def : Processor<"mpcore",           ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+def : Processor<"mpcore",           ARMV6Itineraries, [ArchV6, FeatureVFP2,
+                                                       FeatureHasSlowFPVMLx]>;
 
 // V6M Processors.
 def : Processor<"cortex-m0",        ARMV6Itineraries, [ArchV6M]>;
 
 // V6T2 Processors.
 def : Processor<"arm1156t2-s",      ARMV6Itineraries, [ArchV6T2]>;
-def : Processor<"arm1156t2f-s",     ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>;
+def : Processor<"arm1156t2f-s",     ARMV6Itineraries, [ArchV6T2, FeatureVFP2,
+                                                       FeatureHasSlowFPVMLx]>;
 
 // V7 Processors.
 def : Processor<"cortex-a8",        CortexA8Itineraries,
-                [ArchV7A, FeatureHasSlowVMLx,
-                 FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>;
+                [ArchV7A, ProcA8]>;
 def : Processor<"cortex-a9",        CortexA9Itineraries,
-                [ArchV7A, FeatureT2XtPk]>;
+                [ArchV7A, ProcA9]>;
 
 // V7M Processors.
 def : ProcNoItin<"cortex-m3",       [ArchV7M]>;
@@ -175,6 +192,17 @@ include "ARMInstrInfo.td"
 
 def ARMInstrInfo : InstrInfo;
 
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// ARM Uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def ARMAsmWriter : AsmWriter {
+  string AsmWriterClassName  = "InstPrinter";
+  bit isMCAsmWriter = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // Declare the target which we are implementing
 //===----------------------------------------------------------------------===//
@@ -182,4 +210,6 @@ def ARMInstrInfo : InstrInfo;
 def ARM : Target {
   // Pull in Instruction Info:
   let InstructionSet = ARMInstrInfo;
+
+  let AssemblyWriters = [ARMAsmWriter];
 }
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index db48100..19fbf05 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -50,6 +50,16 @@ namespace ARM_AM {
     }
   }
 
+  static inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
+    switch (Op) {
+    default: assert(0 && "Unknown shift opc!");
+    case ARM_AM::asr: return 2;
+    case ARM_AM::lsl: return 0;
+    case ARM_AM::lsr: return 1;
+    case ARM_AM::ror: return 3;
+    }
+  }
+
   static inline ShiftOpc getShiftOpcForNode(SDValue N) {
     switch (N.getOpcode()) {
     default:          return ARM_AM::no_shift;
@@ -566,6 +576,8 @@ namespace ARM_AM {
     return Val;
   }
 
+  AMSubMode getLoadStoreMultipleSubMode(int Opcode);
+
 } // end namespace ARM_AM
 } // end namespace llvm
diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/ARMAsmBackend.cpp
new file mode 100644
index 0000000..ec23449
--- /dev/null
+++ b/lib/Target/ARM/ARMAsmBackend.cpp
@@ -0,0 +1,512 @@
+//===-- ARMAsmBackend.cpp - ARM Assembler Backend -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+namespace {
+class ARMMachObjectWriter : public MCMachObjectTargetWriter {
+public:
+  ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+                      uint32_t CPUSubtype)
+    : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+                               /*UseAggressiveSymbolFolding=*/true) {}
+};
+
+class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+  ARMELFObjectWriter(Triple::OSType OSType)
+    : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM,
+                              /*HasRelocationAddend*/ false) {}
+};
+
+class ARMAsmBackend : public TargetAsmBackend {
+  bool isThumbMode;  // Currently emitting Thumb code.
+public:
+  ARMAsmBackend(const Target &T) : TargetAsmBackend(), isThumbMode(false) {}
+
+  unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; }
+
+  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+    const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = {
+// This table *must* be in the order that the fixup_* kinds are defined in
+// ARMFixupKinds.h.
+//
+// Name                      Offset (bits) Size (bits)     Flags
+{ "fixup_arm_ldst_pcrel_12", 1,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_ldst_pcrel_12",  0,            32,  MCFixupKindInfo::FKF_IsPCRel |
+                                   MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_pcrel_10",      1,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_pcrel_10",       0,            32,  MCFixupKindInfo::FKF_IsPCRel |
+                                   MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_thumb_adr_pcrel_10",0,            8,   MCFixupKindInfo::FKF_IsPCRel |
+                                   MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_adr_pcrel_12",  1,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_adr_pcrel_12",   0,            32,  MCFixupKindInfo::FKF_IsPCRel |
+                                   MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
+{ "fixup_arm_condbranch",    0,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_uncondbranch",  0,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_condbranch",     0,            32,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_uncondbranch",   0,            32,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_br",      0,            16,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bl",      0,            32,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_blx",     7,            21,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cb",      0,            16,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_cp",      1,            8,   MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_thumb_bcc",     1,            8,   MCFixupKindInfo::FKF_IsPCRel },
+// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19.
+{ "fixup_arm_movt_hi16",     0,            20,  0 },
+{ "fixup_arm_movw_lo16",     0,            20,  0 },
+{ "fixup_t2_movt_hi16",      0,            20,  0 },
+{ "fixup_t2_movw_lo16",      0,            20,  0 },
+{ "fixup_arm_movt_hi16_pcrel", 0,          20,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_movw_lo16_pcrel", 0,          20,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movt_hi16_pcrel", 0,           20,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_t2_movw_lo16_pcrel", 0,           20,  MCFixupKindInfo::FKF_IsPCRel },
+    };
+
+    if (Kind < FirstTargetFixupKind)
+      return TargetAsmBackend::getFixupKindInfo(Kind);
+
+    assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+           "Invalid kind!");
+    return Infos[Kind - FirstTargetFixupKind];
+  }
+
+  bool MayNeedRelaxation(const MCInst &Inst) const;
+
+  void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+  bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+  void HandleAssemblerFlag(MCAssemblerFlag Flag) {
+    switch (Flag) {
+    default: break;
+    case MCAF_Code16:
+      setIsThumb(true);
+      break;
+    case MCAF_Code32:
+      setIsThumb(false);
+      break;
+    }
+  }
+
+  unsigned getPointerSize() const { return 4; }
+  bool isThumb() const { return isThumbMode; }
+  void setIsThumb(bool it) { isThumbMode = it; }
+};
+} // end anonymous namespace
+
+bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+  // FIXME: Thumb targets, different move constant targets..
+  return false;
+}
+
+void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+  assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
+  return;
+}
+
+bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+  if (isThumb()) {
+    // FIXME: 0xbf00 is the ARMv7 value. For v6 and before, we'll need to
+    // use 0x46c0 (which is a 'mov r8, r8' insn).
+    uint64_t NumNops = Count / 2;
+    for (uint64_t i = 0; i != NumNops; ++i)
+      OW->Write16(0xbf00);
+    if (Count & 1)
+      OW->Write8(0);
+    return true;
+  }
+  // ARM mode
+  uint64_t NumNops = Count / 4;
+  for (uint64_t i = 0; i != NumNops; ++i)
+    OW->Write32(0xe1a00000);
+  switch (Count % 4) {
+  default: break; // No leftover bytes to write
+  case 1: OW->Write8(0); break;
+  case 2: OW->Write16(0); break;
+  case 3: OW->Write16(0); OW->Write8(0xa0); break;
+  }
+
+  return true;
+}
+
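WriteNopData above pads a fragment to its target size: 2-byte Thumb NOPs (0xbf00) or 4-byte ARM NOPs (0xe1a00000, a "mov r0, r0"), with any remainder filled byte by byte. A minimal sketch of the ARM-mode arithmetic follows, writing into a plain byte buffer instead of an MCObjectWriter; armNopPad is a hypothetical helper, not LLVM API, and the patch's special 0xa0 third tail byte is simplified to plain zeros here.

// Sketch of the ARM-mode padding arithmetic used by WriteNopData above.
// Not LLVM code: the MCObjectWriter calls are replaced by a byte buffer.
#include <cstdint>
#include <vector>

static std::vector<uint8_t> armNopPad(uint64_t Count) {
  std::vector<uint8_t> Out;
  const uint32_t Nop = 0xe1a00000; // ARM NOP: "mov r0, r0"
  for (uint64_t i = 0; i != Count / 4; ++i)      // whole NOP words
    for (int b = 0; b != 4; ++b)
      Out.push_back(uint8_t(Nop >> (8 * b)));    // little-endian Write32
  for (uint64_t i = 0; i != Count % 4; ++i)      // leftover bytes
    Out.push_back(0);                            // simplified: all zero
  return Out; // e.g. Count == 10 -> two NOP words plus two zero bytes
}

For Count == 10 this yields 8 bytes of NOP encoding and 2 bytes of filler, matching the Count / 4 and Count % 4 split in the function above.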
+    uint32_t out = 0;
+    bool I = Value & 0x800000;
+    bool J1 = Value & 0x400000;
+    bool J2 = Value & 0x200000;
+    J1 ^= I;
+    J2 ^= I;
+
+    out |= I   << 26; // S bit
+    out |= !J1 << 13; // J1 bit
+    out |= !J2 << 11; // J2 bit
+    out |= (Value & 0x1FF800) << 5; // imm10 field
+    out |= (Value & 0x0007FF);      // imm11 field
+
+    uint64_t swapped = (out & 0xFFFF0000) >> 16;
+    swapped |= (out & 0x0000FFFF) << 16;
+    return swapped;
+  }
+  case ARM::fixup_t2_condbranch: {
+    Value = Value - 4;
+    Value >>= 1; // Low bit is not encoded.
+
+    uint64_t out = 0;
+    out |= (Value & 0x80000) << 7; // S bit
+    out |= (Value & 0x40000) >> 7; // J2 bit
+    out |= (Value & 0x20000) >> 4; // J1 bit
+    out |= (Value & 0x1F800) << 5; // imm6 field
+    out |= (Value & 0x007FF);      // imm11 field
+
+    uint32_t swapped = (out & 0xFFFF0000) >> 16;
+    swapped |= (out & 0x0000FFFF) << 16;
+    return swapped;
+  }
+  case ARM::fixup_arm_thumb_bl: {
+    // The value doesn't encode the low bit (always zero) and is offset by
+    // four. The value is encoded into disjoint bit positions in the
+    // destination opcode. x = unchanged, I = immediate value bit, S = sign
+    // extension bit
+    //
+    //   BL: xxxxxSIIIIIIIIII xxxxxIIIIIIIIIII
+    //
+    // Note that the halfwords are stored high first, low second; so we need
+    // to transpose the fixup value here to map properly.
+    unsigned isNeg = (int64_t(Value) < 0) ? 1 : 0;
+    uint32_t Binary = 0;
+    Value = 0x3fffff & ((Value - 4) >> 1);
+    Binary  = (Value & 0x7ff) << 16;    // Low imm11 value.
+    Binary |= (Value & 0x1ffc00) >> 11; // High imm10 value.
+    Binary |= isNeg << 10;              // Sign bit.
+    return Binary;
+  }
+  case ARM::fixup_arm_thumb_blx: {
+    // The value doesn't encode the low two bits (always zero) and is offset
+    // by four (see fixup_arm_thumb_cp). The value is encoded into disjoint
+    // bit positions in the destination opcode. x = unchanged, I = immediate
+    // value bit, S = sign extension bit, 0 = zero.
+    //
+    //   BLX: xxxxxSIIIIIIIIII xxxxxIIIIIIIIII0
+    //
+    // Note that the halfwords are stored high first, low second; so we need
+    // to transpose the fixup value here to map properly.
+    unsigned isNeg = (int64_t(Value) < 0) ? 1 : 0;
+    uint32_t Binary = 0;
+    Value = 0xfffff & ((Value - 2) >> 2);
+    Binary  = (Value & 0x3ff) << 17;   // Low imm10L value.
+    Binary |= (Value & 0xffc00) >> 10; // High imm10H value.
+    Binary |= isNeg << 10;             // Sign bit.
+    return Binary;
+  }
+  case ARM::fixup_arm_thumb_cp:
+    // Offset by 4, and don't encode the low two bits. Two bytes of that
+    // 'off by 4' are implicitly handled by the half-word ordering of the
+    // Thumb encoding, so we only need to adjust by 2 here.
+    return ((Value - 2) >> 2) & 0xff;
+  case ARM::fixup_arm_thumb_cb: {
+    // Offset by 4 and don't encode the lower bit, which is always 0.
+    uint32_t Binary = (Value - 4) >> 1;
+    return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3);
+  }
+  case ARM::fixup_arm_thumb_br:
+    // Offset by 4 and don't encode the lower bit, which is always 0.
+    return ((Value - 4) >> 1) & 0x7ff;
+  case ARM::fixup_arm_thumb_bcc:
+    // Offset by 4 and don't encode the lower bit, which is always 0.
+    return ((Value - 4) >> 1) & 0xff;
+  case ARM::fixup_arm_pcrel_10:
+    Value = Value - 4; // ARM fixups offset by an additional word and don't
+                       // need to adjust for the half-word ordering.
+    // Fall through.
+  case ARM::fixup_t2_pcrel_10: {
+    // Offset by 4, adjusted by two due to the half-word ordering of thumb.
+ Value = Value - 4; + bool isAdd = true; + if ((int64_t)Value < 0) { + Value = -Value; + isAdd = false; + } + // These values don't encode the low two bits since they're always zero. + Value >>= 2; + assert ((Value < 256) && "Out of range pc-relative fixup value!"); + Value |= isAdd << 23; + + // Same addressing mode as fixup_arm_pcrel_10, + // but with 16-bit halfwords swapped. + if (Kind == ARM::fixup_t2_pcrel_10) { + uint32_t swapped = (Value & 0xFFFF0000) >> 16; + swapped |= (Value & 0x0000FFFF) << 16; + return swapped; + } + + return Value; + } + } +} + +namespace { + +// FIXME: This should be in a separate file. +// ELF is an ELF of course... +class ELFARMAsmBackend : public ARMAsmBackend { +public: + Triple::OSType OSType; + ELFARMAsmBackend(const Target &T, Triple::OSType _OSType) + : ARMAsmBackend(T), OSType(_OSType) { } + + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const; + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS, + /*IsLittleEndian*/ true); + } +}; + +// FIXME: Raise this to share code between Darwin and ELF. +void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value) const { + unsigned NumBytes = 4; // FIXME: 2 for Thumb + Value = adjustFixupValue(Fixup.getKind(), Value); + if (!Value) return; // Doesn't change encoding. + + unsigned Offset = Fixup.getOffset(); + assert(Offset % NumBytes == 0 && "Offset mod NumBytes is nonzero!"); + + // For each byte of the fragment that the fixup touches, mask in the bits from + // the fixup value. The Value has been "split up" into the appropriate + // bitfields above. + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); +} + +// FIXME: This should be in a separate file. +class DarwinARMAsmBackend : public ARMAsmBackend { +public: + DarwinARMAsmBackend(const Target &T) : ARMAsmBackend(T) { } + + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const; + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + // FIXME: Subtarget info should be derived. Force v7 for now. + return createMachObjectWriter(new ARMMachObjectWriter( + /*Is64Bit=*/false, + object::mach::CTM_ARM, + object::mach::CSARM_V7), + OS, + /*IsLittleEndian=*/true); + } + + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { + return false; + } +}; + +/// getFixupKindNumBytes - The number of bytes the fixup may change. 
+static unsigned getFixupKindNumBytes(unsigned Kind) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + + case FK_Data_1: + case ARM::fixup_arm_thumb_bcc: + case ARM::fixup_arm_thumb_cp: + case ARM::fixup_thumb_adr_pcrel_10: + return 1; + + case FK_Data_2: + case ARM::fixup_arm_thumb_br: + case ARM::fixup_arm_thumb_cb: + return 2; + + case ARM::fixup_arm_ldst_pcrel_12: + case ARM::fixup_arm_pcrel_10: + case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_condbranch: + case ARM::fixup_arm_uncondbranch: + return 3; + + case FK_Data_4: + case ARM::fixup_t2_ldst_pcrel_12: + case ARM::fixup_t2_condbranch: + case ARM::fixup_t2_uncondbranch: + case ARM::fixup_t2_pcrel_10: + case ARM::fixup_t2_adr_pcrel_12: + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_blx: + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movt_hi16_pcrel: + case ARM::fixup_arm_movw_lo16_pcrel: + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movt_hi16_pcrel: + case ARM::fixup_t2_movw_lo16_pcrel: + return 4; + } +} + +void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value) const { + unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); + Value = adjustFixupValue(Fixup.getKind(), Value); + if (!Value) return; // Doesn't change encoding. + + unsigned Offset = Fixup.getOffset(); + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the + // bits from the fixup value. + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); +} + +} // end anonymous namespace + +TargetAsmBackend *llvm::createARMAsmBackend(const Target &T, + const std::string &TT) { + switch (Triple(TT).getOS()) { + case Triple::Darwin: + return new DarwinARMAsmBackend(T); + case Triple::MinGW32: + case Triple::Cygwin: + case Triple::Win32: + assert(0 && "Windows not supported on ARM"); + default: + return new ELFARMAsmBackend(T, Triple(TT).getOS()); + } +} diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 6cfd596..db12b8e 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -14,28 +14,31 @@ #define DEBUG_TYPE "asm-printer" #include "ARM.h" -#include "ARMBuildAttrs.h" +#include "ARMAsmPrinter.h" #include "ARMAddressingModes.h" +#include "ARMBuildAttrs.h" +#include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" -#include "AsmPrinter/ARMInstPrinter.h" #include "ARMMachineFunctionInfo.h" -#include "ARMMCInstLower.h" +#include "ARMMCExpr.h" #include "ARMTargetMachine.h" +#include "ARMTargetObjectFile.h" +#include "InstPrinter/ARMInstPrinter.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" @@ -53,270 +56,127 @@ #include <cctype> using namespace llvm; 
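A note on the ARMAsmBackend fixup machinery above: two mechanical steps recur throughout adjustFixupValue and the ApplyFixup implementations. Thumb2 encodings are stored as two little-endian halfwords with the high halfword first, so 32-bit values have their 16-bit halves transposed, and the final adjusted value is OR'd into the fragment one byte at a time. The following is a minimal self-contained sketch of just those two steps; the names (swapHalfwords, orIntoFragment) and the test value are illustrative and not part of the patch:

#include <cstdint>
#include <cstdio>

// Swap the 16-bit halves of a 32-bit Thumb2 encoding, mirroring the
// "swapped" computations in adjustFixupValue.
static uint32_t swapHalfwords(uint32_t Value) {
  return ((Value & 0xFFFF0000u) >> 16) | ((Value & 0x0000FFFFu) << 16);
}

// OR each byte of an adjusted fixup value into the instruction bytes at
// the fixup offset, least-significant byte first, as the ApplyFixup
// loops do.
static void orIntoFragment(uint8_t *Data, unsigned Offset, unsigned NumBytes,
                           uint64_t Value) {
  for (unsigned i = 0; i != NumBytes; ++i)
    Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
}

int main() {
  uint8_t Fragment[4] = {0, 0, 0, 0};
  // 0x12345678 stands in for an arbitrary adjusted fixup value.
  orIntoFragment(Fragment, 0, 4, swapHalfwords(0x12345678u));
  printf("%02x %02x %02x %02x\n", Fragment[0], Fragment[1], Fragment[2],
         Fragment[3]); // prints: 34 12 78 56
  return 0;
}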
-static cl::opt<bool> -EnableMCInst("enable-arm-mcinst-printer", cl::Hidden, - cl::desc("enable experimental asmprinter gunk in the arm backend")); - -namespace llvm { - namespace ARM { - enum DW_ISA { - DW_ISA_ARM_thumb = 1, - DW_ISA_ARM_arm = 2 - }; - } -} - namespace { - class ARMAsmPrinter : public AsmPrinter { - /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can - /// make the right decision when printing asm code for different targets. - const ARMSubtarget *Subtarget; - - /// AFI - Keep a pointer to ARMFunctionInfo for the current - /// MachineFunction. - ARMFunctionInfo *AFI; + // Per section and per symbol attributes are not supported. + // To implement them we would need the ability to delay this emission + // until the assembly file is fully parsed/generated as only then do we + // know the symbol and section numbers. + class AttributeEmitter { + public: + virtual void MaybeSwitchVendor(StringRef Vendor) = 0; + virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0; + virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0; + virtual void Finish() = 0; + virtual ~AttributeEmitter() {} + }; - /// MCP - Keep a pointer to constantpool entries of the current - /// MachineFunction. - const MachineConstantPool *MCP; + class AsmAttributeEmitter : public AttributeEmitter { + MCStreamer &Streamer; public: - explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) { - Subtarget = &TM.getSubtarget<ARMSubtarget>(); - } + AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {} + void MaybeSwitchVendor(StringRef Vendor) { } - virtual const char *getPassName() const { - return "ARM Assembly Printer"; + void EmitAttribute(unsigned Attribute, unsigned Value) { + Streamer.EmitRawText("\t.eabi_attribute " + + Twine(Attribute) + ", " + Twine(Value)); } - void printInstructionThroughMCStreamer(const MachineInstr *MI); - - - void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, - const char *Modifier = 0); - void printSOImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O); - void printSOImm2PartOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printSORegOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printAddrMode2Operand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printAddrMode3Operand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printAddrMode4Operand(const MachineInstr *MI, int OpNum,raw_ostream &O, - const char *Modifier = 0); - void printAddrMode5Operand(const MachineInstr *MI, int OpNum,raw_ostream &O, - const char *Modifier = 0); - void printAddrMode6Operand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printAddrMode6OffsetOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printAddrModePCOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O, - const char *Modifier = 0); - void printBitfieldInvMaskImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printMemBOption(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printShiftImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - - void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printThumbITMask(const MachineInstr *MI, int OpNum, 
raw_ostream &O); - void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNum, - raw_ostream &O, - unsigned Scale); - void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - - void printT2SOOperand(const MachineInstr *MI, int OpNum, raw_ostream &O); - void printT2AddrModeImm12Operand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printT2AddrModeImm8Operand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printT2AddrModeImm8s4Operand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printT2AddrModeImm8s4OffsetOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) {} - void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - - void printCPSOptionOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) {} - void printMSRMaskOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) {} - void printNegZeroOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) {} - void printPredicateOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printMandatoryPredicateOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printSBitModifierOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printPCLabel(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printRegisterList(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printCPInstOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O, - const char *Modifier); - void printJTBlockOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printJT2BlockOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printTBAddrMode(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printNoHashImmediate(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printVFPf32ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - void printNEONModImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - - virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O); - - void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen - static const char *getRegisterName(unsigned RegNo); - - virtual void EmitInstruction(const MachineInstr *MI); - bool runOnMachineFunction(MachineFunction &F); - - virtual void EmitConstantPool() {} // we emit constant pools customly! - virtual void EmitFunctionEntryLabel(); - void EmitStartOfAsmFile(Module &M); - void EmitEndOfAsmFile(Module &M); - - MachineLocation getDebugValueLocation(const MachineInstr *MI) const { - MachineLocation Location; - assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); - // Frame address. Currently handles register +- offset only. 
- if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) - Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); - else { - DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); + void EmitTextAttribute(unsigned Attribute, StringRef String) { + switch (Attribute) { + case ARMBuildAttrs::CPU_name: + Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String)); + break; + default: assert(0 && "Unsupported Text attribute in ASM Mode"); break; } - return Location; + } + void Finish() { } + }; + + class ObjectAttributeEmitter : public AttributeEmitter { + MCObjectStreamer &Streamer; + StringRef CurrentVendor; + SmallString<64> Contents; + + public: + ObjectAttributeEmitter(MCObjectStreamer &Streamer_) : + Streamer(Streamer_), CurrentVendor("") { } + + void MaybeSwitchVendor(StringRef Vendor) { + assert(!Vendor.empty() && "Vendor cannot be empty."); + + if (CurrentVendor.empty()) + CurrentVendor = Vendor; + else if (CurrentVendor == Vendor) + return; + else + Finish(); + + CurrentVendor = Vendor; + + assert(Contents.size() == 0); } - virtual unsigned getISAEncoding() { - // ARM/Darwin adds ISA to the DWARF info for each function. - if (!Subtarget->isTargetDarwin()) - return 0; - return Subtarget->isThumb() ? - llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm; + void EmitAttribute(unsigned Attribute, unsigned Value) { + // FIXME: should be ULEB + Contents += Attribute; + Contents += Value; } - MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, - const MachineBasicBlock *MBB) const; - MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const; - - /// EmitMachineConstantPoolValue - Print a machine constantpool value to - /// the .s file. - virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - EmitMachineConstantPoolValue(MCPV, OS); - OutStreamer.EmitRawText(OS.str()); + void EmitTextAttribute(unsigned Attribute, StringRef String) { + Contents += Attribute; + Contents += UppercaseString(String); + Contents += 0; } - void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV, - raw_ostream &O) { - switch (TM.getTargetData()->getTypeAllocSize(MCPV->getType())) { - case 1: O << MAI->getData8bitsDirective(0); break; - case 2: O << MAI->getData16bitsDirective(0); break; - case 4: O << MAI->getData32bitsDirective(0); break; - default: assert(0 && "Unknown CPV size"); - } + void Finish() { + const size_t ContentsSize = Contents.size(); - ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); - - if (ACPV->isLSDA()) { - O << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); - } else if (ACPV->isBlockAddress()) { - O << *GetBlockAddressSymbol(ACPV->getBlockAddress()); - } else if (ACPV->isGlobalValue()) { - const GlobalValue *GV = ACPV->getGV(); - bool isIndirect = Subtarget->isTargetDarwin() && - Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); - if (!isIndirect) - O << *Mang->getSymbol(GV); - else { - // FIXME: Remove this when Darwin transition to @GOT like syntax. - MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - O << *Sym; - - MachineModuleInfoMachO &MMIMachO = - MMI->getObjFileInfo<MachineModuleInfoMachO>(); - MachineModuleInfoImpl::StubValueTy &StubSym = - GV->hasHiddenVisibility() ? 
MMIMachO.getHiddenGVStubEntry(Sym) : - MMIMachO.getGVStubEntry(Sym); - if (StubSym.getPointer() == 0) - StubSym = MachineModuleInfoImpl:: - StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage()); - } - } else { - assert(ACPV->isExtSymbol() && "unrecognized constant pool value"); - O << *GetExternalSymbolSymbol(ACPV->getSymbol()); - } + // Vendor size + Vendor name + '\0' + const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1; - if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")"; - if (ACPV->getPCAdjustment() != 0) { - O << "-(" << MAI->getPrivateGlobalPrefix() << "PC" - << getFunctionNumber() << "_" << ACPV->getLabelId() - << "+" << (unsigned)ACPV->getPCAdjustment(); - if (ACPV->mustAddCurrentAddress()) - O << "-."; - O << ')'; - } + // Tag + Tag Size + const size_t TagHeaderSize = 1 + 4; + + Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4); + Streamer.EmitBytes(CurrentVendor, 0); + Streamer.EmitIntValue(0, 1); // '\0' + + Streamer.EmitIntValue(ARMBuildAttrs::File, 1); + Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4); + + Streamer.EmitBytes(Contents, 0); + + Contents.clear(); } }; + } // end of anonymous namespace -#include "ARMGenAsmWriter.inc" +MachineLocation ARMAsmPrinter:: +getDebugValueLocation(const MachineInstr *MI) const { + MachineLocation Location; + assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); + // Frame address. Currently handles register +- offset only. + if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) + Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); + else { + DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); + } + return Location; +} void ARMAsmPrinter::EmitFunctionEntryLabel() { if (AFI->isThumbFunction()) { - OutStreamer.EmitRawText(StringRef("\t.code\t16")); - if (!Subtarget->isTargetDarwin()) - OutStreamer.EmitRawText(StringRef("\t.thumb_func")); - else { - // This needs to emit to a temporary string to get properly quoted - // MCSymbols when they have spaces in them. - SmallString<128> Tmp; - raw_svector_ostream OS(Tmp); - OS << "\t.thumb_func\t" << *CurrentFnSym; - OutStreamer.EmitRawText(OS.str()); - } + OutStreamer.EmitAssemblerFlag(MCAF_Code16); + OutStreamer.EmitThumbFunc(Subtarget->isTargetDarwin()? CurrentFnSym : 0); } OutStreamer.EmitLabel(CurrentFnSym); } -/// runOnMachineFunction - This uses the printInstruction() +/// runOnMachineFunction - This uses the EmitInstruction() /// method to print assembly for each instruction. /// bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { @@ -337,32 +197,18 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, case MachineOperand::MO_Register: { unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); - if (Modifier && strcmp(Modifier, "dregpair") == 0) { - unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0); - unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1); - O << '{' - << getRegisterName(DRegLo) << ", " << getRegisterName(DRegHi) - << '}'; - } else if (Modifier && strcmp(Modifier, "lane") == 0) { - unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); - unsigned DReg = - TM.getRegisterInfo()->getMatchingSuperReg(Reg, - RegNum & 1 ? 
ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); - O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; - } else { - assert(!MO.getSubReg() && "Subregs should be eliminated!"); - O << getRegisterName(Reg); - } + assert(!MO.getSubReg() && "Subregs should be eliminated!"); + O << ARMInstPrinter::getRegisterName(Reg); break; } case MachineOperand::MO_Immediate: { int64_t Imm = MO.getImm(); O << '#'; if ((Modifier && strcmp(Modifier, "lo16") == 0) || - (TF & ARMII::MO_LO16)) + (TF == ARMII::MO_LO16)) O << ":lower16:"; else if ((Modifier && strcmp(Modifier, "hi16") == 0) || - (TF & ARMII::MO_HI16)) + (TF == ARMII::MO_HI16)) O << ":upper16:"; O << Imm; break; @@ -371,9 +217,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << *MO.getMBB()->getSymbol(); return; case MachineOperand::MO_GlobalAddress: { - bool isCallOp = Modifier && !strcmp(Modifier, "call"); const GlobalValue *GV = MO.getGlobal(); - if ((Modifier && strcmp(Modifier, "lo16") == 0) || (TF & ARMII::MO_LO16)) O << ":lower16:"; @@ -383,18 +227,13 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << *Mang->getSymbol(GV); printOffset(MO.getOffset(), O); - - if (isCallOp && Subtarget->isTargetELF() && - TM.getRelocationModel() == Reloc::PIC_) + if (TF == ARMII::MO_PLT) O << "(PLT)"; break; } case MachineOperand::MO_ExternalSymbol: { - bool isCallOp = Modifier && !strcmp(Modifier, "call"); O << *GetExternalSymbolSymbol(MO.getSymbolName()); - - if (isCallOp && Subtarget->isTargetELF() && - TM.getRelocationModel() == Reloc::PIC_) + if (TF == ARMII::MO_PLT) O << "(PLT)"; break; } @@ -407,538 +246,8 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, } } -static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm, - const MCAsmInfo *MAI) { - // Break it up into two parts that make up a shifter immediate. - V = ARM_AM::getSOImmVal(V); - assert(V != -1 && "Not a valid so_imm value!"); - - unsigned Imm = ARM_AM::getSOImmValImm(V); - unsigned Rot = ARM_AM::getSOImmValRot(V); - - // Print low-level immediate formation info, per - // A5.1.3: "Data-processing operands - Immediate". - if (Rot) { - O << "#" << Imm << ", " << Rot; - // Pretty printed version. - if (VerboseAsm) { - O << "\t" << MAI->getCommentString() << ' '; - O << (int)ARM_AM::rotr32(Imm, Rot); - } - } else { - O << "#" << Imm; - } -} - -/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit -/// immediate in bits 0-7. -void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNum); - assert(MO.isImm() && "Not a valid so_imm value!"); - printSOImm(O, MO.getImm(), isVerbose(), MAI); -} - -/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov' -/// followed by an 'orr' to materialize. -void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNum); - assert(MO.isImm() && "Not a valid so_imm value!"); - unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO.getImm()); - unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO.getImm()); - printSOImm(O, V1, isVerbose(), MAI); - O << "\n\torr"; - printPredicateOperand(MI, 2, O); - O << "\t"; - printOperand(MI, 0, O); - O << ", "; - printOperand(MI, 0, O); - O << ", "; - printSOImm(O, V2, isVerbose(), MAI); -} - -// so_reg is a 4-operand unit corresponding to register forms of the A5.1 -// "Addressing Mode 1 - Data-processing operands" forms. 
This includes: -// REG 0 0 - e.g. R5 -// REG REG 0,SH_OPC - e.g. R5, ROR R3 -// REG 0 IMM,SH_OPC - e.g. R5, LSL #3 -void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(Op); - const MachineOperand &MO2 = MI->getOperand(Op+1); - const MachineOperand &MO3 = MI->getOperand(Op+2); - - O << getRegisterName(MO1.getReg()); - - // Print the shift opc. - ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm()); - O << ", " << ARM_AM::getShiftOpcStr(ShOpc); - if (MO2.getReg()) { - O << ' ' << getRegisterName(MO2.getReg()); - assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); - } else if (ShOpc != ARM_AM::rrx) { - O << " #" << ARM_AM::getSORegOffset(MO3.getImm()); - } -} - -void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(Op); - const MachineOperand &MO2 = MI->getOperand(Op+1); - const MachineOperand &MO3 = MI->getOperand(Op+2); - - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, Op, O); - return; - } - - O << "[" << getRegisterName(MO1.getReg()); - - if (!MO2.getReg()) { - if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. - O << ", #" - << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << ARM_AM::getAM2Offset(MO3.getImm()); - O << "]"; - return; - } - - O << ", " - << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << getRegisterName(MO2.getReg()); - - if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) - << " #" << ShImm; - O << "]"; -} - -void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(Op); - const MachineOperand &MO2 = MI->getOperand(Op+1); - - if (!MO1.getReg()) { - unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - O << "#" - << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) - << ImmOffs; - return; - } - - O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) - << getRegisterName(MO1.getReg()); - - if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm())) - << " #" << ShImm; -} - -void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(Op); - const MachineOperand &MO2 = MI->getOperand(Op+1); - const MachineOperand &MO3 = MI->getOperand(Op+2); - - assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); - O << "[" << getRegisterName(MO1.getReg()); - - if (MO2.getReg()) { - O << ", " - << (char)ARM_AM::getAM3Op(MO3.getImm()) - << getRegisterName(MO2.getReg()) - << "]"; - return; - } - - if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) - O << ", #" - << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) - << ImmOffs; - O << "]"; -} - -void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op, - raw_ostream &O){ - const MachineOperand &MO1 = MI->getOperand(Op); - const MachineOperand &MO2 = MI->getOperand(Op+1); - - if (MO1.getReg()) { - O << (char)ARM_AM::getAM3Op(MO2.getImm()) - << getRegisterName(MO1.getReg()); - return; - } - - unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - O << "#" - << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) - << ImmOffs; -} - -void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op, - raw_ostream &O, - 
const char *Modifier) { - const MachineOperand &MO2 = MI->getOperand(Op+1); - ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); - if (Modifier && strcmp(Modifier, "submode") == 0) { - O << ARM_AM::getAMSubModeStr(Mode); - } else if (Modifier && strcmp(Modifier, "wide") == 0) { - ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); - if (Mode == ARM_AM::ia) - O << ".w"; - } else { - printOperand(MI, Op, O); - } -} - -void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, - raw_ostream &O, - const char *Modifier) { - const MachineOperand &MO1 = MI->getOperand(Op); - const MachineOperand &MO2 = MI->getOperand(Op+1); - - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, Op, O); - return; - } - - assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); - - O << "[" << getRegisterName(MO1.getReg()); - - if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { - O << ", #" - << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) - << ImmOffs*4; - } - O << "]"; -} - -void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(Op); - const MachineOperand &MO2 = MI->getOperand(Op+1); - - O << "[" << getRegisterName(MO1.getReg()); - if (MO2.getImm()) { - // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << (MO2.getImm() << 3); - } - O << "]"; -} - -void ARMAsmPrinter::printAddrMode6OffsetOperand(const MachineInstr *MI, int Op, - raw_ostream &O){ - const MachineOperand &MO = MI->getOperand(Op); - if (MO.getReg() == 0) - O << "!"; - else - O << ", " << getRegisterName(MO.getReg()); -} - -void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, - raw_ostream &O, - const char *Modifier) { - if (Modifier && strcmp(Modifier, "label") == 0) { - printPCLabel(MI, Op+1, O); - return; - } - - const MachineOperand &MO1 = MI->getOperand(Op); - assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); - O << "[pc, " << getRegisterName(MO1.getReg()) << "]"; -} - -void -ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(Op); - uint32_t v = ~MO.getImm(); - int32_t lsb = CountTrailingZeros_32(v); - int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb; - assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); - O << "#" << lsb << ", #" << width; -} - -void -ARMAsmPrinter::printMemBOption(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - unsigned val = MI->getOperand(OpNum).getImm(); - O << ARM_MB::MemBOptToString(val); -} - -void ARMAsmPrinter::printShiftImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - unsigned ShiftOp = MI->getOperand(OpNum).getImm(); - ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); - switch (Opc) { - case ARM_AM::no_shift: - return; - case ARM_AM::lsl: - O << ", lsl #"; - break; - case ARM_AM::asr: - O << ", asr #"; - break; - default: - assert(0 && "unexpected shift opcode for shift immediate operand"); - } - O << ARM_AM::getSORegOffset(ShiftOp); -} - -//===--------------------------------------------------------------------===// - -void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op, - raw_ostream &O) { - O << "#" << MI->getOperand(Op).getImm() * 4; -} - -void -ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op, - raw_ostream &O) { - // (3 - the number of trailing zeros) is the number of then / else. 
- unsigned Mask = MI->getOperand(Op).getImm(); - unsigned CondBit0 = Mask >> 4 & 1; - unsigned NumTZ = CountTrailingZeros_32(Mask); - assert(NumTZ <= 3 && "Invalid IT mask!"); - for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { - bool T = ((Mask >> Pos) & 1) == CondBit0; - if (T) - O << 't'; - else - O << 'e'; - } -} - -void -ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(Op); - const MachineOperand &MO2 = MI->getOperand(Op+1); - O << "[" << getRegisterName(MO1.getReg()); - O << ", " << getRegisterName(MO2.getReg()) << "]"; -} - -void -ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, - raw_ostream &O, - unsigned Scale) { - const MachineOperand &MO1 = MI->getOperand(Op); - const MachineOperand &MO2 = MI->getOperand(Op+1); - const MachineOperand &MO3 = MI->getOperand(Op+2); - - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, Op, O); - return; - } - - O << "[" << getRegisterName(MO1.getReg()); - if (MO3.getReg()) - O << ", " << getRegisterName(MO3.getReg()); - else if (unsigned ImmOffs = MO2.getImm()) - O << ", #" << ImmOffs * Scale; - O << "]"; -} - -void -ARMAsmPrinter::printThumbAddrModeS1Operand(const MachineInstr *MI, int Op, - raw_ostream &O) { - printThumbAddrModeRI5Operand(MI, Op, O, 1); -} -void -ARMAsmPrinter::printThumbAddrModeS2Operand(const MachineInstr *MI, int Op, - raw_ostream &O) { - printThumbAddrModeRI5Operand(MI, Op, O, 2); -} -void -ARMAsmPrinter::printThumbAddrModeS4Operand(const MachineInstr *MI, int Op, - raw_ostream &O) { - printThumbAddrModeRI5Operand(MI, Op, O, 4); -} - -void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(Op); - const MachineOperand &MO2 = MI->getOperand(Op+1); - O << "[" << getRegisterName(MO1.getReg()); - if (unsigned ImmOffs = MO2.getImm()) - O << ", #" << ImmOffs*4; - O << "]"; -} - -//===--------------------------------------------------------------------===// - -// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 -// register with shift forms. -// REG 0 0 - e.g. R5 -// REG IMM, SH_OPC - e.g. R5, LSL #3 -void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNum); - const MachineOperand &MO2 = MI->getOperand(OpNum+1); - - unsigned Reg = MO1.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); - O << getRegisterName(Reg); - - // Print the shift opc. - assert(MO2.isImm() && "Not a valid t2_so_reg value!"); - ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); - O << ", " << ARM_AM::getShiftOpcStr(ShOpc); - if (ShOpc != ARM_AM::rrx) - O << " #" << ARM_AM::getSORegOffset(MO2.getImm()); -} - -void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI, - int OpNum, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNum); - const MachineOperand &MO2 = MI->getOperand(OpNum+1); - - O << "[" << getRegisterName(MO1.getReg()); - - unsigned OffImm = MO2.getImm(); - if (OffImm) // Don't print +0. 
- O << ", #" << OffImm; - O << "]"; -} - -void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI, - int OpNum, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNum); - const MachineOperand &MO2 = MI->getOperand(OpNum+1); - - O << "[" << getRegisterName(MO1.getReg()); - - int32_t OffImm = (int32_t)MO2.getImm(); - // Don't print +0. - if (OffImm < 0) - O << ", #-" << -OffImm; - else if (OffImm > 0) - O << ", #" << OffImm; - O << "]"; -} - -void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI, - int OpNum, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNum); - const MachineOperand &MO2 = MI->getOperand(OpNum+1); - - O << "[" << getRegisterName(MO1.getReg()); - - int32_t OffImm = (int32_t)MO2.getImm() / 4; - // Don't print +0. - if (OffImm < 0) - O << ", #-" << -OffImm * 4; - else if (OffImm > 0) - O << ", #" << OffImm * 4; - O << "]"; -} - -void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, - int OpNum, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNum); - int32_t OffImm = (int32_t)MO1.getImm(); - // Don't print +0. - if (OffImm < 0) - O << "#-" << -OffImm; - else if (OffImm > 0) - O << "#" << OffImm; -} - -void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI, - int OpNum, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNum); - const MachineOperand &MO2 = MI->getOperand(OpNum+1); - const MachineOperand &MO3 = MI->getOperand(OpNum+2); - - O << "[" << getRegisterName(MO1.getReg()); - - assert(MO2.getReg() && "Invalid so_reg load / store address!"); - O << ", " << getRegisterName(MO2.getReg()); - - unsigned ShAmt = MO3.getImm(); - if (ShAmt) { - assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); - O << ", lsl #" << ShAmt; - } - O << "]"; -} - - //===--------------------------------------------------------------------===// -void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); - if (CC != ARMCC::AL) - O << ARMCondCodeToString(CC); -} - -void ARMAsmPrinter::printMandatoryPredicateOperand(const MachineInstr *MI, - int OpNum, - raw_ostream &O) { - ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); - O << ARMCondCodeToString(CC); -} - -void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O){ - unsigned Reg = MI->getOperand(OpNum).getReg(); - if (Reg) { - assert(Reg == ARM::CPSR && "Expect ARM CPSR register!"); - O << 's'; - } -} - -void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - int Id = (int)MI->getOperand(OpNum).getImm(); - O << MAI->getPrivateGlobalPrefix() - << "PC" << getFunctionNumber() << "_" << Id; -} - -void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "{"; - for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { - if (MI->getOperand(i).isImplicit()) - continue; - if ((int)i != OpNum) O << ", "; - printOperand(MI, i, O); - } - O << "}"; -} - -void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O, const char *Modifier) { - assert(Modifier && "This operand only works with a modifier!"); - // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the - // data itself. 
- if (!strcmp(Modifier, "label")) { - unsigned ID = MI->getOperand(OpNum).getImm(); - OutStreamer.EmitLabel(GetCPISymbol(ID)); - } else { - assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE"); - unsigned CPI = MI->getOperand(OpNum).getIndex(); - - const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; - - if (MCPE.isMachineConstantPoolEntry()) { - EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); - } else { - EmitGlobalConstant(MCPE.Val.ConstVal); - } - } -} - MCSymbol *ARMAsmPrinter:: GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, const MachineBasicBlock *MBB) const { @@ -957,126 +266,12 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { return OutContext.GetOrCreateSymbol(Name.str()); } -void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - assert(!Subtarget->isThumb2() && "Thumb2 should use double-jump jumptables!"); - - const MachineOperand &MO1 = MI->getOperand(OpNum); - const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id - - unsigned JTI = MO1.getIndex(); - MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); - // Can't use EmitLabel until instprinter happens, label comes out in the wrong - // order. - O << "\n" << *JTISymbol << ":\n"; - - const char *JTEntryDirective = MAI->getData32bitsDirective(); - - const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); - const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; - bool UseSet= MAI->hasSetDirective() && TM.getRelocationModel() == Reloc::PIC_; - SmallPtrSet<MachineBasicBlock*, 8> JTSets; - for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { - MachineBasicBlock *MBB = JTBBs[i]; - bool isNew = JTSets.insert(MBB); - - if (UseSet && isNew) { - O << "\t.set\t" - << *GetARMSetPICJumpTableLabel2(JTI, MO2.getImm(), MBB) << ',' - << *MBB->getSymbol() << '-' << *JTISymbol << '\n'; - } - - O << JTEntryDirective << ' '; - if (UseSet) - O << *GetARMSetPICJumpTableLabel2(JTI, MO2.getImm(), MBB); - else if (TM.getRelocationModel() == Reloc::PIC_) - O << *MBB->getSymbol() << '-' << *JTISymbol; - else - O << *MBB->getSymbol(); - - if (i != e-1) - O << '\n'; - } -} - -void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNum); - const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id - unsigned JTI = MO1.getIndex(); - - MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); - - // Can't use EmitLabel until instprinter happens, label comes out in the wrong - // order. 
- O << "\n" << *JTISymbol << ":\n"; - - const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); - const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; - bool ByteOffset = false, HalfWordOffset = false; - if (MI->getOpcode() == ARM::t2TBB) - ByteOffset = true; - else if (MI->getOpcode() == ARM::t2TBH) - HalfWordOffset = true; - - for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { - MachineBasicBlock *MBB = JTBBs[i]; - if (ByteOffset) - O << MAI->getData8bitsDirective(); - else if (HalfWordOffset) - O << MAI->getData16bitsDirective(); - - if (ByteOffset || HalfWordOffset) - O << '(' << *MBB->getSymbol() << "-" << *JTISymbol << ")/2"; - else - O << "\tb.w " << *MBB->getSymbol(); - - if (i != e-1) - O << '\n'; - } -} - -void ARMAsmPrinter::printTBAddrMode(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg()); - if (MI->getOpcode() == ARM::t2TBH) - O << ", lsl #1"; - O << ']'; -} - -void ARMAsmPrinter::printNoHashImmediate(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << MI->getOperand(OpNum).getImm(); -} - -void ARMAsmPrinter::printVFPf32ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - const ConstantFP *FP = MI->getOperand(OpNum).getFPImm(); - O << '#' << FP->getValueAPF().convertToFloat(); - if (isVerbose()) { - O << "\t\t" << MAI->getCommentString() << ' '; - WriteAsOperand(O, FP, /*PrintType=*/false); - } -} - -void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - const ConstantFP *FP = MI->getOperand(OpNum).getFPImm(); - O << '#' << FP->getValueAPF().convertToDouble(); - if (isVerbose()) { - O << "\t\t" << MAI->getCommentString() << ' '; - WriteAsOperand(O, FP, /*PrintType=*/false); - } -} -void ARMAsmPrinter::printNEONModImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - unsigned EncodedImm = MI->getOperand(OpNum).getImm(); - unsigned EltBits; - uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); - O << "#0x" << utohexstr(Val); +MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const { + SmallString<60> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH" + << getFunctionNumber(); + return OutContext.GetOrCreateSymbol(Name.str()); } bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, @@ -1090,14 +285,16 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, default: return true; // Unknown modifier. case 'a': // Print as a memory address. if (MI->getOperand(OpNum).isReg()) { - O << "[" << getRegisterName(MI->getOperand(OpNum).getReg()) << "]"; + O << "[" + << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg()) + << "]"; return false; } // Fallthrough case 'c': // Don't print "#" before an immediate operand. if (!MI->getOperand(OpNum).isImm()) return true; - printNoHashImmediate(MI, OpNum, O); + O << MI->getOperand(OpNum).getImm(); return false; case 'P': // Print a VFP double precision register. case 'q': // Print a NEON quad precision register. @@ -1106,7 +303,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, case 'Q': case 'R': case 'H': - report_fatal_error("llvm does not support 'Q', 'R', and 'H' modifiers!"); + // These modifiers are not yet supported. 
return true; } } @@ -1124,48 +321,10 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, const MachineOperand &MO = MI->getOperand(OpNum); assert(MO.isReg() && "unexpected inline asm memory operand"); - O << "[" << getRegisterName(MO.getReg()) << "]"; + O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]"; return false; } -void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { - if (EnableMCInst) { - printInstructionThroughMCStreamer(MI); - return; - } - - if (MI->getOpcode() == ARM::CONSTPOOL_ENTRY) - EmitAlignment(2); - - SmallString<128> Str; - raw_svector_ostream OS(Str); - if (MI->getOpcode() == ARM::DBG_VALUE) { - unsigned NOps = MI->getNumOperands(); - assert(NOps==4); - OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); - OS << V.getName(); - OS << " <- "; - // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS); - OS << ']'; - OS << "+"; - printOperand(MI, NOps-2, OS); - OutStreamer.EmitRawText(OS.str()); - return; - } - - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); - - // Make sure the instruction that follows TBB is 2-byte aligned. - // FIXME: Constant island pass should insert an "ALIGN" instruction instead. - if (MI->getOpcode() == ARM::t2TBB) - EmitAlignment(1); -} - void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { Reloc::Model RelocM = TM.getRelocationModel(); @@ -1205,49 +364,12 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { } // Use unified assembler syntax. - OutStreamer.EmitRawText(StringRef("\t.syntax unified")); + OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified); // Emit ARM Build Attributes if (Subtarget->isTargetELF()) { - // CPU Type - std::string CPUString = Subtarget->getCPUString(); - if (CPUString != "generic") - OutStreamer.EmitRawText("\t.cpu " + Twine(CPUString)); - - // FIXME: Emit FPU type - if (Subtarget->hasVFP2()) - OutStreamer.EmitRawText("\t.eabi_attribute " + - Twine(ARMBuildAttrs::VFP_arch) + ", 2"); - - // Signal various FP modes. - if (!UnsafeFPMath) { - OutStreamer.EmitRawText("\t.eabi_attribute " + - Twine(ARMBuildAttrs::ABI_FP_denormal) + ", 1"); - OutStreamer.EmitRawText("\t.eabi_attribute " + - Twine(ARMBuildAttrs::ABI_FP_exceptions) + ", 1"); - } - if (NoInfsFPMath && NoNaNsFPMath) - OutStreamer.EmitRawText("\t.eabi_attribute " + - Twine(ARMBuildAttrs::ABI_FP_number_model)+ ", 1"); - else - OutStreamer.EmitRawText("\t.eabi_attribute " + - Twine(ARMBuildAttrs::ABI_FP_number_model)+ ", 3"); - - // 8-bytes alignment stuff. - OutStreamer.EmitRawText("\t.eabi_attribute " + - Twine(ARMBuildAttrs::ABI_align8_needed) + ", 1"); - OutStreamer.EmitRawText("\t.eabi_attribute " + - Twine(ARMBuildAttrs::ABI_align8_preserved) + ", 1"); - - // Hard float. Use both S and D registers and conform to AAPCS-VFP. - if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) { - OutStreamer.EmitRawText("\t.eabi_attribute " + - Twine(ARMBuildAttrs::ABI_HardFP_use) + ", 3"); - OutStreamer.EmitRawText("\t.eabi_attribute " + - Twine(ARMBuildAttrs::ABI_VFP_args) + ", 1"); - } - // FIXME: Should we signal R9 usage? + emitAttributes(); } } @@ -1280,10 +402,10 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { else // Internal to current translation unit. 
// - // When we place the LSDA into the TEXT section, the type info pointers - // need to be indirect and pc-rel. We accomplish this by using NLPs. - // However, sometimes the types are local to the file. So we need to - // fill in the value for the NLP in those cases. + // When we place the LSDA into the TEXT section, the type info + // pointers need to be indirect and pc-rel. We accomplish this by + // using NLPs; however, sometimes the types are local to the file. + // We need to fill in the value for the NLP in those cases. OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), OutContext), 4/*size*/, 0/*addrspace*/); @@ -1321,38 +443,631 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { } //===----------------------------------------------------------------------===// +// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile() +// FIXME: +// The following seem like one-off assembler flags, but they actually need +// to appear in the .ARM.attributes section in ELF. +// Instead of subclassing the MCELFStreamer, we do the work here. + +void ARMAsmPrinter::emitAttributes() { + + emitARMAttributeSection(); + + AttributeEmitter *AttrEmitter; + if (OutStreamer.hasRawTextSupport()) + AttrEmitter = new AsmAttributeEmitter(OutStreamer); + else { + MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer); + AttrEmitter = new ObjectAttributeEmitter(O); + } + + AttrEmitter->MaybeSwitchVendor("aeabi"); + + std::string CPUString = Subtarget->getCPUString(); + + if (CPUString == "cortex-a8" || + Subtarget->isCortexA8()) { + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8"); + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::ApplicationProfile); + AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, + ARMBuildAttrs::Allowed); + AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumb32); + // Fixme: figure out when this is emitted. + //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch, + // ARMBuildAttrs::AllowWMMXv1); + // + + /// ADD additional Else-cases here! + } else if (CPUString == "generic") { + // FIXME: Why these defaults? + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); + AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, + ARMBuildAttrs::Allowed); + AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::Allowed); + } + + // FIXME: Emit FPU type + if (Subtarget->hasVFP2()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv2); + + // Signal various FP modes. + if (!UnsafeFPMath) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal, + ARMBuildAttrs::Allowed); + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions, + ARMBuildAttrs::Allowed); + } + + if (NoInfsFPMath && NoNaNsFPMath) + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model, + ARMBuildAttrs::Allowed); + else + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model, + ARMBuildAttrs::AllowIEE754); + + // FIXME: add more flags to ARMBuildAttrs.h + // 8-bytes alignment stuff. + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1); + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1); + + // Hard float. Use both S and D registers and conform to AAPCS-VFP. 
+ if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3); + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1); + } + // FIXME: Should we signal R9 usage? + + if (Subtarget->hasDivide()) + AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1); + + AttrEmitter->Finish(); + delete AttrEmitter; +} + +void ARMAsmPrinter::emitARMAttributeSection() { + // <format-version> + // [ <section-length> "vendor-name" + // [ <file-tag> <size> <attribute>* + // | <section-tag> <size> <section-number>* 0 <attribute>* + // | <symbol-tag> <size> <symbol-number>* 0 <attribute>* + // ]+ + // ]* + + if (OutStreamer.hasRawTextSupport()) + return; + + const ARMElfTargetObjectFile &TLOFELF = + static_cast<const ARMElfTargetObjectFile &> + (getObjFileLowering()); + + OutStreamer.SwitchSection(TLOFELF.getAttributesSection()); + + // Format version + OutStreamer.EmitIntValue(0x41, 1); +} + +//===----------------------------------------------------------------------===// + +static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber, + unsigned LabelId, MCContext &Ctx) { + + MCSymbol *Label = Ctx.GetOrCreateSymbol(Twine(Prefix) + + "PC" + Twine(FunctionNumber) + "_" + Twine(LabelId)); + return Label; +} + +static MCSymbolRefExpr::VariantKind +getModifierVariantKind(ARMCP::ARMCPModifier Modifier) { + switch (Modifier) { + default: llvm_unreachable("Unknown modifier!"); + case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None; + case ARMCP::TLSGD: return MCSymbolRefExpr::VK_ARM_TLSGD; + case ARMCP::TPOFF: return MCSymbolRefExpr::VK_ARM_TPOFF; + case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_ARM_GOTTPOFF; + case ARMCP::GOT: return MCSymbolRefExpr::VK_ARM_GOT; + case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_ARM_GOTOFF; + } + return MCSymbolRefExpr::VK_None; +} + +MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) { + bool isIndirect = Subtarget->isTargetDarwin() && + Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); + if (!isIndirect) + return Mang->getSymbol(GV); + + // FIXME: Remove this when Darwin transition to @GOT like syntax. + MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + MachineModuleInfoMachO &MMIMachO = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + MachineModuleInfoImpl::StubValueTy &StubSym = + GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) : + MMIMachO.getGVStubEntry(MCSym); + if (StubSym.getPointer() == 0) + StubSym = MachineModuleInfoImpl:: + StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage()); + return MCSym; +} + +void ARMAsmPrinter:: +EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + int Size = TM.getTargetData()->getTypeAllocSize(MCPV->getType()); + + ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); + + MCSymbol *MCSym; + if (ACPV->isLSDA()) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); + MCSym = OutContext.GetOrCreateSymbol(OS.str()); + } else if (ACPV->isBlockAddress()) { + MCSym = GetBlockAddressSymbol(ACPV->getBlockAddress()); + } else if (ACPV->isGlobalValue()) { + const GlobalValue *GV = ACPV->getGV(); + MCSym = GetARMGVSymbol(GV); + } else { + assert(ACPV->isExtSymbol() && "unrecognized constant pool value"); + MCSym = GetExternalSymbolSymbol(ACPV->getSymbol()); + } + + // Create an MCSymbol for the reference. 
+  const MCExpr *Expr =
+    MCSymbolRefExpr::Create(MCSym, getModifierVariantKind(ACPV->getModifier()),
+                            OutContext);
+
+  if (ACPV->getPCAdjustment()) {
+    MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(),
+                                    getFunctionNumber(),
+                                    ACPV->getLabelId(),
+                                    OutContext);
+    const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, OutContext);
+    PCRelExpr =
+      MCBinaryExpr::CreateAdd(PCRelExpr,
+                              MCConstantExpr::Create(ACPV->getPCAdjustment(),
+                                                     OutContext),
+                              OutContext);
+    if (ACPV->mustAddCurrentAddress()) {
+      // We want "(<expr> - .)", but MC doesn't have a concept of the '.'
+      // label, so just emit a local label and reference that instead.
+      MCSymbol *DotSym = OutContext.CreateTempSymbol();
+      OutStreamer.EmitLabel(DotSym);
+      const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
+      PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext);
+    }
+    Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext);
+  }
+  OutStreamer.EmitValue(Expr, Size);
+}
+
+void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
+  unsigned Opcode = MI->getOpcode();
+  int OpNum = 1;
+  if (Opcode == ARM::BR_JTadd)
+    OpNum = 2;
+  else if (Opcode == ARM::BR_JTm)
+    OpNum = 3;
+
+  const MachineOperand &MO1 = MI->getOperand(OpNum);
+  const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+  unsigned JTI = MO1.getIndex();
+
+  // Emit a label for the jump table.
+  MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+  OutStreamer.EmitLabel(JTISymbol);
+
+  // Emit each entry of the table.
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+  for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = JTBBs[i];
+    // Construct an MCExpr for the entry. We want a value of the form:
+    // (BasicBlockAddr - TableBeginAddr)
+    //
+    // For example, a table with entries jumping to basic blocks BB0 and BB1
+    // would look like:
+    // LJTI_0_0:
+    //    .word (LBB0 - LJTI_0_0)
+    //    .word (LBB1 - LJTI_0_0)
+    const MCExpr *Expr = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+
+    if (TM.getRelocationModel() == Reloc::PIC_)
+      Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol,
+                                                                   OutContext),
+                                     OutContext);
+    OutStreamer.EmitValue(Expr, 4);
+  }
+}
+
+void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
+  unsigned Opcode = MI->getOpcode();
+  int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1;
+  const MachineOperand &MO1 = MI->getOperand(OpNum);
+  const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
+  unsigned JTI = MO1.getIndex();
+
+  // Emit a label for the jump table.
+  MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
+  OutStreamer.EmitLabel(JTISymbol);
+
+  // Emit each entry of the table.
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+  unsigned OffsetWidth = 4;
+  if (MI->getOpcode() == ARM::t2TBB_JT)
+    OffsetWidth = 1;
+  else if (MI->getOpcode() == ARM::t2TBH_JT)
+    OffsetWidth = 2;
+
+  for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = JTBBs[i];
+    const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
+                                                          OutContext);
+    // If this isn't a TBB or TBH, the entries are direct branch instructions.
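+    // (OffsetWidth is 4 only for t2BR_JT; TBB and TBH tables instead hold
+    // byte or halfword offsets that the hardware scales by two.)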
+    if (OffsetWidth == 4) {
+      MCInst BrInst;
+      BrInst.setOpcode(ARM::t2B);
+      BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr));
+      OutStreamer.EmitInstruction(BrInst);
+      continue;
+    }
+    // Otherwise it's an offset from the dispatch instruction. Construct an
+    // MCExpr for the entry. We want a value of the form:
+    // (BasicBlockAddr - TableBeginAddr) / 2
+    //
+    // For example, a TBB table with entries jumping to basic blocks BB0 and
+    // BB1 would look like:
+    // LJTI_0_0:
+    //    .byte (LBB0 - LJTI_0_0) / 2
+    //    .byte (LBB1 - LJTI_0_0) / 2
+    const MCExpr *Expr =
+      MCBinaryExpr::CreateSub(MBBSymbolExpr,
+                              MCSymbolRefExpr::Create(JTISymbol, OutContext),
+                              OutContext);
+    Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext),
+                                   OutContext);
+    OutStreamer.EmitValue(Expr, OffsetWidth);
+  }
+}
+
+void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+                                           raw_ostream &OS) {
+  unsigned NOps = MI->getNumOperands();
+  assert(NOps==4);
+  OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+  // Cast away const; DIVariable etc. do not take const operands for some
+  // reason.
+  DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+  OS << V.getName();
+  OS << " <- ";
+  // Frame address. Currently handles register +- offset only.
+  assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+  OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS);
+  OS << ']';
+  OS << "+";
+  printOperand(MI, NOps-2, OS);
+}
+
+static void populateADROperands(MCInst &Inst, unsigned Dest,
+                                const MCSymbol *Label,
+                                unsigned pred, unsigned ccreg,
+                                MCContext &Ctx) {
+  const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx);
+  Inst.addOperand(MCOperand::CreateReg(Dest));
+  Inst.addOperand(MCOperand::CreateExpr(SymbolExpr));
+  // Add predicate operands.
+  Inst.addOperand(MCOperand::CreateImm(pred));
+  Inst.addOperand(MCOperand::CreateReg(ccreg));
+}
+
+void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI,
+                                           unsigned Opcode) {
+  MCInst TmpInst;
-void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
-  ARMMCInstLower MCInstLowering(OutContext, *Mang, *this);
-  switch (MI->getOpcode()) {
-  case ARM::t2MOVi32imm:
-    assert(0 && "Should be lowered by thumb2it pass");
+  // Emit the instruction as usual, just patch the opcode.
+  LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+  TmpInst.setOpcode(Opcode);
+  OutStreamer.EmitInstruction(TmpInst);
+}
+
+void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
   default: break;
-  case ARM::PICADD: { // FIXME: Remove asm string from td file.
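  // Editorial note (illustration, with hypothetical operands): for a
  // variable "x" located at [r7 + #8], PrintDebugValueComment() above
  // produces an asm comment of the form
  //     @ DEBUG_VALUE: x <- [r7+8]+0
  // given "@" as the target's comment string.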
+ case ARM::t2ADDrSPi: + case ARM::t2ADDrSPi12: + case ARM::t2SUBrSPi: + case ARM::t2SUBrSPi12: + assert ((MI->getOperand(1).getReg() == ARM::SP) && + "Unexpected source register!"); + break; + + case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass"); + case ARM::DBG_VALUE: { + if (isVerbose() && OutStreamer.hasRawTextSupport()) { + SmallString<128> TmpStr; + raw_svector_ostream OS(TmpStr); + PrintDebugValueComment(MI, OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + return; + } + case ARM::tBfar: { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tBL); + TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MI->getOperand(0).getMBB()->getSymbol(), OutContext))); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::LEApcrel: + case ARM::tLEApcrel: + case ARM::t2LEApcrel: { + // FIXME: Need to also handle globals and externals + MCInst TmpInst; + TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR + : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR + : ARM::ADR)); + populateADROperands(TmpInst, MI->getOperand(0).getReg(), + GetCPISymbol(MI->getOperand(1).getIndex()), + MI->getOperand(2).getImm(), MI->getOperand(3).getReg(), + OutContext); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::LEApcrelJT: + case ARM::tLEApcrelJT: + case ARM::t2LEApcrelJT: { + MCInst TmpInst; + TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR + : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR + : ARM::ADR)); + populateADROperands(TmpInst, MI->getOperand(0).getReg(), + GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(), + MI->getOperand(2).getImm()), + MI->getOperand(3).getImm(), MI->getOperand(4).getReg(), + OutContext); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::MOVPCRX: { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::BXr9_CALL: + case ARM::BX_CALL: { + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::BX); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + OutStreamer.EmitInstruction(TmpInst); + } + return; + } + case ARM::BMOVPCRXr9_CALL: + case ARM::BMOVPCRX_CALL: { + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + // Add predicate operands. 
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + return; + } + case ARM::MOVi16_ga_pcrel: + case ARM::t2MOVi16_ga_pcrel: { + MCInst TmpInst; + TmpInst.setOpcode(Opc == ARM::MOVi16_ga_pcrel? ARM::MOVi16 : ARM::t2MOVi16); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + + unsigned TF = MI->getOperand(1).getTargetFlags(); + bool isPIC = TF == ARMII::MO_LO16_NONLAZY_PIC; + const GlobalValue *GV = MI->getOperand(1).getGlobal(); + MCSymbol *GVSym = GetARMGVSymbol(GV); + const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + if (isPIC) { + MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(2).getImm(), OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4; + const MCExpr *PCRelExpr = + ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr, + MCBinaryExpr::CreateAdd(LabelSymExpr, + MCConstantExpr::Create(PCAdj, OutContext), + OutContext), OutContext), OutContext); + TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr)); + } else { + const MCExpr *RefExpr= ARMMCExpr::CreateLower16(GVSymExpr, OutContext); + TmpInst.addOperand(MCOperand::CreateExpr(RefExpr)); + } + + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::MOVTi16_ga_pcrel: + case ARM::t2MOVTi16_ga_pcrel: { + MCInst TmpInst; + TmpInst.setOpcode(Opc == ARM::MOVTi16_ga_pcrel + ? ARM::MOVTi16 : ARM::t2MOVTi16); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + + unsigned TF = MI->getOperand(2).getTargetFlags(); + bool isPIC = TF == ARMII::MO_HI16_NONLAZY_PIC; + const GlobalValue *GV = MI->getOperand(2).getGlobal(); + MCSymbol *GVSym = GetARMGVSymbol(GV); + const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + if (isPIC) { + MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(3).getImm(), OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4; + const MCExpr *PCRelExpr = + ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr, + MCBinaryExpr::CreateAdd(LabelSymExpr, + MCConstantExpr::Create(PCAdj, OutContext), + OutContext), OutContext), OutContext); + TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr)); + } else { + const MCExpr *RefExpr= ARMMCExpr::CreateUpper16(GVSymExpr, OutContext); + TmpInst.addOperand(MCOperand::CreateExpr(RefExpr)); + } + // Add predicate operands. 
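    // Editorial note on the PCAdj values in the two cases above: they mirror
    // how the PC reads at execution time, the current instruction address
    // plus 8 in ARM state and plus 4 in Thumb-2, so each operand resolves to
    // :lower16: or :upper16: of (sym - (.LPCn_m + PCAdj)) for a hypothetical
    // symbol "sym".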
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::tPICADD: { // This is a pseudo op for a label + instruction sequence, which looks like: // LPC0: - // add r0, pc, r0 + // add r0, pc // This adds the address of LPC0 to r0. // Emit the label. - // FIXME: MOVE TO SHARED PLACE. - unsigned Id = (unsigned)MI->getOperand(2).getImm(); - const char *Prefix = MAI->getPrivateGlobalPrefix(); - MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix) - + "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id)); - OutStreamer.EmitLabel(Label); + OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), MI->getOperand(2).getImm(), + OutContext)); + // Form and emit the add. + MCInst AddInst; + AddInst.setOpcode(ARM::tADDhirr); + AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + AddInst.addOperand(MCOperand::CreateReg(ARM::PC)); + // Add predicate operands. + AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + AddInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(AddInst); + return; + } + case ARM::PICADD: { + // This is a pseudo op for a label + instruction sequence, which looks like: + // LPC0: + // add r0, pc, r0 + // This adds the address of LPC0 to r0. - // Form and emit tha dd. + // Emit the label. + OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), MI->getOperand(2).getImm(), + OutContext)); + + // Form and emit the add. MCInst AddInst; AddInst.setOpcode(ARM::ADDrr); AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); AddInst.addOperand(MCOperand::CreateReg(ARM::PC)); AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + // Add predicate operands. + AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); + AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg())); + // Add 's' bit operand (always reg0 for this) + AddInst.addOperand(MCOperand::CreateReg(0)); OutStreamer.EmitInstruction(AddInst); return; } - case ARM::CONSTPOOL_ENTRY: { // FIXME: Remove asm string from td file. + case ARM::PICSTR: + case ARM::PICSTRB: + case ARM::PICSTRH: + case ARM::PICLDR: + case ARM::PICLDRB: + case ARM::PICLDRH: + case ARM::PICLDRSB: + case ARM::PICLDRSH: { + // This is a pseudo op for a label + instruction sequence, which looks like: + // LPC0: + // OP r0, [pc, r0] + // The LCP0 label is referenced by a constant pool entry in order to get + // a PC-relative address at the ldr instruction. + + // Emit the label. 
+ OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), MI->getOperand(2).getImm(), + OutContext)); + + // Form and emit the load + unsigned Opcode; + switch (MI->getOpcode()) { + default: + llvm_unreachable("Unexpected opcode!"); + case ARM::PICSTR: Opcode = ARM::STRrs; break; + case ARM::PICSTRB: Opcode = ARM::STRBrs; break; + case ARM::PICSTRH: Opcode = ARM::STRH; break; + case ARM::PICLDR: Opcode = ARM::LDRrs; break; + case ARM::PICLDRB: Opcode = ARM::LDRBrs; break; + case ARM::PICLDRH: Opcode = ARM::LDRH; break; + case ARM::PICLDRSB: Opcode = ARM::LDRSB; break; + case ARM::PICLDRSH: Opcode = ARM::LDRSH; break; + } + MCInst LdStInst; + LdStInst.setOpcode(Opcode); + LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + LdStInst.addOperand(MCOperand::CreateReg(ARM::PC)); + LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + LdStInst.addOperand(MCOperand::CreateImm(0)); + // Add predicate operands. + LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); + LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg())); + OutStreamer.EmitInstruction(LdStInst); + + return; + } + case ARM::CONSTPOOL_ENTRY: { /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool /// in the function. The first operand is the ID# for this instruction, the /// second is the index into the MachineConstantPool that this is, the third @@ -1371,100 +1086,450 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { return; } - case ARM::MOVi2pieces: { // FIXME: Remove asmstring from td file. - // This is a hack that lowers as a two instruction sequence. - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); + case ARM::t2BR_JT: { + // Lower and emit the instruction itself, then the jump table following it. + MCInst TmpInst; + TmpInst.setOpcode(ARM::tMOVgpr2gpr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + // Output the data for the jump table itself + EmitJump2Table(MI); + return; + } + case ARM::t2TBB_JT: { + // Lower and emit the instruction itself, then the jump table following it. + MCInst TmpInst; + + TmpInst.setOpcode(ARM::t2TBB); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + // Output the data for the jump table itself + EmitJump2Table(MI); + // Make sure the next instruction is 2-byte aligned. + EmitAlignment(1); + return; + } + case ARM::t2TBH_JT: { + // Lower and emit the instruction itself, then the jump table following it. + MCInst TmpInst; + + TmpInst.setOpcode(ARM::t2TBH); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Add predicate operands. 
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + // Output the data for the jump table itself + EmitJump2Table(MI); + return; + } + case ARM::tBR_JTr: + case ARM::BR_JTr: { + // Lower and emit the instruction itself, then the jump table following it. + // mov pc, target + MCInst TmpInst; + unsigned Opc = MI->getOpcode() == ARM::BR_JTr ? + ARM::MOVr : ARM::tMOVgpr2gpr; + TmpInst.setOpcode(Opc); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + if (Opc == ARM::MOVr) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + + // Make sure the Thumb jump table is 4-byte aligned. + if (Opc == ARM::tMOVgpr2gpr) + EmitAlignment(2); - unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); - unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); + // Output the data for the jump table itself + EmitJumpTable(MI); + return; + } + case ARM::BR_JTm: { + // Lower and emit the instruction itself, then the jump table following it. + // ldr pc, target + MCInst TmpInst; + if (MI->getOperand(1).getReg() == 0) { + // literal offset + TmpInst.setOpcode(ARM::LDRi12); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); + } else { + TmpInst.setOpcode(ARM::LDRrs); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + TmpInst.addOperand(MCOperand::CreateImm(0)); + } + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + // Output the data for the jump table itself + EmitJumpTable(MI); + return; + } + case ARM::BR_JTadd: { + // Lower and emit the instruction itself, then the jump table following it. + // add pc, target, idx + MCInst TmpInst; + TmpInst.setOpcode(ARM::ADDrr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + + // Output the data for the jump table itself + EmitJumpTable(MI); + return; + } + case ARM::TRAP: { + // Non-Darwin binutils don't yet support the "trap" mnemonic. + // FIXME: Remove this special case when they do. + if (!Subtarget->isTargetDarwin()) { + //.long 0xe7ffdefe @ trap + uint32_t Val = 0xe7ffdefeUL; + OutStreamer.AddComment("trap"); + OutStreamer.EmitIntValue(Val, 4); + return; + } + break; + } + case ARM::tTRAP: { + // Non-Darwin binutils don't yet support the "trap" mnemonic. + // FIXME: Remove this special case when they do. 
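    // Editorial note: the 0xdefe emitted below for tTRAP is the low halfword
    // of the 0xe7ffdefe pattern used for ARM::TRAP above; both values lie in
    // the architecturally undefined instruction space, so execution traps
    // reliably.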
+ if (!Subtarget->isTargetDarwin()) { + //.short 57086 @ trap + uint16_t Val = 0xdefe; + OutStreamer.AddComment("trap"); + OutStreamer.EmitIntValue(Val, 2); + return; + } + break; + } + case ARM::t2Int_eh_sjlj_setjmp: + case ARM::t2Int_eh_sjlj_setjmp_nofp: + case ARM::tInt_eh_sjlj_setjmp: { + // Two incoming args: GPR:$src, GPR:$val + // mov $val, pc + // adds $val, #7 + // str $val, [$src, #4] + // movs r0, #0 + // b 1f + // movs r0, #1 + // 1: + unsigned SrcReg = MI->getOperand(0).getReg(); + unsigned ValReg = MI->getOperand(1).getReg(); + MCSymbol *Label = GetARMSJLJEHLabel(); { MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVi); - TmpInst.addOperand(MCOperand::CreateReg(DstReg)); - TmpInst.addOperand(MCOperand::CreateImm(SOImmValV1)); - + TmpInst.setOpcode(ARM::tMOVgpr2tgpr); + TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + // 's' bit operand + TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + OutStreamer.AddComment("eh_setjmp begin"); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tADDi3); + TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + // 's' bit operand + TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + TmpInst.addOperand(MCOperand::CreateImm(7)); // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - - TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); OutStreamer.EmitInstruction(TmpInst); } - { MCInst TmpInst; - TmpInst.setOpcode(ARM::ORRri); - TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg - TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // inreg - TmpInst.addOperand(MCOperand::CreateImm(SOImmValV2)); // so_imm + TmpInst.setOpcode(ARM::tSTRi); + TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + // The offset immediate is #4. The operand value is scaled by 4 for the + // tSTR instruction. + TmpInst.addOperand(MCOperand::CreateImm(1)); // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tMOVi8); + TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext); + MCInst TmpInst; + TmpInst.setOpcode(ARM::tB); + TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tMOVi8); + TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + TmpInst.addOperand(MCOperand::CreateImm(1)); + // Predicate. 
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.AddComment("eh_setjmp end"); + OutStreamer.EmitInstruction(TmpInst); + } + OutStreamer.EmitLabel(Label); + return; + } + + case ARM::Int_eh_sjlj_setjmp_nofp: + case ARM::Int_eh_sjlj_setjmp: { + // Two incoming args: GPR:$src, GPR:$val + // add $val, pc, #8 + // str $val, [$src, #+4] + // mov r0, #0 + // add pc, pc, #0 + // mov r0, #1 + unsigned SrcReg = MI->getOperand(0).getReg(); + unsigned ValReg = MI->getOperand(1).getReg(); - TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::ADDri); + TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateImm(8)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // 's' bit operand (always reg0 for this). + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.AddComment("eh_setjmp begin"); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::STRi12); + TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + TmpInst.addOperand(MCOperand::CreateImm(4)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVi); + TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // 's' bit operand (always reg0 for this). + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::ADDri); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // 's' bit operand (always reg0 for this). + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVi); + TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); + TmpInst.addOperand(MCOperand::CreateImm(1)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // 's' bit operand (always reg0 for this). + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.AddComment("eh_setjmp end"); OutStreamer.EmitInstruction(TmpInst); } return; } - case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file. - // This is a hack that lowers as a two instruction sequence. 
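  // Editorial note on the magic constants in the setjmp expansions above:
  // in Thumb state the PC reads as '.' + 4, so "adds $val, #7" stores the
  // address of the trailing "movs r0, #1" with the Thumb bit set; in ARM
  // state the PC reads as '.' + 8, so "add $val, pc, #8" points at the
  // trailing "mov r0, #1", and "add pc, pc, #0" skips that instruction on
  // the normal fall-through path.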
- unsigned DstReg = MI->getOperand(0).getReg(); - const MachineOperand &MO = MI->getOperand(1); - MCOperand V1, V2; - if (MO.isImm()) { - unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); - V1 = MCOperand::CreateImm(ImmVal & 65535); - V2 = MCOperand::CreateImm(ImmVal >> 16); - } else if (MO.isGlobal()) { - MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO); - const MCSymbolRefExpr *SymRef1 = - MCSymbolRefExpr::Create(Symbol, - MCSymbolRefExpr::VK_ARM_LO16, OutContext); - const MCSymbolRefExpr *SymRef2 = - MCSymbolRefExpr::Create(Symbol, - MCSymbolRefExpr::VK_ARM_HI16, OutContext); - V1 = MCOperand::CreateExpr(SymRef1); - V2 = MCOperand::CreateExpr(SymRef2); - } else { - MI->dump(); - llvm_unreachable("cannot handle this operand"); + case ARM::Int_eh_sjlj_longjmp: { + // ldr sp, [$src, #8] + // ldr $scratch, [$src, #4] + // ldr r7, [$src] + // bx $scratch + unsigned SrcReg = MI->getOperand(0).getReg(); + unsigned ScratchReg = MI->getOperand(1).getReg(); + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::LDRi12); + TmpInst.addOperand(MCOperand::CreateReg(ARM::SP)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + TmpInst.addOperand(MCOperand::CreateImm(8)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); } - { MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVi16); - TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg - TmpInst.addOperand(V1); // lower16(imm) - + TmpInst.setOpcode(ARM::LDRi12); + TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + TmpInst.addOperand(MCOperand::CreateImm(4)); // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); OutStreamer.EmitInstruction(TmpInst); } - { MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVTi16); - TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg - TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg - TmpInst.addOperand(V2); // upper16(imm) - + TmpInst.setOpcode(ARM::LDRi12); + TmpInst.addOperand(MCOperand::CreateReg(ARM::R7)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + TmpInst.addOperand(MCOperand::CreateImm(0)); // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); - + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::BX); + TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); OutStreamer.EmitInstruction(TmpInst); } - return; } + case ARM::tInt_eh_sjlj_longjmp: { + // ldr $scratch, [$src, #8] + // mov sp, $scratch + // ldr $scratch, [$src, #4] + // ldr r7, [$src] + // bx $scratch + unsigned SrcReg = MI->getOperand(0).getReg(); + unsigned ScratchReg = MI->getOperand(1).getReg(); + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tLDRi); + TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + // The offset immediate is #8. The operand value is scaled by 4 for the + // tLDR instruction. 
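      // Editorial note: hence the literal 2 below (8 / 4 == 2) and, further
      // down, the literal 1 standing for offset #4.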
+ TmpInst.addOperand(MCOperand::CreateImm(2)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tMOVtgpr2gpr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::SP)); + TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tLDRi); + TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + TmpInst.addOperand(MCOperand::CreateImm(1)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tLDRr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::R7)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tBX_RET_vararg); + TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + return; + } + // These are the pseudos created to comply with stricter operand restrictions + // on ARMv5. Lower them now to "normal" instructions, since all the + // restrictions are already satisfied. + case ARM::MULv5: + EmitPatchedInstruction(MI, ARM::MUL); + return; + case ARM::MLAv5: + EmitPatchedInstruction(MI, ARM::MLA); + return; + case ARM::SMULLv5: + EmitPatchedInstruction(MI, ARM::SMULL); + return; + case ARM::UMULLv5: + EmitPatchedInstruction(MI, ARM::UMULL); + return; + case ARM::SMLALv5: + EmitPatchedInstruction(MI, ARM::SMLAL); + return; + case ARM::UMLALv5: + EmitPatchedInstruction(MI, ARM::UMLAL); + return; + case ARM::UMAALv5: + EmitPatchedInstruction(MI, ARM::UMAAL); + return; } MCInst TmpInst; - MCInstLowering.Lower(MI, TmpInst); + LowerARMMachineInstrToMCInst(MI, TmpInst, *this); OutStreamer.EmitInstruction(TmpInst); } @@ -1476,7 +1541,7 @@ static MCInstPrinter *createARMMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI, false); + return new ARMInstPrinter(MAI); return 0; } diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h new file mode 100644 index 0000000..5852684 --- /dev/null +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -0,0 +1,112 @@ +//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// ARM Assembly printer class. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMASMPRINTER_H
+#define ARMASMPRINTER_H
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+namespace ARM {
+  enum DW_ISA {
+    DW_ISA_ARM_thumb = 1,
+    DW_ISA_ARM_arm = 2
+  };
+}
+
+class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
+
+  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+  /// make the right decision when printing asm code for different targets.
+  const ARMSubtarget *Subtarget;
+
+  /// AFI - Keep a pointer to ARMFunctionInfo for the current
+  /// MachineFunction.
+  ARMFunctionInfo *AFI;
+
+  /// MCP - Keep a pointer to constantpool entries of the current
+  /// MachineFunction.
+  const MachineConstantPool *MCP;
+
+public:
+  explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+    : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) {
+    Subtarget = &TM.getSubtarget<ARMSubtarget>();
+  }
+
+  virtual const char *getPassName() const {
+    return "ARM Assembly Printer";
+  }
+
+  void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+                    const char *Modifier = 0);
+
+  virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+                               unsigned AsmVariant, const char *ExtraCode,
+                               raw_ostream &O);
+  virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+                                     unsigned AsmVariant,
+                                     const char *ExtraCode, raw_ostream &O);
+
+  void EmitJumpTable(const MachineInstr *MI);
+  void EmitJump2Table(const MachineInstr *MI);
+  virtual void EmitInstruction(const MachineInstr *MI);
+  bool runOnMachineFunction(MachineFunction &F);
+
+  virtual void EmitConstantPool() {} // we emit constant pools ourselves!
+  virtual void EmitFunctionEntryLabel();
+  void EmitStartOfAsmFile(Module &M);
+  void EmitEndOfAsmFile(Module &M);
+
+private:
+  // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+  void emitAttributes();
+
+  // Helper for ELF .o only
+  void emitARMAttributeSection();
+
+  // Generic helper used to emit e.g. ARMv5 mul pseudos
+  void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc);
+
+public:
+  void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+  MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+  virtual unsigned getISAEncoding() {
+    // ARM/Darwin adds ISA to the DWARF info for each function.
+    if (!Subtarget->isTargetDarwin())
+      return 0;
+    return Subtarget->isThumb() ?
+      llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+  }
+
+  MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
+                                        const MachineBasicBlock *MBB) const;
+  MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
+
+  MCSymbol *GetARMSJLJEHLabel(void) const;
+
+  MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
+
+  /// EmitMachineConstantPoolValue - Print a machine constantpool value to
+  /// the .s file.
+  virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMBaseInfo.h b/lib/Target/ARM/ARMBaseInfo.h
new file mode 100644
index 0000000..a56cc1a
--- /dev/null
+++ b/lib/Target/ARM/ARMBaseInfo.h
@@ -0,0 +1,249 @@
+//===-- ARMBaseInfo.h - Top level definitions for ARM -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions
+// for the ARM target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINFO_H
+#define ARMBASEINFO_H
+
+#include "llvm/Support/ErrorHandling.h"
+
+// Note that the following auto-generated files only define enum types, and
+// so are safe to include here.
+
+// Defines symbolic names for ARM registers. This defines a mapping from
+// register name to register number.
+//
+#include "ARMGenRegisterNames.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#include "ARMGenInstrNames.inc"
+
+namespace llvm {
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+  // The CondCodes constants map directly to the 4-bit encoding of the
+  // condition field for predicated instructions.
+  enum CondCodes { // Meaning (integer)        Meaning (floating-point)
+    EQ,            // Equal                    Equal
+    NE,            // Not equal                Not equal, or unordered
+    HS,            // Carry set                >, ==, or unordered
+    LO,            // Carry clear              Less than
+    MI,            // Minus, negative          Less than
+    PL,            // Plus, positive or zero   >, ==, or unordered
+    VS,            // Overflow                 Unordered
+    VC,            // No overflow              Not unordered
+    HI,            // Unsigned higher          Greater than, or unordered
+    LS,            // Unsigned lower or same   Less than or equal
+    GE,            // Greater than or equal    Greater than or equal
+    LT,            // Less than                Less than, or unordered
+    GT,            // Greater than             Greater than
+    LE,            // Less than or equal       <, ==, or unordered
+    AL             // Always (unconditional)   Always (unconditional)
+  };
+
+  inline static CondCodes getOppositeCondition(CondCodes CC) {
+    switch (CC) {
+    default: llvm_unreachable("Unknown condition code");
+    case EQ: return NE;
+    case NE: return EQ;
+    case HS: return LO;
+    case LO: return HS;
+    case MI: return PL;
+    case PL: return MI;
+    case VS: return VC;
+    case VC: return VS;
+    case HI: return LS;
+    case LS: return HI;
+    case GE: return LT;
+    case LT: return GE;
+    case GT: return LE;
+    case LE: return GT;
+    }
+  }
+} // namespace ARMCC
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+  switch (CC) {
+  default: llvm_unreachable("Unknown condition code");
+  case ARMCC::EQ: return "eq";
+  case ARMCC::NE: return "ne";
+  case ARMCC::HS: return "hs";
+  case ARMCC::LO: return "lo";
+  case ARMCC::MI: return "mi";
+  case ARMCC::PL: return "pl";
+  case ARMCC::VS: return "vs";
+  case ARMCC::VC: return "vc";
+  case ARMCC::HI: return "hi";
+  case ARMCC::LS: return "ls";
+  case ARMCC::GE: return "ge";
+  case ARMCC::LT: return "lt";
+  case ARMCC::GT: return "gt";
+  case ARMCC::LE: return "le";
+  case ARMCC::AL: return "al";
+  }
+}
+
+namespace ARM_PROC {
+  enum IMod {
+    IE = 2,
+    ID = 3
+  };
+
+  enum IFlags {
+    F = 1,
+    I = 2,
+    A = 4
+  };
+
+  inline static const char *IFlagsToString(unsigned val) {
+    switch (val) {
+    default: llvm_unreachable("Unknown iflags operand");
+    case F: return "f";
+    case I: return "i";
+    case A: return "a";
+    }
+  }
+
+  inline static const char *IModToString(unsigned val) {
+    switch (val) {
+    default: llvm_unreachable("Unknown imod operand");
+    case IE: return "ie";
+    case ID: return "id";
+    }
+  }
+}
+
+namespace ARM_MB {
+  // The Memory Barrier Option constants map directly to the 4-bit encoding of
+  // the option field for memory barrier operations.
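  // Editorial note (illustration): a "dmb ish" barrier carries option
  // encoding 0b1011, i.e. ISH == 11 in the enum below.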
+  enum MemBOpt {
+    SY    = 15,
+    ST    = 14,
+    ISH   = 11,
+    ISHST = 10,
+    NSH   = 7,
+    NSHST = 6,
+    OSH   = 3,
+    OSHST = 2
+  };
+
+  inline static const char *MemBOptToString(unsigned val) {
+    switch (val) {
+    default: llvm_unreachable("Unknown memory operation");
+    case SY:    return "sy";
+    case ST:    return "st";
+    case ISH:   return "ish";
+    case ISHST: return "ishst";
+    case NSH:   return "nsh";
+    case NSHST: return "nshst";
+    case OSH:   return "osh";
+    case OSHST: return "oshst";
+    }
+  }
+} // namespace ARM_MB
+
+/// getARMRegisterNumbering - Given the enum value for some register, e.g.
+/// ARM::LR, return the number that it corresponds to (e.g. 14).
+inline static unsigned getARMRegisterNumbering(unsigned Reg) {
+  using namespace ARM;
+  switch (Reg) {
+  default:
+    llvm_unreachable("Unknown ARM register!");
+  case R0:  case S0:  case D0:  case Q0:  return 0;
+  case R1:  case S1:  case D1:  case Q1:  return 1;
+  case R2:  case S2:  case D2:  case Q2:  return 2;
+  case R3:  case S3:  case D3:  case Q3:  return 3;
+  case R4:  case S4:  case D4:  case Q4:  return 4;
+  case R5:  case S5:  case D5:  case Q5:  return 5;
+  case R6:  case S6:  case D6:  case Q6:  return 6;
+  case R7:  case S7:  case D7:  case Q7:  return 7;
+  case R8:  case S8:  case D8:  case Q8:  return 8;
+  case R9:  case S9:  case D9:  case Q9:  return 9;
+  case R10: case S10: case D10: case Q10: return 10;
+  case R11: case S11: case D11: case Q11: return 11;
+  case R12: case S12: case D12: case Q12: return 12;
+  case SP:  case S13: case D13: case Q13: return 13;
+  case LR:  case S14: case D14: case Q14: return 14;
+  case PC:  case S15: case D15: case Q15: return 15;
+
+  case S16: case D16: return 16;
+  case S17: case D17: return 17;
+  case S18: case D18: return 18;
+  case S19: case D19: return 19;
+  case S20: case D20: return 20;
+  case S21: case D21: return 21;
+  case S22: case D22: return 22;
+  case S23: case D23: return 23;
+  case S24: case D24: return 24;
+  case S25: case D25: return 25;
+  case S26: case D26: return 26;
+  case S27: case D27: return 27;
+  case S28: case D28: return 28;
+  case S29: case D29: return 29;
+  case S30: case D30: return 30;
+  case S31: case D31: return 31;
+  }
+}
+
+namespace ARMII {
+  /// Target Operand Flag enum.
+  enum TOF {
+    //===------------------------------------------------------------------===//
+    // ARM Specific MachineOperand flags.
+
+    MO_NO_FLAG,
+
+    /// MO_LO16 - On a symbol operand, this represents a relocation containing
+    /// lower 16 bit of the address. Used only via movw instruction.
+    MO_LO16,
+
+    /// MO_HI16 - On a symbol operand, this represents a relocation containing
+    /// higher 16 bit of the address. Used only via movt instruction.
+    MO_HI16,
+
+    /// MO_LO16_NONLAZY - On a symbol operand "FOO", this represents a
+    /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol,
+    /// i.e. "FOO$non_lazy_ptr".
+    /// Used only via movw instruction.
+    MO_LO16_NONLAZY,
+
+    /// MO_HI16_NONLAZY - On a symbol operand "FOO", this represents a
+    /// relocation containing higher 16 bit of the non-lazy-ptr indirect
+    /// symbol, i.e. "FOO$non_lazy_ptr". Used only via movt instruction.
+    MO_HI16_NONLAZY,
+
+    /// MO_LO16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+    /// relocation containing lower 16 bit of the PC relative address of the
+    /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+    /// Used only via movw instruction.
+    MO_LO16_NONLAZY_PIC,
+
+    /// MO_HI16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+    /// relocation containing higher 16 bit of the PC relative address of the
+    /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+    /// Used only via movt instruction.
+    MO_HI16_NONLAZY_PIC,
+
+    /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a
+    /// call operand.
+    MO_PLT
+  };
+} // end namespace ARMII
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e4f10f9..2268e59 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -15,13 +15,13 @@
 #include "ARM.h"
 #include "ARMAddressingModes.h"
 #include "ARMConstantPoolValue.h"
+#include "ARMHazardRecognizer.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMRegisterInfo.h"
 #include "ARMGenInstrInfo.inc"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalValue.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -34,15 +34,75 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
 using namespace llvm;
 
 static cl::opt<bool>
 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
                cl::desc("Enable ARM 2-addr to 3-addr conv"));
 
+/// ARM_MLxEntry - Record information about MLA / MLS instructions.
+struct ARM_MLxEntry {
+  unsigned MLxOpc;     // MLA / MLS opcode
+  unsigned MulOpc;     // Expanded multiplication opcode
+  unsigned AddSubOpc;  // Expanded add / sub opcode
+  bool NegAcc;         // True if the acc is negated before the add / sub.
+  bool HasLane;        // True if instruction has an extra "lane" operand.
+};
+
+static const ARM_MLxEntry ARM_MLxTable[] = {
+  // MLxOpc,        MulOpc,        AddSubOpc,   NegAcc, HasLane
+  // fp scalar ops
+  { ARM::VMLAS,     ARM::VMULS,    ARM::VADDS,  false,  false },
+  { ARM::VMLSS,     ARM::VMULS,    ARM::VSUBS,  false,  false },
+  { ARM::VMLAD,     ARM::VMULD,    ARM::VADDD,  false,  false },
+  { ARM::VMLSD,     ARM::VMULD,    ARM::VSUBD,  false,  false },
+  { ARM::VNMLAS,    ARM::VNMULS,   ARM::VSUBS,  true,   false },
+  { ARM::VNMLSS,    ARM::VMULS,    ARM::VSUBS,  true,   false },
+  { ARM::VNMLAD,    ARM::VNMULD,   ARM::VSUBD,  true,   false },
+  { ARM::VNMLSD,    ARM::VMULD,    ARM::VSUBD,  true,   false },
+
+  // fp SIMD ops
+  { ARM::VMLAfd,    ARM::VMULfd,   ARM::VADDfd, false,  false },
+  { ARM::VMLSfd,    ARM::VMULfd,   ARM::VSUBfd, false,  false },
+  { ARM::VMLAfq,    ARM::VMULfq,   ARM::VADDfq, false,  false },
+  { ARM::VMLSfq,    ARM::VMULfq,   ARM::VSUBfq, false,  false },
+  { ARM::VMLAslfd,  ARM::VMULslfd, ARM::VADDfd, false,  true  },
+  { ARM::VMLSslfd,  ARM::VMULslfd, ARM::VSUBfd, false,  true  },
+  { ARM::VMLAslfq,  ARM::VMULslfq, ARM::VADDfq, false,  true  },
+  { ARM::VMLSslfq,  ARM::VMULslfq, ARM::VSUBfq, false,  true  },
+};
+
 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
   : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
     Subtarget(STI) {
+  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
+    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
+      assert(false && "Duplicated entries?");
+    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
+    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
+  }
+}
+
+// Use a ScoreboardHazardRecognizer for prepass ARM scheduling.
+// TargetInstrInfoImpl currently defaults to no prepass hazard recognizer.
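// Editorial note: MLxEntryMap built above maps an MLx opcode, e.g.
// ARM::VMLAS, to its row in ARM_MLxTable, letting the hazard recognizer look
// up the equivalent VMULS + VADDS expansion recorded there.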
+ScheduleHazardRecognizer *ARMBaseInstrInfo:: +CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const { + if (usePreRAHazardRecognizer()) { + const InstrItineraryData *II = TM->getInstrItineraryData(); + return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); + } + return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); +} + +ScheduleHazardRecognizer *ARMBaseInstrInfo:: +CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + if (Subtarget.isThumb2() || Subtarget.hasVFP2()) + return (ScheduleHazardRecognizer *) + new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG); + return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG); } MachineInstr * @@ -140,7 +200,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (isLoad) MemMI = BuildMI(MF, MI->getDebugLoc(), get(MemOpc), MI->getOperand(0).getReg()) - .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); + .addReg(WBReg).addImm(0).addImm(Pred); else MemMI = BuildMI(MF, MI->getDebugLoc(), get(MemOpc)).addReg(MI->getOperand(1).getReg()) @@ -151,7 +211,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (isLoad) MemMI = BuildMI(MF, MI->getDebugLoc(), get(MemOpc), MI->getOperand(0).getReg()) - .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); + .addReg(BaseReg).addImm(0).addImm(Pred); else MemMI = BuildMI(MF, MI->getDebugLoc(), get(MemOpc)).addReg(MI->getOperand(1).getReg()) @@ -166,8 +226,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (LV) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { unsigned Reg = MO.getReg(); LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); @@ -197,43 +256,6 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return NewMIs[0]; } -bool -ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - bool isKill = true; - - // Add the callee-saved register as live-in unless it's LR and - // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress - // then it's already added to the function and entry block live-in sets. - if (Reg == ARM::LR) { - MachineFunction &MF = *MBB.getParent(); - if (MF.getFrameInfo()->isReturnAddressTaken() && - MF.getRegInfo().isLiveIn(Reg)) - isKill = false; - } - - if (isKill) - MBB.addLiveIn(Reg); - - // Insert the spill to the stack frame. The register is killed at the spill - // - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - storeRegToStackSlot(MBB, MI, Reg, isKill, - CSI[i].getFrameIdx(), RC, TRI); - } - return true; -} - // Branch analysis. bool ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, @@ -275,13 +297,31 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, // Get the instruction before it if it is a terminator. 
MachineInstr *SecondLastInst = I; + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + + // If AllowModify is true and the block ends with two or more unconditional + // branches, delete all but the first unconditional branch. + if (AllowModify && isUncondBranchOpcode(LastOpc)) { + while (isUncondBranchOpcode(SecondLastOpc)) { + LastInst->eraseFromParent(); + LastInst = SecondLastInst; + LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + // Return now the only terminator is an unconditional branch. + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else { + SecondLastInst = I; + SecondLastOpc = SecondLastInst->getOpcode(); + } + } + } // If there are three terminators, we don't know what sort of block this is. if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; // If the block ends with a B and a Bcc, handle it. - unsigned SecondLastOpc = SecondLastInst->getOpcode(); if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { TBB = SecondLastInst->getOperand(0).getMBB(); Cond.push_back(SecondLastInst->getOperand(1)); @@ -468,7 +508,7 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { } /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. -DISABLE_INLINE +LLVM_ATTRIBUTE_NOINLINE static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, unsigned JTI); static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, @@ -513,6 +553,14 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARMII::Size2Bytes: return 2; // Thumb1 instruction. case ARMII::SizeSpecial: { switch (Opc) { + case ARM::MOVi16_ga_pcrel: + case ARM::MOVTi16_ga_pcrel: + case ARM::t2MOVi16_ga_pcrel: + case ARM::t2MOVTi16_ga_pcrel: + return 4; + case ARM::MOVi32imm: + case ARM::t2MOVi32imm: + return 8; case ARM::CONSTPOOL_ENTRY: // If this machine instr is a constant pool entry, its size is recorded as // operand #2. @@ -533,13 +581,13 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARM::BR_JTadd: case ARM::tBR_JTr: case ARM::t2BR_JT: - case ARM::t2TBB: - case ARM::t2TBH: { + case ARM::t2TBB_JT: + case ARM::t2TBH_JT: { // These are jumptable branches, i.e. a branch followed by an inlined // jumptable. The size is 4 + 4 * number of entries. For TBB, each // entry is one byte; TBH two byte each. - unsigned EntrySize = (Opc == ARM::t2TBB) - ? 1 : ((Opc == ARM::t2TBH) ? 2 : 4); + unsigned EntrySize = (Opc == ARM::t2TBB_JT) + ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); unsigned NumOps = TID.getNumOperands(); MachineOperand JTOP = MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2)); @@ -557,7 +605,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // alignment issue. unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4; unsigned NumEntries = getNumJTEntries(JT, JTI); - if (Opc == ARM::t2TBB && (NumEntries & 1)) + if (Opc == ARM::t2TBB_JT && (NumEntries & 1)) // Make sure the instruction that follows TBB is 2-byte aligned. // FIXME: Constant island pass should insert an "ALIGN" instruction // instead. 
@@ -573,84 +621,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 0; // Not reached } -unsigned -ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case ARM::LDR: - case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isReg() && - MI->getOperand(3).isImm() && - MI->getOperand(2).getReg() == 0 && - MI->getOperand(3).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - case ARM::t2LDRi12: - case ARM::tRestore: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - case ARM::VLDRD: - case ARM::VLDRS: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - - return 0; -} - -unsigned -ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case ARM::STR: - case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isReg() && - MI->getOperand(3).isImm() && - MI->getOperand(2).getReg() == 0 && - MI->getOperand(3).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - case ARM::t2STRi12: - case ARM::tSpill: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - case ARM::VSTRD: - case ARM::VSTRS: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - - return 0; -} - void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -715,8 +685,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), - MachineMemOperand::MOStore, 0, + MF.getMachineMemOperand(MachinePointerInfo( + PseudoSourceValue::getFixedStack(FI)), + MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); @@ -728,9 +699,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, switch (RC->getID()) { case ARM::GPRRegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12)) .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); break; case ARM::SPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) @@ -747,17 +718,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: case ARM::QPR_8RegClassID: - // FIXME: Neon instructions should support predicates - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q)) + if (Align >= 16 && 
getRegisterInfo().needsStackRealignment(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo)) .addFrameIndex(FI).addImm(16) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO)); } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI) - .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addMemOperand(MMO)); } break; @@ -766,18 +735,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { // FIXME: It's possible to only store part of the QQ register if the // spilled def has a sub-register index. - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST1d64Q)) - .addFrameIndex(FI).addImm(16); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); - MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); - AddDefaultPred(MIB.addMemOperand(MMO)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) + .addFrameIndex(FI).addImm(16) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO)); } else { MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) - .addFrameIndex(FI) - .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI)) .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); @@ -787,9 +752,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case ARM::QQQQPRRegClassID: { MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) - .addFrameIndex(FI) - .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI)) .addMemOperand(MMO); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); @@ -806,6 +770,53 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } +unsigned +ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::STRrs: + case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. 
+ if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::STRi12: + case ARM::t2STRi12: + case ARM::tSpill: + case ARM::VSTRD: + case ARM::VSTRS: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::VST1q64Pseudo: + if (MI->getOperand(0).isFI() && + MI->getOperand(2).getSubReg() == 0) { + FrameIndex = MI->getOperand(0).getIndex(); + return MI->getOperand(2).getReg(); + } + break; + case ARM::VSTMQIA: + if (MI->getOperand(1).isFI() && + MI->getOperand(0).getSubReg() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + + return 0; +} + void ARMBaseInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, @@ -817,8 +828,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), - MachineMemOperand::MOLoad, 0, + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); @@ -830,8 +842,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, switch (RC->getID()) { case ARM::GPRRegClassID: - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) - .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); break; case ARM::SPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) @@ -846,31 +858,26 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: case ARM::QPR_8RegClassID: - if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg) + if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg) .addFrameIndex(FI).addImm(16) .addMemOperand(MMO)); } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) .addFrameIndex(FI) - .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addMemOperand(MMO)); } break; case ARM::QQPRRegClassID: case ARM::QQPR_VFP2RegClassID: if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD1d64Q)); - MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); - AddDefaultPred(MIB.addFrameIndex(FI).addImm(16).addMemOperand(MMO)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) + .addFrameIndex(FI).addImm(16) + .addMemOperand(MMO)); } else { MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) - .addFrameIndex(FI) - 
.addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + .addFrameIndex(FI)) .addMemOperand(MMO); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); @@ -880,9 +887,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, break; case ARM::QQQQPRRegClassID: { MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) - .addFrameIndex(FI) - .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + .addFrameIndex(FI)) .addMemOperand(MMO); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); @@ -899,6 +905,53 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } +unsigned +ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::LDRrs: + case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::LDRi12: + case ARM::t2LDRi12: + case ARM::tRestore: + case ARM::VLDRD: + case ARM::VLDRS: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::VLD1q64Pseudo: + if (MI->getOperand(1).isFI() && + MI->getOperand(0).getSubReg() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::VLDMQIA: + if (MI->getOperand(1).isFI() && + MI->getOperand(0).getSubReg() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + + return 0; +} + MachineInstr* ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -921,7 +974,7 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); - unsigned PCLabelId = AFI->createConstPoolEntryUId(); + unsigned PCLabelId = AFI->createPICLabelUId(); ARMConstantPoolValue *NewCPV = 0; // FIXME: The below assumes PIC relocation model and that the function // is Thumb mode (t1 or t2). 
PCAdjustment would be 8 for ARM mode PIC, and @@ -991,12 +1044,18 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { } bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const { + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const { int Opcode = MI0->getOpcode(); if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci || - Opcode == ARM::tLDRpci_pic) { + Opcode == ARM::tLDRpci_pic || + Opcode == ARM::MOV_ga_dyn || + Opcode == ARM::MOV_ga_pcrel || + Opcode == ARM::MOV_ga_pcrel_ldr || + Opcode == ARM::t2MOV_ga_dyn || + Opcode == ARM::t2MOV_ga_pcrel) { if (MI1->getOpcode() != Opcode) return false; if (MI0->getNumOperands() != MI1->getNumOperands()) @@ -1007,6 +1066,14 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, if (MO0.getOffset() != MO1.getOffset()) return false; + if (Opcode == ARM::MOV_ga_dyn || + Opcode == ARM::MOV_ga_pcrel || + Opcode == ARM::MOV_ga_pcrel_ldr || + Opcode == ARM::t2MOV_ga_dyn || + Opcode == ARM::t2MOV_ga_pcrel) + // Ignore the PC labels. + return MO0.getGlobal() == MO1.getGlobal(); + const MachineFunction *MF = MI0->getParent()->getParent(); const MachineConstantPool *MCP = MF->getConstantPool(); int CPI0 = MO0.getIndex(); @@ -1018,6 +1085,37 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, ARMConstantPoolValue *ACPV1 = static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); return ACPV0->hasSameValue(ACPV1); + } else if (Opcode == ARM::PICLDR) { + if (MI1->getOpcode() != Opcode) + return false; + if (MI0->getNumOperands() != MI1->getNumOperands()) + return false; + + unsigned Addr0 = MI0->getOperand(1).getReg(); + unsigned Addr1 = MI1->getOperand(1).getReg(); + if (Addr0 != Addr1) { + if (!MRI || + !TargetRegisterInfo::isVirtualRegister(Addr0) || + !TargetRegisterInfo::isVirtualRegister(Addr1)) + return false; + + // This assumes SSA form. + MachineInstr *Def0 = MRI->getVRegDef(Addr0); + MachineInstr *Def1 = MRI->getVRegDef(Addr1); + // Check if the loaded value, e.g. a constantpool of a global address, are + // the same. 
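+      // E.g. if both address registers come from identical t2LDRpci loads of
+      // the same constant pool entry, the two PICLDRs yield the same value
+      // even though the virtual registers differ, so compare the defining
+      // instructions recursively.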
+ if (!produceSameValue(Def0, Def1, MRI)) + return false; + } + + for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) { + // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg + const MachineOperand &MO0 = MI0->getOperand(i); + const MachineOperand &MO1 = MI1->getOperand(i); + if (!MO0.isIdenticalTo(MO1)) + return false; + } + return true; } return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); @@ -1040,8 +1138,8 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, switch (Load1->getMachineOpcode()) { default: return false; - case ARM::LDR: - case ARM::LDRB: + case ARM::LDRi12: + case ARM::LDRBi12: case ARM::LDRD: case ARM::LDRH: case ARM::LDRSB: @@ -1059,8 +1157,8 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, switch (Load2->getMachineOpcode()) { default: return false; - case ARM::LDR: - case ARM::LDRB: + case ARM::LDRi12: + case ARM::LDRBi12: case ARM::LDRD: case ARM::LDRH: case ARM::LDRSB: @@ -1164,22 +1262,37 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, return false; } -bool ARMBaseInstrInfo:: -isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const { - if (!NumInstrs) +bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCyles, + unsigned ExtraPredCycles, + float Probability, + float Confidence) const { + if (!NumCyles) return false; - if (Subtarget.getCPUString() == "generic") - // Generic (and overly aggressive) if-conversion limits for testing. - return NumInstrs <= 10; - else if (Subtarget.hasV7Ops()) - return NumInstrs <= 3; - return NumInstrs <= 2; + + // Attempt to estimate the relative costs of predication versus branching. + float UnpredCost = Probability * NumCyles; + UnpredCost += 1.0; // The branch itself + UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty(); + + return (float)(NumCyles + ExtraPredCycles) < UnpredCost; } - + bool ARMBaseInstrInfo:: -isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, - MachineBasicBlock &FMBB, unsigned NumF) const { - return NumT && NumF && NumT <= 2 && NumF <= 2; +isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned TCycles, unsigned TExtra, + MachineBasicBlock &FMBB, + unsigned FCycles, unsigned FExtra, + float Probability, float Confidence) const { + if (!TCycles || !FCycles) + return false; + + // Attempt to estimate the relative costs of predication versus branching. 
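+  // Illustrative numbers (not measured): TCycles = FCycles = 2, no extra
+  // predication cycles, Probability = 0.5, Confidence = 0.9 and a
+  // misprediction penalty of 8 give
+  //   UnpredCost = 0.5*2 + 0.5*2 + 1.0 + (1.0 - 0.9)*8 = 3.8,
+  // so predicating both blocks (cost 2 + 2 = 4) would be rejected here.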
+ float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles; + UnpredCost += 1.0; // The branch itself + UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty(); + + return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost; } /// getInstrPredicate - If instruction is predicated, returns its predicate @@ -1292,6 +1405,12 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned NumBits = 0; unsigned Scale = 1; switch (AddrMode) { + case ARMII::AddrMode_i12: { + ImmIdx = FrameRegIdx + 1; + InstrOffs = MI.getOperand(ImmIdx).getImm(); + NumBits = 12; + break; + } case ARMII::AddrMode2: { ImmIdx = FrameRegIdx+2; InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); @@ -1342,8 +1461,15 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, if ((unsigned)Offset <= Mask * Scale) { // Replace the FrameIndex with sp MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - if (isSub) - ImmedOffset |= 1 << NumBits; + // FIXME: When addrmode2 goes away, this will simplify (like the + // T2 version), as the LDR.i12 versions don't need the encoding + // tricks for the offset value. + if (isSub) { + if (AddrMode == ARMII::AddrMode_i12) + ImmedOffset = -ImmedOffset; + else + ImmedOffset |= 1 << NumBits; + } ImmOp.ChangeToImmediate(ImmedOffset); Offset = 0; return true; @@ -1351,8 +1477,12 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Otherwise, it didn't fit. Pull in what we can to simplify the immed. ImmedOffset = ImmedOffset & Mask; - if (isSub) - ImmedOffset |= 1 << NumBits; + if (isSub) { + if (AddrMode == ARMII::AddrMode_i12) + ImmedOffset = -ImmedOffset; + else + ImmedOffset |= 1 << NumBits; + } ImmOp.ChangeToImmediate(ImmedOffset); Offset &= ~(Mask*Scale); } @@ -1363,25 +1493,88 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, } bool ARMBaseInstrInfo:: -AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpValue) const { +AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask, + int &CmpValue) const { switch (MI->getOpcode()) { default: break; case ARM::CMPri: - case ARM::CMPzri: case ARM::t2CMPri: - case ARM::t2CMPzri: SrcReg = MI->getOperand(0).getReg(); + CmpMask = ~0; CmpValue = MI->getOperand(1).getImm(); return true; + case ARM::TSTri: + case ARM::t2TSTri: + SrcReg = MI->getOperand(0).getReg(); + CmpMask = MI->getOperand(1).getImm(); + CmpValue = 0; + return true; } return false; } -/// ConvertToSetZeroFlag - Convert the instruction to set the "zero" flag so -/// that we can remove a "comparison with zero". +/// isSuitableForMask - Identify a suitable 'and' instruction that +/// operates on the given source register and applies the same mask +/// as a 'tst' instruction. Provide a limited look-through for copies. +/// When successful, MI will hold the found instruction. +static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, + int CmpMask, bool CommonUse) { + switch (MI->getOpcode()) { + case ARM::ANDri: + case ARM::t2ANDri: + if (CmpMask != MI->getOperand(2).getImm()) + return false; + if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg()) + return true; + break; + case ARM::COPY: { + // Walk down one instruction which is potentially an 'and'. 
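+    // E.g. (illustrative):
+    //   %vreg1 = COPY %vreg0
+    //   %vreg2 = ANDri %vreg1, mask, ...
+    // Only the instruction immediately following the copy is considered,
+    // with the copy's destination as the expected 'and' source.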
+ const MachineInstr &Copy = *MI; + MachineBasicBlock::iterator AND( + llvm::next(MachineBasicBlock::iterator(MI))); + if (AND == MI->getParent()->end()) return false; + MI = AND; + return isSuitableForMask(MI, Copy.getOperand(0).getReg(), + CmpMask, true); + } + } + + return false; +} + +/// OptimizeCompareInstr - Convert the instruction supplying the argument to the +/// comparison into one that sets the zero bit in the flags register. bool ARMBaseInstrInfo:: -ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const { +OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, + int CmpValue, const MachineRegisterInfo *MRI) const { + if (CmpValue != 0) + return false; + + MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg); + if (llvm::next(DI) != MRI->def_end()) + // Only support one definition. + return false; + + MachineInstr *MI = &*DI; + + // Masked compares sometimes use the same register as the corresponding 'and'. + if (CmpMask != ~0) { + if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) { + MI = 0; + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg), + UE = MRI->use_end(); UI != UE; ++UI) { + if (UI->getParent() != CmpInstr->getParent()) continue; + MachineInstr *PotentialAND = &*UI; + if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true)) + continue; + MI = PotentialAND; + break; + } + if (!MI) return false; + } + } + // Conservatively refuse to convert an instruction which isn't in the same BB // as the comparison. if (MI->getParent() != CmpInstr->getParent()) @@ -1391,16 +1584,20 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const { // want to change. MachineBasicBlock::const_iterator I = CmpInstr, E = MI, B = MI->getParent()->begin(); + + // Early exit if CmpInstr is at the beginning of the BB. + if (I == B) return false; + --I; for (; I != E; --I) { const MachineInstr &Instr = *I; for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) { const MachineOperand &MO = Instr.getOperand(IO); - if (!MO.isReg() || !MO.isDef()) continue; + if (!MO.isReg()) continue; - // This instruction modifies CPSR before the one we want to change. We - // can't do this transformation. + // This instruction modifies or uses CPSR after the one we want to + // change. We can't do this transformation. if (MO.getReg() == ARM::CPSR) return false; } @@ -1414,15 +1611,713 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const { switch (MI->getOpcode()) { default: break; case ARM::ADDri: + case ARM::ANDri: + case ARM::t2ANDri: case ARM::SUBri: case ARM::t2ADDri: case ARM::t2SUBri: - MI->RemoveOperand(5); - MachineInstrBuilder(MI) - .addReg(ARM::CPSR, RegState::Define | RegState::Implicit); + // Toggle the optional operand to CPSR. + MI->getOperand(5).setReg(ARM::CPSR); + MI->getOperand(5).setIsDef(true); CmpInstr->eraseFromParent(); return true; } return false; } + +bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, + MachineInstr *DefMI, unsigned Reg, + MachineRegisterInfo *MRI) const { + // Fold large immediates into add, sub, or, xor. 
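+  // E.g. (illustrative, assuming the constant splits into two legal rotated
+  // immediates):
+  //   %vreg1 = MOVi32imm 0x00ff00ff
+  //   %vreg2 = ADDrr %vreg0, %vreg1
+  // becomes
+  //   %vreg3 = ADDri %vreg0, <first half>
+  //   %vreg2 = ADDri %vreg3, <second half>
+  // where the halves come from the isSOImmTwoPartVal decomposition below.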
+ unsigned DefOpc = DefMI->getOpcode(); + if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm) + return false; + if (!DefMI->getOperand(1).isImm()) + // Could be t2MOVi32imm <ga:xx> + return false; + + if (!MRI->hasOneNonDBGUse(Reg)) + return false; + + unsigned UseOpc = UseMI->getOpcode(); + unsigned NewUseOpc = 0; + uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm(); + uint32_t SOImmValV1 = 0, SOImmValV2 = 0; + bool Commute = false; + switch (UseOpc) { + default: return false; + case ARM::SUBrr: + case ARM::ADDrr: + case ARM::ORRrr: + case ARM::EORrr: + case ARM::t2SUBrr: + case ARM::t2ADDrr: + case ARM::t2ORRrr: + case ARM::t2EORrr: { + Commute = UseMI->getOperand(2).getReg() != Reg; + switch (UseOpc) { + default: break; + case ARM::SUBrr: { + if (Commute) + return false; + ImmVal = -ImmVal; + NewUseOpc = ARM::SUBri; + // Fallthrough + } + case ARM::ADDrr: + case ARM::ORRrr: + case ARM::EORrr: { + if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) + return false; + SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); + SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); + switch (UseOpc) { + default: break; + case ARM::ADDrr: NewUseOpc = ARM::ADDri; break; + case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; + case ARM::EORrr: NewUseOpc = ARM::EORri; break; + } + break; + } + case ARM::t2SUBrr: { + if (Commute) + return false; + ImmVal = -ImmVal; + NewUseOpc = ARM::t2SUBri; + // Fallthrough + } + case ARM::t2ADDrr: + case ARM::t2ORRrr: + case ARM::t2EORrr: { + if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) + return false; + SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); + SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); + switch (UseOpc) { + default: break; + case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break; + case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; + case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; + } + break; + } + } + } + } + + unsigned OpIdx = Commute ? 2 : 1; + unsigned Reg1 = UseMI->getOperand(OpIdx).getReg(); + bool isKill = UseMI->getOperand(OpIdx).isKill(); + unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); + AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(), + *UseMI, UseMI->getDebugLoc(), + get(NewUseOpc), NewReg) + .addReg(Reg1, getKillRegState(isKill)) + .addImm(SOImmValV1))); + UseMI->setDesc(get(NewUseOpc)); + UseMI->getOperand(1).setReg(NewReg); + UseMI->getOperand(1).setIsKill(); + UseMI->getOperand(2).ChangeToImmediate(SOImmValV2); + DefMI->eraseFromParent(); + return true; +} + +unsigned +ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, + const MachineInstr *MI) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + const TargetInstrDesc &Desc = MI->getDesc(); + unsigned Class = Desc.getSchedClass(); + unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; + if (UOps) + return UOps; + + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: + llvm_unreachable("Unexpected multi-uops instruction!"); + break; + case ARM::VLDMQIA: + case ARM::VLDMQDB: + case ARM::VSTMQIA: + case ARM::VSTMQDB: + return 2; + + // The number of uOps for load / store multiple are determined by the number + // registers. + // + // On Cortex-A8, each pair of register loads / stores can be scheduled on the + // same cycle. The scheduling for the first load / store must be done + // separately by assuming the the address is not 64-bit aligned. + // + // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). 
If the address
+  // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
+  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
+  case ARM::VLDMDIA:
+  case ARM::VLDMDDB:
+  case ARM::VLDMDIA_UPD:
+  case ARM::VLDMDDB_UPD:
+  case ARM::VLDMSIA:
+  case ARM::VLDMSDB:
+  case ARM::VLDMSIA_UPD:
+  case ARM::VLDMSDB_UPD:
+  case ARM::VSTMDIA:
+  case ARM::VSTMDDB:
+  case ARM::VSTMDIA_UPD:
+  case ARM::VSTMDDB_UPD:
+  case ARM::VSTMSIA:
+  case ARM::VSTMSDB:
+  case ARM::VSTMSIA_UPD:
+  case ARM::VSTMSDB_UPD: {
+    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
+    return (NumRegs / 2) + (NumRegs % 2) + 1;
+  }
+
+  case ARM::LDMIA_RET:
+  case ARM::LDMIA:
+  case ARM::LDMDA:
+  case ARM::LDMDB:
+  case ARM::LDMIB:
+  case ARM::LDMIA_UPD:
+  case ARM::LDMDA_UPD:
+  case ARM::LDMDB_UPD:
+  case ARM::LDMIB_UPD:
+  case ARM::STMIA:
+  case ARM::STMDA:
+  case ARM::STMDB:
+  case ARM::STMIB:
+  case ARM::STMIA_UPD:
+  case ARM::STMDA_UPD:
+  case ARM::STMDB_UPD:
+  case ARM::STMIB_UPD:
+  case ARM::tLDMIA:
+  case ARM::tLDMIA_UPD:
+  case ARM::tSTMIA:
+  case ARM::tSTMIA_UPD:
+  case ARM::tPOP_RET:
+  case ARM::tPOP:
+  case ARM::tPUSH:
+  case ARM::t2LDMIA_RET:
+  case ARM::t2LDMIA:
+  case ARM::t2LDMDB:
+  case ARM::t2LDMIA_UPD:
+  case ARM::t2LDMDB_UPD:
+  case ARM::t2STMIA:
+  case ARM::t2STMDB:
+  case ARM::t2STMIA_UPD:
+  case ARM::t2STMDB_UPD: {
+    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
+    if (Subtarget.isCortexA8()) {
+      if (NumRegs < 4)
+        return 2;
+      // 4 registers would be issued: 2, 2.
+      // 5 registers would be issued: 2, 2, 1.
+      UOps = (NumRegs / 2);
+      if (NumRegs % 2)
+        ++UOps;
+      return UOps;
+    } else if (Subtarget.isCortexA9()) {
+      UOps = (NumRegs / 2);
+      // If there is an odd number of registers or if it's not 64-bit aligned,
+      // then it takes an extra AGU (Address Generation Unit) cycle.
+      if ((NumRegs % 2) ||
+          !MI->hasOneMemOperand() ||
+          (*MI->memoperands_begin())->getAlignment() < 8)
+        ++UOps;
+      return UOps;
+    } else {
+      // Assume the worst.
+      return NumRegs;
+    }
+  }
+  }
+}
+
+int
+ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
+                                  const TargetInstrDesc &DefTID,
+                                  unsigned DefClass,
+                                  unsigned DefIdx, unsigned DefAlign) const {
+  int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+  if (RegNo <= 0)
+    // Def is the address writeback.
+    return ItinData->getOperandCycle(DefClass, DefIdx);
+
+  int DefCycle;
+  if (Subtarget.isCortexA8()) {
+    // (regno / 2) + (regno % 2) + 1
+    DefCycle = RegNo / 2 + 1;
+    if (RegNo % 2)
+      ++DefCycle;
+  } else if (Subtarget.isCortexA9()) {
+    DefCycle = RegNo;
+    bool isSLoad = false;
+
+    switch (DefTID.getOpcode()) {
+    default: break;
+    case ARM::VLDMSIA:
+    case ARM::VLDMSDB:
+    case ARM::VLDMSIA_UPD:
+    case ARM::VLDMSDB_UPD:
+      isSLoad = true;
+      break;
+    }
+
+    // If there is an odd number of 'S' registers or if it's not 64-bit
+    // aligned, then it takes an extra cycle.
+    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
+      ++DefCycle;
+  } else {
+    // Assume the worst.
+    DefCycle = RegNo + 2;
+  }
+
+  return DefCycle;
+}
+
+int
+ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
+                                 const TargetInstrDesc &DefTID,
+                                 unsigned DefClass,
+                                 unsigned DefIdx, unsigned DefAlign) const {
+  int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+  if (RegNo <= 0)
+    // Def is the address writeback.
+    return ItinData->getOperandCycle(DefClass, DefIdx);
+
+  int DefCycle;
+  if (Subtarget.isCortexA8()) {
+    // 4 registers would be issued: 1, 2, 1.
+    // 5 registers would be issued: 1, 2, 2.
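+    // E.g. for the fifth register (RegNo = 5): 5 / 2 = 2, plus the two
+    // E-stage cycles added below, gives a def cycle of 4.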
+ DefCycle = RegNo / 2; + if (DefCycle < 1) + DefCycle = 1; + // Result latency is issue cycle + 2: E2. + DefCycle += 2; + } else if (Subtarget.isCortexA9()) { + DefCycle = (RegNo / 2); + // If there are odd number of registers or if it's not 64-bit aligned, + // then it takes an extra AGU (Address Generation Unit) cycle. + if ((RegNo % 2) || DefAlign < 8) + ++DefCycle; + // Result latency is AGU cycles + 2. + DefCycle += 2; + } else { + // Assume the worst. + DefCycle = RegNo + 2; + } + + return DefCycle; +} + +int +ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &UseTID, + unsigned UseClass, + unsigned UseIdx, unsigned UseAlign) const { + int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1; + if (RegNo <= 0) + return ItinData->getOperandCycle(UseClass, UseIdx); + + int UseCycle; + if (Subtarget.isCortexA8()) { + // (regno / 2) + (regno % 2) + 1 + UseCycle = RegNo / 2 + 1; + if (RegNo % 2) + ++UseCycle; + } else if (Subtarget.isCortexA9()) { + UseCycle = RegNo; + bool isSStore = false; + + switch (UseTID.getOpcode()) { + default: break; + case ARM::VSTMSIA: + case ARM::VSTMSDB: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: + isSStore = true; + break; + } + + // If there are odd number of 'S' registers or if it's not 64-bit aligned, + // then it takes an extra cycle. + if ((isSStore && (RegNo % 2)) || UseAlign < 8) + ++UseCycle; + } else { + // Assume the worst. + UseCycle = RegNo + 2; + } + + return UseCycle; +} + +int +ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &UseTID, + unsigned UseClass, + unsigned UseIdx, unsigned UseAlign) const { + int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1; + if (RegNo <= 0) + return ItinData->getOperandCycle(UseClass, UseIdx); + + int UseCycle; + if (Subtarget.isCortexA8()) { + UseCycle = RegNo / 2; + if (UseCycle < 2) + UseCycle = 2; + // Read in E3. + UseCycle += 2; + } else if (Subtarget.isCortexA9()) { + UseCycle = (RegNo / 2); + // If there are odd number of registers or if it's not 64-bit aligned, + // then it takes an extra AGU (Address Generation Unit) cycle. + if ((RegNo % 2) || UseAlign < 8) + ++UseCycle; + } else { + // Assume the worst. + UseCycle = 1; + } + return UseCycle; +} + +int +ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + const TargetInstrDesc &DefTID, + unsigned DefIdx, unsigned DefAlign, + const TargetInstrDesc &UseTID, + unsigned UseIdx, unsigned UseAlign) const { + unsigned DefClass = DefTID.getSchedClass(); + unsigned UseClass = UseTID.getSchedClass(); + + if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands()) + return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); + + // This may be a def / use of a variable_ops instruction, the operand + // latency might be determinable dynamically. Let the target try to + // figure it out. 
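+  // The approach: derive the def's available cycle and the use's read cycle
+  // from each register's position in the variable operand list, then return
+  // DefCycle - UseCycle + 1, less any pipeline forwarding credit.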
+ int DefCycle = -1; + bool LdmBypass = false; + switch (DefTID.getOpcode()) { + default: + DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); + break; + + case ARM::VLDMDIA: + case ARM::VLDMDDB: + case ARM::VLDMDIA_UPD: + case ARM::VLDMDDB_UPD: + case ARM::VLDMSIA: + case ARM::VLDMSDB: + case ARM::VLDMSIA_UPD: + case ARM::VLDMSDB_UPD: + DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign); + break; + + case ARM::LDMIA_RET: + case ARM::LDMIA: + case ARM::LDMDA: + case ARM::LDMDB: + case ARM::LDMIB: + case ARM::LDMIA_UPD: + case ARM::LDMDA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tPUSH: + case ARM::t2LDMIA_RET: + case ARM::t2LDMIA: + case ARM::t2LDMDB: + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB_UPD: + LdmBypass = 1; + DefCycle = getLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign); + break; + } + + if (DefCycle == -1) + // We can't seem to determine the result latency of the def, assume it's 2. + DefCycle = 2; + + int UseCycle = -1; + switch (UseTID.getOpcode()) { + default: + UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); + break; + + case ARM::VSTMDIA: + case ARM::VSTMDDB: + case ARM::VSTMDIA_UPD: + case ARM::VSTMDDB_UPD: + case ARM::VSTMSIA: + case ARM::VSTMSDB: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: + UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign); + break; + + case ARM::STMIA: + case ARM::STMDA: + case ARM::STMDB: + case ARM::STMIB: + case ARM::STMIA_UPD: + case ARM::STMDA_UPD: + case ARM::STMDB_UPD: + case ARM::STMIB_UPD: + case ARM::tSTMIA: + case ARM::tSTMIA_UPD: + case ARM::tPOP_RET: + case ARM::tPOP: + case ARM::t2STMIA: + case ARM::t2STMDB: + case ARM::t2STMIA_UPD: + case ARM::t2STMDB_UPD: + UseCycle = getSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign); + break; + } + + if (UseCycle == -1) + // Assume it's read in the first stage. + UseCycle = 1; + + UseCycle = DefCycle - UseCycle + 1; + if (UseCycle > 0) { + if (LdmBypass) { + // It's a variable_ops instruction so we can't use DefIdx here. Just use + // first def operand. + if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1, + UseClass, UseIdx)) + --UseCycle; + } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, + UseClass, UseIdx)) { + --UseCycle; + } + } + + return UseCycle; +} + +int +ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || + DefMI->isRegSequence() || DefMI->isImplicitDef()) + return 1; + + const TargetInstrDesc &DefTID = DefMI->getDesc(); + if (!ItinData || ItinData->isEmpty()) + return DefTID.mayLoad() ? 3 : 1; + + const TargetInstrDesc &UseTID = UseMI->getDesc(); + const MachineOperand &DefMO = DefMI->getOperand(DefIdx); + if (DefMO.getReg() == ARM::CPSR) { + if (DefMI->getOpcode() == ARM::FMSTAT) { + // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) + return Subtarget.isCortexA9() ? 1 : 20; + } + + // CPSR set and branch can be paired in the same cycle. + if (UseTID.isBranch()) + return 0; + } + + unsigned DefAlign = DefMI->hasOneMemOperand() + ? (*DefMI->memoperands_begin())->getAlignment() : 0; + unsigned UseAlign = UseMI->hasOneMemOperand() + ? 
(*UseMI->memoperands_begin())->getAlignment() : 0; + int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, + UseTID, UseIdx, UseAlign); + + if (Latency > 1 && + (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { + // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] + // variants are one cycle cheaper. + switch (DefTID.getOpcode()) { + default: break; + case ARM::LDRrs: + case ARM::LDRBrs: { + unsigned ShOpVal = DefMI->getOperand(3).getImm(); + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (ShImm == 0 || + (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) + --Latency; + break; + } + case ARM::t2LDRs: + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRSHs: { + // Thumb2 mode: lsl only. + unsigned ShAmt = DefMI->getOperand(3).getImm(); + if (ShAmt == 0 || ShAmt == 2) + --Latency; + break; + } + } + } + + return Latency; +} + +int +ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const { + if (!DefNode->isMachineOpcode()) + return 1; + + const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode()); + + if (isZeroCost(DefTID.Opcode)) + return 0; + + if (!ItinData || ItinData->isEmpty()) + return DefTID.mayLoad() ? 3 : 1; + + if (!UseNode->isMachineOpcode()) { + int Latency = ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx); + if (Subtarget.isCortexA9()) + return Latency <= 2 ? 1 : Latency - 1; + else + return Latency <= 3 ? 1 : Latency - 2; + } + + const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode()); + const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); + unsigned DefAlign = !DefMN->memoperands_empty() + ? (*DefMN->memoperands_begin())->getAlignment() : 0; + const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); + unsigned UseAlign = !UseMN->memoperands_empty() + ? (*UseMN->memoperands_begin())->getAlignment() : 0; + int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, + UseTID, UseIdx, UseAlign); + + if (Latency > 1 && + (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { + // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] + // variants are one cycle cheaper. + switch (DefTID.getOpcode()) { + default: break; + case ARM::LDRrs: + case ARM::LDRBrs: { + unsigned ShOpVal = + cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (ShImm == 0 || + (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) + --Latency; + break; + } + case ARM::t2LDRs: + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRSHs: { + // Thumb2 mode: lsl only. + unsigned ShAmt = + cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); + if (ShAmt == 0 || ShAmt == 2) + --Latency; + break; + } + } + } + + return Latency; +} + +int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost) const { + if (MI->isCopyLike() || MI->isInsertSubreg() || + MI->isRegSequence() || MI->isImplicitDef()) + return 1; + + if (!ItinData || ItinData->isEmpty()) + return 1; + + const TargetInstrDesc &TID = MI->getDesc(); + unsigned Class = TID.getSchedClass(); + unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; + if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR)) + // When predicated, CPSR is an additional source operand for CPSR updating + // instructions, this apparently increases their latencies. 
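+    // E.g. a predicated SUBS must read CPSR for its predicate as well as
+    // write it, so report one extra cycle of predication cost.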
+ *PredCost = 1; + if (UOps) + return ItinData->getStageLatency(Class); + return getNumMicroOps(ItinData, MI); +} + +int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + SDNode *Node) const { + if (!Node->isMachineOpcode()) + return 1; + + if (!ItinData || ItinData->isEmpty()) + return 1; + + unsigned Opcode = Node->getMachineOpcode(); + switch (Opcode) { + default: + return ItinData->getStageLatency(get(Opcode).getSchedClass()); + case ARM::VLDMQIA: + case ARM::VLDMQDB: + case ARM::VSTMQIA: + case ARM::VSTMQDB: + return 2; + } +} + +bool ARMBaseInstrInfo:: +hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; + unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask; + if (Subtarget.isCortexA8() && + (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) + // CortexA8 VFP instructions are not pipelined. + return true; + + // Hoist VFP / NEON instructions with 4 or higher latency. + int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + if (Latency <= 3) + return false; + return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || + UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; +} + +bool ARMBaseInstrInfo:: +hasLowDefLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx) const { + if (!ItinData || ItinData->isEmpty()) + return false; + + unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; + if (DDomain == ARMII::DomainGeneral) { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); + return (DefCycle != -1 && DefCycle <= 2); + } + return false; +} + +bool +ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, + unsigned &AddSubOpc, + bool &NegAcc, bool &HasLane) const { + DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode); + if (I == MLxEntryMap.end()) + return false; + + const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; + MulOpc = Entry.MulOpc; + AddSubOpc = Entry.AddSubOpc; + NegAcc = Entry.NegAcc; + HasLane = Entry.HasLane; + return true; +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index b4f4a33..1fb8872 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -17,6 +17,8 @@ #include "ARM.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" namespace llvm { class ARMSubtarget; @@ -33,7 +35,7 @@ namespace ARMII { //===------------------------------------------------------------------===// // This four-bit field describes the addressing mode used. - AddrModeMask = 0xf, + AddrModeMask = 0x1f, AddrModeNone = 0, AddrMode1 = 1, AddrMode2 = 2, @@ -50,9 +52,10 @@ namespace ARMII { AddrModeT2_so = 13, AddrModeT2_pc = 14, // +/- i12 for pc relative data AddrModeT2_i8s4 = 15, // i8 * 4 + AddrMode_i12 = 16, // Size* - Flags to keep track of the size of an instruction. - SizeShift = 4, + SizeShift = 5, SizeMask = 7 << SizeShift, SizeSpecial = 1, // 0 byte pseudo or special case. Size8Bytes = 2, @@ -61,7 +64,7 @@ namespace ARMII { // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load // and store ops only. Generic "updating" flag is used for ld/st multiple. 
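     // With the widened AddrMode field, the low TSFlags bits are laid out as
     // follows (summarizing the shifts and masks in this enum): AddrMode
     // [4:0], Size [7:5], IndexMode [9:8], Form [15:10], UnaryDP bit 16,
     // Xform16Bit bit 17, Domain [19:18].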
- IndexModeShift = 7, + IndexModeShift = 8, IndexModeMask = 3 << IndexModeShift, IndexModePre = 1, IndexModePost = 2, @@ -70,7 +73,7 @@ namespace ARMII { //===------------------------------------------------------------------===// // Instruction encoding formats. // - FormShift = 9, + FormShift = 10, FormMask = 0x3f << FormShift, // Pseudo instructions @@ -143,15 +146,15 @@ namespace ARMII { // UnaryDP - Indicates this is a unary data processing instruction, i.e. // it doesn't have a Rn operand. - UnaryDP = 1 << 15, + UnaryDP = 1 << 16, // Xform16Bit - Indicates this Thumb2 instruction may be transformed into // a 16-bit Thumb instruction if certain conditions are met. - Xform16Bit = 1 << 16, + Xform16Bit = 1 << 17, //===------------------------------------------------------------------===// // Code domain. - DomainShift = 17, + DomainShift = 18, DomainMask = 3 << DomainShift, DomainGeneral = 0 << DomainShift, DomainVFP = 1 << DomainShift, @@ -160,6 +163,11 @@ namespace ARMII { //===------------------------------------------------------------------===// // Field shifts - such shifts are used to set field while generating // machine instructions. + // + // FIXME: This list will need adjusting/fixing as the MC code emitter + // takes shape and the ARMCodeEmitter.cpp bits go away. + ShiftTypeShift = 4, + M_BitShift = 5, ShiftImmShift = 5, ShiftShift = 7, @@ -181,29 +189,15 @@ namespace ARMII { I_BitShift = 25, CondShift = 28 }; - - /// Target Operand Flag enum. - enum TOF { - //===------------------------------------------------------------------===// - // ARM Specific MachineOperand flags. - - MO_NO_FLAG, - - /// MO_LO16 - On a symbol operand, this represents a relocation containing - /// lower 16 bit of the address. Used only via movw instruction. - MO_LO16, - - /// MO_HI16 - On a symbol operand, this represents a relocation containing - /// higher 16 bit of the address. Used only via movt instruction. - MO_HI16 - }; } class ARMBaseInstrInfo : public TargetInstrInfoImpl { const ARMSubtarget &Subtarget; + protected: // Can be only subclassed. explicit ARMBaseInstrInfo(const ARMSubtarget &STI); + public: // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. @@ -216,10 +210,13 @@ public: virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0; const ARMSubtarget &getSubtarget() const { return Subtarget; } - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; + ScheduleHazardRecognizer * + CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + + ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const; // Branch analysis. virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, @@ -301,7 +298,8 @@ public: MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; virtual bool produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const; + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const; /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to /// determine if two loads are loading from the same base address. 
It should @@ -328,26 +326,117 @@ public: const MachineFunction &MF) const; virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumInstrs) const; + unsigned NumCyles, unsigned ExtraPredCycles, + float Prob, float Confidence) const; - virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,unsigned NumT, - MachineBasicBlock &FMBB,unsigned NumF) const; + virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumT, unsigned ExtraT, + MachineBasicBlock &FMBB, + unsigned NumF, unsigned ExtraF, + float Probability, float Confidence) const; virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, - unsigned NumInstrs) const { - return NumInstrs && NumInstrs == 1; + unsigned NumCyles, + float Probability, + float Confidence) const { + return NumCyles == 1; } /// AnalyzeCompare - For a comparison instruction, return the source register /// in SrcReg and the value it compares against in CmpValue. Return true if /// the comparison instruction can be analyzed. virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, - int &CmpValue) const; + int &CmpMask, int &CmpValue) const; - /// ConvertToSetZeroFlag - Convert the instruction to set the zero flag so + /// OptimizeCompareInstr - Convert the instruction to set the zero flag so /// that we can remove a "comparison with zero". - virtual bool ConvertToSetZeroFlag(MachineInstr *Instr, - MachineInstr *CmpInstr) const; + virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + int CmpMask, int CmpValue, + const MachineRegisterInfo *MRI) const; + + /// FoldImmediate - 'Reg' is known to be defined by a move immediate + /// instruction, try to fold the immediate into the use instruction. + virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const; + + virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, + const MachineInstr *MI) const; + + virtual + int getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const; + virtual + int getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const; +private: + int getVLDMDefCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &DefTID, + unsigned DefClass, + unsigned DefIdx, unsigned DefAlign) const; + int getLDMDefCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &DefTID, + unsigned DefClass, + unsigned DefIdx, unsigned DefAlign) const; + int getVSTMUseCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &UseTID, + unsigned UseClass, + unsigned UseIdx, unsigned UseAlign) const; + int getSTMUseCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &UseTID, + unsigned UseClass, + unsigned UseIdx, unsigned UseAlign) const; + int getOperandLatency(const InstrItineraryData *ItinData, + const TargetInstrDesc &DefTID, + unsigned DefIdx, unsigned DefAlign, + const TargetInstrDesc &UseTID, + unsigned UseIdx, unsigned UseAlign) const; + + int getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, unsigned *PredCost = 0) const; + + int getInstrLatency(const InstrItineraryData *ItinData, + SDNode *Node) const; + + bool hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const; + bool hasLowDefLatency(const InstrItineraryData 
*ItinData, + const MachineInstr *DefMI, unsigned DefIdx) const; + +private: + /// Modeling special VFP / NEON fp MLA / MLS hazards. + + /// MLxEntryMap - Map fp MLA / MLS to the corresponding entry in the internal + /// MLx table. + DenseMap<unsigned, unsigned> MLxEntryMap; + + /// MLxHazardOpcodes - Set of add / sub and multiply opcodes that would cause + /// stalls when scheduled together with fp MLA / MLS opcodes. + SmallSet<unsigned, 16> MLxHazardOpcodes; + +public: + /// isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS + /// instruction. + bool isFpMLxInstruction(unsigned Opcode) const { + return MLxEntryMap.count(Opcode); + } + + /// isFpMLxInstruction - This version also returns the multiply opcode and the + /// addition / subtraction opcode to expand to. Return true for 'HasLane' for + /// the MLX instructions with an extra lane operand. + bool isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, + unsigned &AddSubOpc, bool &NegAcc, + bool &HasLane) const; + + /// canCauseFpMLxStall - Return true if an instruction of the specified opcode + /// will cause stalls when scheduled after (within 4-cycle window) a fp + /// MLA / MLS instruction. + bool canCauseFpMLxStall(unsigned Opcode) const { + return MLxHazardOpcodes.count(Opcode); + } }; static inline @@ -389,7 +478,7 @@ bool isJumpTableBranchOpcode(int Opc) { static inline bool isIndirectBranchOpcode(int Opc) { - return Opc == ARM::BRIND || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND; + return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND; } /// getInstrPredicate - If instruction is predicated, returns its predicate @@ -413,6 +502,12 @@ void emitT2RegPlusImmediate(MachineBasicBlock &MBB, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, const ARMBaseInstrInfo &TII); +void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, const TargetInstrInfo &TII, + const ARMBaseRegisterInfo& MRI, + DebugLoc dl); /// rewriteARMFrameIndex / rewriteT2FrameIndex - diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index eceafad..67a4b7d 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -15,6 +15,7 @@ #include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "ARMFrameLowering.h" #include "ARMInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" @@ -32,120 +33,25 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" -namespace llvm { +using namespace llvm; + static cl::opt<bool> ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false), cl::desc("Force use of virtual base registers for stack load/store")); static cl::opt<bool> EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden, cl::desc("Enable pre-regalloc stack frame index allocation")); -} - -using namespace llvm; - static cl::opt<bool> EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); -unsigned 
ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, - bool *isSPVFP) { - if (isSPVFP) - *isSPVFP = false; - - using namespace ARM; - switch (RegEnum) { - default: - llvm_unreachable("Unknown ARM register!"); - case R0: case D0: case Q0: return 0; - case R1: case D1: case Q1: return 1; - case R2: case D2: case Q2: return 2; - case R3: case D3: case Q3: return 3; - case R4: case D4: case Q4: return 4; - case R5: case D5: case Q5: return 5; - case R6: case D6: case Q6: return 6; - case R7: case D7: case Q7: return 7; - case R8: case D8: case Q8: return 8; - case R9: case D9: case Q9: return 9; - case R10: case D10: case Q10: return 10; - case R11: case D11: case Q11: return 11; - case R12: case D12: case Q12: return 12; - case SP: case D13: case Q13: return 13; - case LR: case D14: case Q14: return 14; - case PC: case D15: case Q15: return 15; - - case D16: return 16; - case D17: return 17; - case D18: return 18; - case D19: return 19; - case D20: return 20; - case D21: return 21; - case D22: return 22; - case D23: return 23; - case D24: return 24; - case D25: return 25; - case D26: return 26; - case D27: return 27; - case D28: return 28; - case D29: return 29; - case D30: return 30; - case D31: return 31; - - case S0: case S1: case S2: case S3: - case S4: case S5: case S6: case S7: - case S8: case S9: case S10: case S11: - case S12: case S13: case S14: case S15: - case S16: case S17: case S18: case S19: - case S20: case S21: case S22: case S23: - case S24: case S25: case S26: case S27: - case S28: case S29: case S30: case S31: { - if (isSPVFP) - *isSPVFP = true; - switch (RegEnum) { - default: return 0; // Avoid compile time warning. - case S0: return 0; - case S1: return 1; - case S2: return 2; - case S3: return 3; - case S4: return 4; - case S5: return 5; - case S6: return 6; - case S7: return 7; - case S8: return 8; - case S9: return 9; - case S10: return 10; - case S11: return 11; - case S12: return 12; - case S13: return 13; - case S14: return 14; - case S15: return 15; - case S16: return 16; - case S17: return 17; - case S18: return 18; - case S19: return 19; - case S20: return 20; - case S21: return 21; - case S22: return 22; - case S23: return 23; - case S24: return 24; - case S25: return 25; - case S26: return 26; - case S27: return 27; - case S28: return 28; - case S29: return 29; - case S30: return 30; - case S31: return 31; - } - } - } -} - ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), @@ -180,12 +86,14 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + // FIXME: avoid re-calculating this everytime. 
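+  // SP, PC and FPSCR are always reserved; the frame pointer and base pointer
+  // are only added when frame lowering (TFI->hasFP) or hasBasePointer below
+  // say this function needs them.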
BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); Reserved.set(ARM::PC); Reserved.set(ARM::FPSCR); - if (hasFP(MF)) + if (TFI->hasFP(MF)) Reserved.set(FramePtr); if (hasBasePointer(MF)) Reserved.set(BasePtr); @@ -197,6 +105,8 @@ getReservedRegs(const MachineFunction &MF) const { bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + switch (Reg) { default: break; case ARM::SP: @@ -208,7 +118,7 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, break; case ARM::R7: case ARM::R11: - if (FramePtr == Reg && hasFP(MF)) + if (FramePtr == Reg && TFI->hasFP(MF)) return true; break; case ARM::R9: @@ -444,6 +354,7 @@ std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, unsigned HintType, unsigned HintReg, const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); // Alternative register allocation orders when favoring even / odd registers // of register pairs. @@ -525,7 +436,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, return std::make_pair(RC->allocation_order_begin(MF), RC->allocation_order_end(MF)); - if (!hasFP(MF)) { + if (!TFI->hasFP(MF)) { if (!STI.isR9Reserved()) return std::make_pair(GPREven1, GPREven1 + (sizeof(GPREven1)/sizeof(unsigned))); @@ -554,7 +465,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, return std::make_pair(RC->allocation_order_begin(MF), RC->allocation_order_end(MF)); - if (!hasFP(MF)) { + if (!TFI->hasFP(MF)) { if (!STI.isR9Reserved()) return std::make_pair(GPROdd1, GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned))); @@ -606,7 +517,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg); if ((Hint.first == (unsigned)ARMRI::RegPairOdd || Hint.first == (unsigned)ARMRI::RegPairEven) && - Hint.second && TargetRegisterInfo::isVirtualRegister(Hint.second)) { + TargetRegisterInfo::isVirtualRegister(Hint.second)) { // If 'Reg' is one of the even / odd register pair and it's now changed // (e.g. coalesced) into a different register. The other register of the // pair allocation hint must be updated to reflect the relationship @@ -619,23 +530,6 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, } } -/// hasFP - Return true if the specified function should have a dedicated frame -/// pointer register. This is true if the function has variable sized allocas -/// or if frame pointer elimination is disabled. -/// -bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { - // Mac OS X requires FP not to be clobbered for backtracing purpose. - if (STI.isTargetDarwin()) - return true; - - const MachineFrameInfo *MFI = MF.getFrameInfo(); - // Always eliminate non-leaf frame pointers. 
- return ((DisableFramePointerElim(MF) && MFI->hasCalls()) || - needsStackRealignment(MF) || - MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken()); -} - bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -681,7 +575,7 @@ bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) || F->hasFnAttr(Attribute::StackAlignment)); @@ -697,417 +591,19 @@ cannotEliminateFrame(const MachineFunction &MF) const { || needsStackRealignment(MF); } -/// estimateStackSize - Estimate and return the size of the frame. -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - int Offset = 0; - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -FFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - Offset += FFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - } - return (unsigned)Offset; -} - -/// estimateRSStackSizeLimit - Look at each instruction that references stack -/// frames and return the stack size limit beyond which some of these -/// instructions will require a scratch register during their expansion later. -unsigned -ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned Limit = (1 << 12) - 1; - for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - if (!I->getOperand(i).isFI()) continue; - - // When using ADDri to get the address of a stack object, 255 is the - // largest offset guaranteed to fit in the immediate offset. - if (I->getOpcode() == ARM::ADDri) { - Limit = std::min(Limit, (1U << 8) - 1); - break; - } - - // Otherwise check the addressing mode. - switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { - case ARMII::AddrMode3: - case ARMII::AddrModeT2_i8: - Limit = std::min(Limit, (1U << 8) - 1); - break; - case ARMII::AddrMode5: - case ARMII::AddrModeT2_i8s4: - Limit = std::min(Limit, ((1U << 8) - 1) * 4); - break; - case ARMII::AddrModeT2_i12: - // i12 supports only positive offset so these will be converted to - // i8 opcodes. See llvm::rewriteT2FrameIndex. - if (hasFP(MF) && AFI->hasStackFrame()) - Limit = std::min(Limit, (1U << 8) - 1); - break; - case ARMII::AddrMode6: - // Addressing mode 6 (load/store) instructions can't encode an - // immediate offset for stack references. 
- return 0; - default: - break; - } - break; // At most one FI per instruction - } - } - } - - return Limit; -} - -static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, - const ARMBaseInstrInfo &TII) { - unsigned FnSize = 0; - for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - const MachineBasicBlock &MBB = *MBBI; - for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); - I != E; ++I) - FnSize += TII.GetInstSizeInBytes(I); - } - return FnSize; -} - -void -ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - // This tells PEI to spill the FP as if it is any other callee-save register - // to take advantage the eliminateFrameIndex machinery. This also ensures it - // is spilled in the order specified by getCalleeSavedRegs() to make it easier - // to combine multiple loads / stores. - bool CanEliminateFrame = true; - bool CS1Spilled = false; - bool LRSpilled = false; - unsigned NumGPRSpills = 0; - SmallVector<unsigned, 4> UnspilledCS1GPRs; - SmallVector<unsigned, 4> UnspilledCS2GPRs; - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Spill R4 if Thumb2 function requires stack realignment - it will be used as - // scratch register. - // FIXME: It will be better just to find spare register here. - if (needsStackRealignment(MF) && - AFI->isThumb2Function()) - MF.getRegInfo().setPhysRegUsed(ARM::R4); - - // Spill LR if Thumb1 function uses variable length argument lists. - if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0) - MF.getRegInfo().setPhysRegUsed(ARM::LR); - - // Spill the BasePtr if it's used. - if (hasBasePointer(MF)) - MF.getRegInfo().setPhysRegUsed(BasePtr); - - // Don't spill FP if the frame can be eliminated. This is determined - // by scanning the callee-save registers to see if any is used. - const unsigned *CSRegs = getCalleeSavedRegs(); - for (unsigned i = 0; CSRegs[i]; ++i) { - unsigned Reg = CSRegs[i]; - bool Spilled = false; - if (MF.getRegInfo().isPhysRegUsed(Reg)) { - AFI->setCSRegisterIsSpilled(Reg); - Spilled = true; - CanEliminateFrame = false; - } else { - // Check alias registers too. - for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) { - if (MF.getRegInfo().isPhysRegUsed(*Aliases)) { - Spilled = true; - CanEliminateFrame = false; - } - } - } - - if (!ARM::GPRRegisterClass->contains(Reg)) - continue; - - if (Spilled) { - NumGPRSpills++; - - if (!STI.isTargetDarwin()) { - if (Reg == ARM::LR) - LRSpilled = true; - CS1Spilled = true; - continue; - } - - // Keep track if LR and any of R4, R5, R6, and R7 is spilled. - switch (Reg) { - case ARM::LR: - LRSpilled = true; - // Fallthrough - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - CS1Spilled = true; - break; - default: - break; - } - } else { - if (!STI.isTargetDarwin()) { - UnspilledCS1GPRs.push_back(Reg); - continue; - } - - switch (Reg) { - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - UnspilledCS1GPRs.push_back(Reg); - break; - default: - UnspilledCS2GPRs.push_back(Reg); - break; - } - } - } - - bool ForceLRSpill = false; - if (!LRSpilled && AFI->isThumb1OnlyFunction()) { - unsigned FnSize = GetFunctionSizeInBytes(MF, TII); - // Force LR to be spilled if the Thumb function size is > 2048. This enables - // use of BL to implement far jump. If it turns out that it's not needed - // then the branch fix up path will undo it. 
- if (FnSize >= (1 << 11)) { - CanEliminateFrame = false; - ForceLRSpill = true; - } - } - - // If any of the stack slot references may be out of range of an immediate - // offset, make sure a register (or a spill slot) is available for the - // register scavenger. Note that if we're indexing off the frame pointer, the - // effective stack size is 4 bytes larger since the FP points to the stack - // slot of the previous FP. Also, if we have variable sized objects in the - // function, stack slot references will often be negative, and some of - // our instructions are positive-offset only, so conservatively consider - // that case to want a spill slot (or register) as well. Similarly, if - // the function adjusts the stack pointer during execution and the - // adjustments aren't already part of our stack size estimate, our offset - // calculations may be off, so be conservative. - // FIXME: We could add logic to be more precise about negative offsets - // and which instructions will need a scratch register for them. Is it - // worth the effort and added fragility? - bool BigStack = - (RS && - (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= - estimateRSStackSizeLimit(MF))) - || MFI->hasVarSizedObjects() - || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); - - bool ExtraCSSpill = false; - if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) { - AFI->setHasStackFrame(true); - - // If LR is not spilled but at least one of R4, R5, R6, and R7 is, spill - // LR as well so we can fold BX_RET into the register restore (LDM). - if (!LRSpilled && CS1Spilled) { - MF.getRegInfo().setPhysRegUsed(ARM::LR); - AFI->setCSRegisterIsSpilled(ARM::LR); - NumGPRSpills++; - UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), - UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); - ForceLRSpill = false; - ExtraCSSpill = true; - } - - if (hasFP(MF)) { - MF.getRegInfo().setPhysRegUsed(FramePtr); - NumGPRSpills++; - } - - // If the stack and doubles are 8-byte aligned and we are spilling an odd - // number of GPRs, spill one extra callee-save GPR so we won't have to pad - // between the integer and double callee-save areas. - unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); - if (TargetAlign == 8 && (NumGPRSpills & 1)) { - if (CS1Spilled && !UnspilledCS1GPRs.empty()) { - for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { - unsigned Reg = UnspilledCS1GPRs[i]; - // Don't spill a high register if the function is Thumb1 - if (!AFI->isThumb1OnlyFunction() || - isARMLowRegister(Reg) || Reg == ARM::LR) { - MF.getRegInfo().setPhysRegUsed(Reg); - AFI->setCSRegisterIsSpilled(Reg); - if (!isReservedReg(MF, Reg)) - ExtraCSSpill = true; - break; - } - } - } else if (!UnspilledCS2GPRs.empty() && - !AFI->isThumb1OnlyFunction()) { - unsigned Reg = UnspilledCS2GPRs.front(); - MF.getRegInfo().setPhysRegUsed(Reg); - AFI->setCSRegisterIsSpilled(Reg); - if (!isReservedReg(MF, Reg)) - ExtraCSSpill = true; - } - } - - // Estimate if we might need to scavenge a register at some point in order - // to materialize a stack offset. If so, either spill one additional - // callee-saved register or reserve a special spill slot to facilitate - // register scavenging. Thumb1 needs a spill slot for stack pointer - // adjustments also, even when the frame itself is small. - if (BigStack && !ExtraCSSpill) { - // If any non-reserved CS register isn't spilled, just spill one or two - // extra. That should take care of it!
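-      // For example, with an 8-byte aligned stack NumExtras is 2 below, so
-      // up to two unspilled callee-saved GPRs are pressed into service
-      // before falling back to a dedicated scavenging slot.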
- unsigned NumExtras = TargetAlign / 4; - SmallVector<unsigned, 2> Extras; - while (NumExtras && !UnspilledCS1GPRs.empty()) { - unsigned Reg = UnspilledCS1GPRs.back(); - UnspilledCS1GPRs.pop_back(); - if (!isReservedReg(MF, Reg) && - (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || - Reg == ARM::LR)) { - Extras.push_back(Reg); - NumExtras--; - } - } - // For non-Thumb1 functions, also check for hi-reg CS registers - if (!AFI->isThumb1OnlyFunction()) { - while (NumExtras && !UnspilledCS2GPRs.empty()) { - unsigned Reg = UnspilledCS2GPRs.back(); - UnspilledCS2GPRs.pop_back(); - if (!isReservedReg(MF, Reg)) { - Extras.push_back(Reg); - NumExtras--; - } - } - } - if (Extras.size() && NumExtras == 0) { - for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - MF.getRegInfo().setPhysRegUsed(Extras[i]); - AFI->setCSRegisterIsSpilled(Extras[i]); - } - } else if (!AFI->isThumb1OnlyFunction()) { - // note: Thumb1 functions spill to R12, not the stack. Reserve a slot - // closest to SP or frame pointer. - const TargetRegisterClass *RC = ARM::GPRRegisterClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } - } - } - - if (ForceLRSpill) { - MF.getRegInfo().setPhysRegUsed(ARM::LR); - AFI->setCSRegisterIsSpilled(ARM::LR); - AFI->setLRIsSpilledForFarJump(true); - } -} - unsigned ARMBaseRegisterInfo::getRARegister() const { return ARM::LR; } unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - if (hasFP(MF)) + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (TFI->hasFP(MF)) return FramePtr; return ARM::SP; } -// Provide a base+offset reference to an FI slot for debug info. It's the -// same as what we use for resolving the code-gen references for now. -// FIXME: This can go wrong when references are SP-relative and simple call -// frames aren't used. -int -ARMBaseRegisterInfo::getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const { - return ResolveFrameIndexReference(MF, FI, FrameReg, 0); -} - -int -ARMBaseRegisterInfo::ResolveFrameIndexReference(const MachineFunction &MF, - int FI, - unsigned &FrameReg, - int SPAdj) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); - int FPOffset = Offset - AFI->getFramePtrSpillOffset(); - bool isFixed = MFI->isFixedObjectIndex(FI); - - FrameReg = ARM::SP; - Offset += SPAdj; - if (AFI->isGPRCalleeSavedArea1Frame(FI)) - return Offset - AFI->getGPRCalleeSavedArea1Offset(); - else if (AFI->isGPRCalleeSavedArea2Frame(FI)) - return Offset - AFI->getGPRCalleeSavedArea2Offset(); - else if (AFI->isDPRCalleeSavedAreaFrame(FI)) - return Offset - AFI->getDPRCalleeSavedAreaOffset(); - - // When dynamically realigning the stack, use the frame pointer for - // parameters, and the stack/base pointer for locals. - if (needsStackRealignment(MF)) { - assert (hasFP(MF) && "dynamic stack realignment without a FP!"); - if (isFixed) { - FrameReg = getFrameRegister(MF); - Offset = FPOffset; - } else if (MFI->hasVarSizedObjects()) { - assert(hasBasePointer(MF) && - "VLAs and dynamic stack alignment, but missing base pointer!"); - FrameReg = BasePtr; - } - return Offset; - } - - // If there is a frame pointer, use it when we can. - if (hasFP(MF) && AFI->hasStackFrame()) { - // Use frame pointer to reference fixed objects. Use it for locals if - // there are VLAs (and thus the SP isn't reliable as a base). 
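-    // (E.g. an incoming argument, being a fixed object, is addressed off
-    // FP; a local is only forced onto FP when VLAs make SP unreliable and
-    // no base pointer is available.)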
- if (isFixed || (MFI->hasVarSizedObjects() && !hasBasePointer(MF))) { - FrameReg = getFrameRegister(MF); - return FPOffset; - } else if (MFI->hasVarSizedObjects()) { - assert(hasBasePointer(MF) && "missing base pointer!"); - // Use the base register since we have it. - FrameReg = BasePtr; - } else if (AFI->isThumb2Function()) { - // In Thumb2 mode, the negative offset is very limited. Try to avoid - // out-of-range references. - if (FPOffset >= -255 && FPOffset < 0) { - FrameReg = getFrameRegister(MF); - return FPOffset; - } - } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) { - // Otherwise, use SP or FP, whichever is closer to the stack slot. - FrameReg = getFrameRegister(MF); - return FPOffset; - } - } - // Use the base pointer if we have one. - if (hasBasePointer(MF)) - FrameReg = BasePtr; - return Offset; -} - -int -ARMBaseRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - unsigned FrameReg; - return getFrameIndexReference(MF, FI, FrameReg); -} - unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const { llvm_unreachable("What is the exception register"); return 0; @@ -1320,7 +816,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp)) .addReg(DestReg, getDefRegState(true), SubIdx) .addConstantPoolIndex(Idx) - .addReg(0).addImm(0).addImm(Pred).addReg(PredReg); + .addImm(0).addImm(Pred).addReg(PredReg); } bool ARMBaseRegisterInfo:: @@ -1338,34 +834,6 @@ requiresVirtualBaseRegisters(const MachineFunction &MF) const { return EnableLocalStackAlloc; } -// hasReservedCallFrame - Under normal circumstances, when a frame pointer is -// not required, we reserve argument space for call sites in the function -// immediately on entry to the current function. This eliminates the need for -// add/sub sp brackets around call sites. Returns true if the call frame is -// included as part of the stack frame. -bool ARMBaseRegisterInfo:: -hasReservedCallFrame(const MachineFunction &MF) const { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - unsigned CFSize = FFI->getMaxCallFrameSize(); - // It's not always a good idea to include the call frame as part of the - // stack frame. ARM (especially Thumb) has only small immediate offsets - // with which to address the stack frame, so a large call frame can cause - // poor codegen and may even make it impossible to scavenge a register. - if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 - return false; - - return !MF.getFrameInfo()->hasVarSizedObjects(); -} - -// canSimplifyCallFramePseudos - If there is a reserved call frame, the -// call frame pseudos can be simplified. Unlike most targets, having a FP -// is not sufficient here since we still may reference some objects via SP -// even when FP is available in Thumb2 mode.
-bool ARMBaseRegisterInfo:: -canSimplifyCallFramePseudos(const MachineFunction &MF) const { - return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); -} - static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, @@ -1384,7 +852,8 @@ emitSPUpdate(bool isARM, void ARMBaseRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (!hasReservedCallFrame(MF)) { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + if (!TFI->hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount // ADJCALLSTACKUP -> add, sp, sp, amount @@ -1395,7 +864,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned Align = TFI->getStackAlignment(); Amount = (Amount+Align-1)/Align*Align; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -1433,8 +902,7 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { switch (AddrMode) { case ARMII::AddrModeT2_i8: case ARMII::AddrModeT2_i12: - // i8 supports only negative, and i12 supports only positive, so - // based on Offset sign, consider the appropriate instruction + case ARMII::AddrMode_i12: InstrOffs = MI->getOperand(Idx+1).getImm(); Scale = 1; break; @@ -1496,8 +964,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // return false for everything else. unsigned Opc = MI->getOpcode(); switch (Opc) { - case ARM::LDR: case ARM::LDRH: case ARM::LDRB: - case ARM::STR: case ARM::STRH: case ARM::STRB: + case ARM::LDRi12: case ARM::LDRH: case ARM::LDRBi12: + case ARM::STRi12: case ARM::STRH: case ARM::STRBi12: case ARM::t2LDRi12: case ARM::t2LDRi8: case ARM::t2STRi12: case ARM::t2STRi8: case ARM::VLDRS: case ARM::VLDRD: @@ -1516,6 +984,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. MachineFunction &MF = *MI->getParent()->getParent(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -1542,8 +1011,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // The FP is only available if there is no dynamic realignment. We // don't know for sure yet whether we'll need that, so we guess based // on whether there are any local variables that would trigger it. - unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); - if (hasFP(MF) && + unsigned StackAlign = TFI->getStackAlignment(); + if (TFI->hasFP(MF) && !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) { if (isFrameOffsetLegal(MI, FPOffset)) return false; @@ -1560,19 +1029,25 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { return true; } -/// materializeFrameBaseRegister - Insert defining instruction(s) for -/// BaseReg to be a pointer to FrameIdx before insertion point I. +/// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to +/// be a pointer to FrameIdx at the beginning of the basic block. 
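+/// For a Thumb2 function, for instance, this materializes
+///   t2ADDri BaseReg, <FrameIdx>, #Offset
+/// at the top of MBB, with the default predicate and CC operands
+/// appended for the non-Thumb1 cases.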
void ARMBaseRegisterInfo:: -materializeFrameBaseRegister(MachineBasicBlock::iterator I, unsigned BaseReg, - int FrameIdx, int64_t Offset) const { - ARMFunctionInfo *AFI = - I->getParent()->getParent()->getInfo<ARMFunctionInfo>(); +materializeFrameBaseRegister(MachineBasicBlock *MBB, + unsigned BaseReg, int FrameIdx, + int64_t Offset) const { + ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>(); unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri); + MachineBasicBlock::iterator Ins = MBB->begin(); + DebugLoc DL; // Defaults to "unknown" + if (Ins != MBB->end()) + DL = Ins->getDebugLoc(); + MachineInstrBuilder MIB = - BuildMI(*I->getParent(), I, I->getDebugLoc(), TII.get(ADDriOpc), BaseReg) + BuildMI(*MBB, Ins, DL, TII.get(ADDriOpc), BaseReg) .addFrameIndex(FrameIdx).addImm(Offset); + if (!AFI->isThumb1OnlyFunction()) AddDefaultCC(AddDefaultPred(MIB)); } @@ -1640,6 +1115,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, NumBits = 8; Scale = 4; break; + case ARMII::AddrMode_i12: case ARMII::AddrMode2: NumBits = 12; break; @@ -1679,6 +1155,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const ARMFrameLowering *TFI = + static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); @@ -1691,7 +1169,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(i).getIndex(); unsigned FrameReg; - int Offset = ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj); + int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj); // Special handling of dbg_value instructions. if (MI.isDebugValue()) { @@ -1737,339 +1215,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, Pred, PredReg, TII); } + // Update the original instruction to use the scratch register. MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); + if (MI.getOpcode() == ARM::t2ADDrSPi) + MI.setDesc(TII.get(ARM::t2ADDri)); + else if (MI.getOpcode() == ARM::t2SUBrSPi) + MI.setDesc(TII.get(ARM::t2SUBri)); } } -/// Move iterator past the next bunch of callee save load / store ops for -/// the particular spill area (1: integer area 1, 2: integer area 2, -/// 3: fp area, 0: don't care). -static void movePastCSLoadStoreOps(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - int Opc1, int Opc2, unsigned Area, - const ARMSubtarget &STI) { - while (MBBI != MBB.end() && - ((MBBI->getOpcode() == Opc1) || (MBBI->getOpcode() == Opc2)) && - MBBI->getOperand(1).isFI()) { - if (Area != 0) { - bool Done = false; - unsigned Category = 0; - switch (MBBI->getOperand(0).getReg()) { - case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: - case ARM::LR: - Category = 1; - break; - case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: - Category = STI.isTargetDarwin() ? 
2 : 1; - break; - case ARM::D8: case ARM::D9: case ARM::D10: case ARM::D11: - case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15: - Category = 3; - break; - default: - Done = true; - break; - } - if (Done || Category != Area) - break; - } - - ++MBBI; - } -} - -void ARMBaseRegisterInfo:: -emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - assert(!AFI->isThumb1OnlyFunction() && - "This emitPrologue does not support Thumb1!"); - bool isARM = !AFI->isThumbFunction(); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); - unsigned NumBytes = MFI->getStackSize(); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - - // Determine the sizes of each callee-save spill areas and record which frame - // belongs to which callee-save spill areas. - unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; - int FramePtrSpillFI = 0; - - // Allocate the vararg register save area. This is not counted in NumBytes. - if (VARegSaveSize) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize); - - if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); - return; - } - - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - int FI = CSI[i].getFrameIdx(); - switch (Reg) { - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - AFI->addGPRCalleeSavedArea1Frame(FI); - GPRCS1Size += 4; - break; - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - if (STI.isTargetDarwin()) { - AFI->addGPRCalleeSavedArea2Frame(FI); - GPRCS2Size += 4; - } else { - AFI->addGPRCalleeSavedArea1Frame(FI); - GPRCS1Size += 4; - } - break; - default: - AFI->addDPRCalleeSavedAreaFrame(FI); - DPRCSSize += 8; - } - } - - // Build the new SUBri to adjust SP for integer callee-save spill area 1. - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -GPRCS1Size); - movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, ARM::t2STRi12, 1, STI); - - // Set FP to point to the stack slot that contains the previous FP. - // For Darwin, FP is R7, which has now been stored in spill area 1. - // Otherwise, if this is not Darwin, all the callee-saved registers go - // into spill area 1, including the FP in R11. In either case, it is - // now safe to emit this assignment. - bool HasFP = hasFP(MF); - if (HasFP) { - unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri; - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0); - AddDefaultCC(AddDefaultPred(MIB)); - } - - // Build the new SUBri to adjust SP for integer callee-save spill area 2. - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -GPRCS2Size); - - // Build the new SUBri to adjust SP for FP callee-save spill area. - movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, ARM::t2STRi12, 2, STI); - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRCSSize); - - // Determine starting offsets of spill areas. 
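-  // A worked example: with NumBytes = 64, GPRCS1Size = 20, GPRCS2Size = 0
-  // and DPRCSSize = 16, the DPR area starts at offset 28 and both GPR
-  // areas at 44, i.e. the spill areas sit at the top of the frame in the
-  // order they were pushed.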
- unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); - unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; - unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - if (HasFP) - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + - NumBytes); - AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); - AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); - AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); - - movePastCSLoadStoreOps(MBB, MBBI, ARM::VSTRD, 0, 3, STI); - NumBytes = DPRCSOffset; - if (NumBytes) { - // Adjust SP after all the callee-save spills. - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); - if (HasFP) - AFI->setShouldRestoreSPFromFP(true); - } - - if (STI.isTargetELF() && hasFP(MF)) { - MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - - AFI->getFramePtrSpillOffset()); - AFI->setShouldRestoreSPFromFP(true); - } - - AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); - AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); - AFI->setDPRCalleeSavedAreaSize(DPRCSSize); - - // If we need dynamic stack realignment, do it here. Be paranoid and make - // sure if we also have VLAs, we have a base pointer for frame access. - if (needsStackRealignment(MF)) { - unsigned MaxAlign = MFI->getMaxAlignment(); - assert (!AFI->isThumb1OnlyFunction()); - if (!AFI->isThumbFunction()) { - // Emit bic sp, sp, MaxAlign - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, - TII.get(ARM::BICri), ARM::SP) - .addReg(ARM::SP, RegState::Kill) - .addImm(MaxAlign-1))); - } else { - // We cannot use sp as source/dest register here, thus we're emitting the - // following sequence: - // mov r4, sp - // bic r4, r4, MaxAlign - // mov sp, r4 - // FIXME: It will be better just to find spare register here. - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4) - .addReg(ARM::SP, RegState::Kill); - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, - TII.get(ARM::t2BICri), ARM::R4) - .addReg(ARM::R4, RegState::Kill) - .addImm(MaxAlign-1))); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) - .addReg(ARM::R4, RegState::Kill); - } - - AFI->setShouldRestoreSPFromFP(true); - } - - // If we need a base pointer, set it up here. It's whatever the value - // of the stack pointer is at this point. Any variable size objects - // will be allocated after this, so we can still use the base pointer - // to reference locals. - if (hasBasePointer(MF)) { - if (isARM) - BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), BasePtr) - .addReg(ARM::SP) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); - else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr) - .addReg(ARM::SP); - } - - // If the frame has variable sized objects then the epilogue must restore - // the sp from fp. 
- if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects()) - AFI->setShouldRestoreSPFromFP(true); -} - -static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { - for (unsigned i = 0; CSRegs[i]; ++i) - if (Reg == CSRegs[i]) - return true; - return false; -} - -static bool isCSRestore(MachineInstr *MI, - const ARMBaseInstrInfo &TII, - const unsigned *CSRegs) { - return ((MI->getOpcode() == (int)ARM::VLDRD || - MI->getOpcode() == (int)ARM::LDR || - MI->getOpcode() == (int)ARM::t2LDRi12) && - MI->getOperand(1).isFI() && - isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); -} - -void ARMBaseRegisterInfo:: -emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - assert(MBBI->getDesc().isReturn() && - "Can only insert epilog into returning blocks"); - unsigned RetOpcode = MBBI->getOpcode(); - DebugLoc dl = MBBI->getDebugLoc(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - assert(!AFI->isThumb1OnlyFunction() && - "This emitEpilogue does not support Thumb1!"); - bool isARM = !AFI->isThumbFunction(); - - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); - int NumBytes = (int)MFI->getStackSize(); - - if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); - } else { - // Unwind MBBI to point to first LDR / VLDRD. - const unsigned *CSRegs = getCalleeSavedRegs(); - if (MBBI != MBB.begin()) { - do - --MBBI; - while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); - if (!isCSRestore(MBBI, TII, CSRegs)) - ++MBBI; - } - - // Move SP to start of FP callee save spill area. - NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + - AFI->getGPRCalleeSavedArea2Size() + - AFI->getDPRCalleeSavedAreaSize()); - - // Reset SP based on frame pointer only if the stack frame extends beyond - // frame pointer stack slot or target is ELF and the function has FP. - if (AFI->shouldRestoreSPFromFP()) { - NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; - if (NumBytes) { - if (isARM) - emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, - ARMCC::AL, 0, TII); - else - emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, - ARMCC::AL, 0, TII); - } else { - // Thumb2 or ARM. - if (isARM) - BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) - .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); - else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) - .addReg(FramePtr); - } - } else if (NumBytes) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); - - // Move SP to start of integer callee save spill area 2. - movePastCSLoadStoreOps(MBB, MBBI, ARM::VLDRD, 0, 3, STI); - emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedAreaSize()); - - // Move SP to start of integer callee save spill area 1. - movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, ARM::t2LDRi12, 2, STI); - emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea2Size()); - - // Move SP to SP upon entry to the function. - movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, ARM::t2LDRi12, 1, STI); - emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea1Size()); - } - - if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || - RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) { - // Tail call return: adjust the stack pointer and jump to callee. 
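-    // (For instance, a TCRETURNdi pseudo becomes a TAILJMPd - or TAILJMPdt
-    // in Thumb mode - on the same global; the remaining operands of the
-    // pseudo are copied onto the new branch before the pseudo is erased.)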
- MBBI = prior(MBB.end()); - MachineOperand &JumpTarget = MBBI->getOperand(0); - - // Jump to label or value in register. - if (RetOpcode == ARM::TCRETURNdi) { - BuildMI(MBB, MBBI, dl, - TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), - JumpTarget.getTargetFlags()); - } else if (RetOpcode == ARM::TCRETURNdiND) { - BuildMI(MBB, MBBI, dl, - TII.get(STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), - JumpTarget.getTargetFlags()); - } else if (RetOpcode == ARM::TCRETURNri) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)). - addReg(JumpTarget.getReg(), RegState::Kill); - } else if (RetOpcode == ARM::TCRETURNriND) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)). - addReg(JumpTarget.getReg(), RegState::Kill); - } - - MachineInstr *NewMI = prior(MBBI); - for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) - NewMI->addOperand(MBBI->getOperand(i)); - - // Delete the pseudo instruction TCRETURN. - MBB.erase(MBBI); - } - - if (VARegSaveSize) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); -} - #include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index fa2eb6c..ba6bd2b 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -44,6 +44,45 @@ static inline bool isARMLowRegister(unsigned Reg) { } } +/// isARMArea1Register - Returns true if the register is a low register (r0-r7) +/// or a stack/pc register that we should push/pop. +static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) { + using namespace ARM; + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + case LR: case SP: case PC: + return true; + case R8: case R9: case R10: case R11: + // For darwin we want r7 and lr to be next to each other. + return !isDarwin; + default: + return false; + } +} + +static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) { + using namespace ARM; + switch (Reg) { + case R8: case R9: case R10: case R11: + // Darwin has this second area. + return isDarwin; + default: + return false; + } +} + +static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) { + using namespace ARM; + switch (Reg) { + case D15: case D14: case D13: case D12: + case D11: case D10: case D9: case D8: + return true; + default: + return false; + } +} + class ARMBaseRegisterInfo : public ARMGenRegisterInfo { protected: const ARMBaseInstrInfo &TII; @@ -65,12 +104,6 @@ protected: unsigned getOpcode(int Op) const; public: - /// getRegisterNumbering - Given the enum value for some register, e.g. - /// ARM::LR, return the number that it corresponds to (e.g. 14). It - /// also returns true in isSPVFP if the register is a single precision - /// VFP register. - static unsigned getRegisterNumbering(unsigned RegEnum, bool *isSPVFP = 0); - /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; @@ -106,14 +139,13 @@ public: void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const; - bool hasFP(const MachineFunction &MF) const; bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; bool needsStackRealignment(const MachineFunction &MF) const; int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const; bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const; - void materializeFrameBaseRegister(MachineBasicBlock::iterator I, + void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, int64_t Offset) const; void resolveFrameIndex(MachineBasicBlock::iterator I, @@ -122,17 +154,10 @@ public: bool cannotEliminateFrame(const MachineFunction &MF) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const; - // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; - int getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const; - int ResolveFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg, int SPAdj) const; - int getFrameIndexOffset(const MachineFunction &MF, int FI) const; + unsigned getBaseRegister() const { return BasePtr; } // Exception handling queries. unsigned getEHExceptionRegister() const; @@ -162,9 +187,6 @@ public: virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const; - virtual bool hasReservedCallFrame(const MachineFunction &MF) const; - virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const; - virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; @@ -172,12 +194,7 @@ public: virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - virtual void emitPrologue(MachineFunction &MF) const; - virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - private: - unsigned estimateRSStackSizeLimit(MachineFunction &MF) const; - unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const; unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const; diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h index 3b38375..69eddf0 100644 --- a/lib/Target/ARM/ARMBuildAttrs.h +++ b/lib/Target/ARM/ARMBuildAttrs.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file contains enumerations and support routines for ARM build attributes -// as defined in ARM ABI addenda document (ABI release 2.07). +// as defined in ARM ABI addenda document (ABI release 2.08). // //===----------------------------------------------------------------------===// @@ -16,7 +16,14 @@ #define __TARGET_ARMBUILDATTRS_H__ namespace ARMBuildAttrs { - enum { + enum SpecialAttr { + // This is for the .cpu asm attr. It translates into one or more + // AttrType (below) entries in the .ARM.attributes section in the ELF. 
+ SEL_CPU + }; + + enum AttrType { + // The rest correspond to ELF/.ARM.attributes File = 1, Section = 2, Symbol = 3, @@ -52,12 +59,72 @@ namespace ARMBuildAttrs { CPU_unaligned_access = 34, VFP_HP_extension = 36, ABI_FP_16bit_format = 38, + MPextension_use = 42, // was 70, 2.08 ABI + DIV_use = 44, nodefaults = 64, also_compatible_with = 65, T2EE_use = 66, conformance = 67, Virtualization_use = 68, - MPextension_use = 70 + MPextension_use_old = 70 + }; + + // Magic numbers for .ARM.attributes + enum AttrMagic { + Format_Version = 0x41 + }; + + // Legal Values for CPU_arch, (=6), uleb128 + enum CPUArch { + Pre_v4 = 0, + v4 = 1, // e.g. SA110 + v4T = 2, // e.g. ARM7TDMI + v5T = 3, // e.g. ARM9TDMI + v5TE = 4, // e.g. ARM946E_S + v5TEJ = 5, // e.g. ARM926EJ_S + v6 = 6, // e.g. ARM1136J_S + v6KZ = 7, // e.g. ARM1176JZ_S + v6T2 = 8, // e.g. ARM1156T2F_S + v6K = 9, // e.g. ARM1136J_S + v7 = 10, // e.g. Cortex A8, Cortex M3 + v6_M = 11, // e.g. Cortex M1 + v6S_M = 12, // v6_M with the System extensions + v7E_M = 13 // v7_M with DSP extensions + }; + + enum CPUArchProfile { // (=7), uleb128 + Not_Applicable = 0, // pre v7, or cross-profile code + ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8) + RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4) + MicroControllerProfile = (0x4D), // 'M' (e.g. for Cortex M3) + SystemProfile = (0x53) // 'S' Application or real-time profile + }; + + // The following have a lot of common use cases + enum { + // ARM_ISA_use (=8), uleb128 and THUMB_ISA_use (=9), uleb128 + Not_Allowed = 0, + Allowed = 1, + + // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10) + AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA) + AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA) + AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31 + AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA) + AllowFPv4B = 6, // v4 FP ISA permitted, but only D0-D15, S0-S31 + + // Tag_THUMB_ISA_use, (=9), uleb128 + AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions) + + // Tag_WMMX_arch, (=11), uleb128 + AllowWMMXv1 = 2, // The user permitted this entity to use WMMX v1 + + // Tag_ABI_FP_denormal, (=20), uleb128 + PreserveFPSign = 2, // sign when flushed-to-zero is preserved + + // Tag_ABI_FP_number_model, (=23), uleb128 + AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI]) + AllowIEE754 = 3 // this code may use all the IEEE 754-defined FP encodings }; } diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h new file mode 100644 index 0000000..ff7db1f --- /dev/null +++ b/lib/Target/ARM/ARMCallingConv.h @@ -0,0 +1,160 @@ +//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the custom routines for the ARM Calling Convention that +// aren't done by tablegen.
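+//
+// For example, under APCS an f64 is passed in two consecutive registers
+// from {r0,r1,r2,r3}; when only one register is left, the helpers below
+// record one register location and one 4-byte stack location, so a
+// single f64 always produces two CCValAssign entries, each marked
+// custom.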
+// +//===----------------------------------------------------------------------===// + +#ifndef ARMCALLINGCONV_H +#define ARMCALLINGCONV_H + +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "ARMBaseInstrInfo.h" +#include "ARMRegisterInfo.h" +#include "ARMSubtarget.h" +#include "ARM.h" + +namespace llvm { + +// APCS f64 is in register pairs, possibly split to stack +static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + CCState &State, bool CanFail) { + static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + + // Try to get the first register. + if (unsigned Reg = State.AllocateReg(RegList, 4)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else { + // For the 2nd half of a v2f64, do not fail. + if (CanFail) + return false; + + // Put the whole thing on the stack. + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(8, 4), + LocVT, LocInfo)); + return true; + } + + // Try to get the second register. + if (unsigned Reg = State.AllocateReg(RegList, 4)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(4, 4), + LocVT, LocInfo)); + return true; +} + +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true)) + return false; + if (LocVT == MVT::v2f64 && + !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false)) + return false; + return true; // we handled it +} + +// AAPCS f64 is in aligned register pairs +static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + CCState &State, bool CanFail) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 }; + + unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); + if (Reg == 0) { + // For the 2nd half of a v2f64, do not just fail. + if (CanFail) + return false; + + // Put the whole thing on the stack. 
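+    // (Unlike APCS, AAPCS never splits an f64 between a register and the
+    // stack: it needs an aligned pair, r0+r1 or r2+r3, so when no pair is
+    // free the whole value goes to 8-byte aligned stack. HiRegList and
+    // LoRegList hold the even and odd halves of each candidate pair.)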
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(8, 8), + LocVT, LocInfo)); + return true; + } + + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + unsigned T = State.AllocateReg(LoRegList[i]); + (void)T; + assert(T == LoRegList[i] && "Could not allocate register"); + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + LocVT, LocInfo)); + return true; +} + +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true)) + return false; + if (LocVT == MVT::v2f64 && + !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false)) + return false; + return true; // we handled it +} + +static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + + unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + if (Reg == 0) + return false; // we didn't handle it + + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + LocVT, LocInfo)); + return true; +} + +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State)) + return false; + if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State)) + return false; + return true; // we handled it +} + +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +} // End llvm namespace + +#endif diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 293e32a..426ba13 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -53,6 +53,34 @@ def RetCC_ARM_APCS : CallingConv<[ ]>; //===----------------------------------------------------------------------===// +// ARM APCS Calling Convention for FastCC (when VFP2 or later is available) +//===----------------------------------------------------------------------===// +def FastCC_ARM_APCS : CallingConv<[ + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + + CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, + S9, S10, S11, S12, S13, S14, S15]>>, + CCDelegateTo<CC_ARM_APCS> +]>; + +def RetFastCC_ARM_APCS : CallingConv<[ + // Handle all vector types as either f64 or v2f64. 
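+  // For example, a v4i16 return value is bit-converted to f64 and comes
+  // back in D0 when VFP2 or later is available; anything not matched
+  // here falls through to RetCC_ARM_APCS.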
+ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + + CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, + S9, S10, S11, S12, S13, S14, S15]>>, + CCDelegateTo<RetCC_ARM_APCS> +]>; + + +//===----------------------------------------------------------------------===// // ARM AAPCS (EABI) Calling Convention, common parts //===----------------------------------------------------------------------===// @@ -105,6 +133,7 @@ def RetCC_ARM_AAPCS : CallingConv<[ //===----------------------------------------------------------------------===// // ARM AAPCS-VFP (EABI) Calling Convention +// Also used for FastCC (when VFP2 or later is available) //===----------------------------------------------------------------------===// def CC_ARM_AAPCS_VFP : CallingConv<[ diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index b1a702f..9bbf6a0 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -74,7 +74,7 @@ namespace { /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for /// machine instructions. - unsigned getBinaryCodeForInstr(const MachineInstr &MI); + unsigned getBinaryCodeForInstr(const MachineInstr &MI) const; bool runOnMachineFunction(MachineFunction &MF); @@ -101,7 +101,6 @@ namespace { unsigned OpIdx); unsigned getMachineSoImmOpValue(unsigned SoImm); - unsigned getAddrModeSBit(const MachineInstr &MI, const TargetInstrDesc &TID) const; @@ -140,8 +139,6 @@ namespace { void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI); - void emitMiscInstruction(const MachineInstr &MI); - void emitNEONLaneInstruction(const MachineInstr &MI); void emitNEONDupInstruction(const MachineInstr &MI); void emitNEON1RegModImmInstruction(const MachineInstr &MI); @@ -150,20 +147,176 @@ namespace { /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO); - unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) { + unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; + unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const { return getMachineOpValue(MI, MI.getOperand(OpIdx)); } + // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the + // TableGen'erated getBinaryCodeForInstr() function to encode any + // operand values, instead querying getMachineOpValue() directly for + // each operand it needs to encode. Thus, any of the new encoder + // helper functions can simply return 0 as the values they return + // are already handled elsewhere. They are placeholders to allow this + // encoder to continue to function until the MC encoder is sufficiently + // far along that this one can be eliminated entirely.
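+    // In other words, a stub that returns 0 satisfies the interface that
+    // the TableGen'erated encoder expects while contributing no bits; the
+    // legacy path below ORs each field in by hand via getMachineOpValue().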
+ unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val) + const { return 0; } + unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val) + const { return 0; } + unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) + const { return 0; } + unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val) + const { return 0; } + unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI, + unsigned Op) const { return 0; } + unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getSORegOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2AddrModeImm12OpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,unsigned Op) + const { return 0; } + unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getRotImmOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getImmMinusOneOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI, + unsigned Op) const { return 0; } + unsigned getMsbOpValue(const MachineInstr &MI, + unsigned Op) const { return 0; } + uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx) + const {return 0; } + uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx) + const { return 0; } + + unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op) + const { + // {17-13} = reg + // {12} = (U)nsigned (add == '1', sub == '0') + // {11-0} = 
imm12 + const MachineOperand &MO = MI.getOperand(Op); + const MachineOperand &MO1 = MI.getOperand(Op + 1); + if (!MO.isReg()) { + emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); + return 0; + } + unsigned Reg = getARMRegisterNumbering(MO.getReg()); + int32_t Imm12 = MO1.getImm(); + uint32_t Binary; + Binary = Imm12 & 0xfff; + if (Imm12 >= 0) + Binary |= (1 << 12); + Binary |= (Reg << 13); + return Binary; + } + + unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const { + return 0; + } + + uint32_t getAddrMode2OpValue(const MachineInstr &MI, unsigned OpIdx) + const { return 0;} + uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) + const { return 0;} + uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) + const { return 0;} + uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + uint32_t getAddrModeSOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const { + // {17-13} = reg + // {12} = (U)nsigned (add == '1', sub == '0') + // {11-0} = imm12 + const MachineOperand &MO = MI.getOperand(Op); + const MachineOperand &MO1 = MI.getOperand(Op + 1); + if (!MO.isReg()) { + emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); + return 0; + } + unsigned Reg = getARMRegisterNumbering(MO.getReg()); + int32_t Imm12 = MO1.getImm(); + + // Special value for #-0 + if (Imm12 == INT32_MIN) + Imm12 = 0; + + // Immediate is always encoded as positive. The 'U' bit controls add vs + // sub. + bool isAdd = true; + if (Imm12 < 0) { + Imm12 = -Imm12; + isAdd = false; + } + + uint32_t Binary = Imm12 & 0xfff; + if (isAdd) + Binary |= (1 << 12); + Binary |= (Reg << 13); + return Binary; + } + unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + + unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + /// getMovi32Value - Return binary encoding of operand for movw/movt. If the /// machine operand requires relocation, record the relocation and return /// zero. unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO, unsigned Reloc); - unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx, - unsigned Reloc) { - return getMovi32Value(MI, MI.getOperand(OpIdx), Reloc); - } /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. /// @@ -173,12 +326,12 @@ namespace { /// fixed up by the relocation stage. 
void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, bool MayNeedFarStub, bool Indirect, - intptr_t ACPV = 0); - void emitExternalSymbolAddress(const char *ES, unsigned Reloc); - void emitConstPoolAddress(unsigned CPI, unsigned Reloc); - void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc); + intptr_t ACPV = 0) const; + void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; + void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; + void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc, - intptr_t JTBase = 0); + intptr_t JTBase = 0) const; }; } @@ -266,9 +419,9 @@ unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI, /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) { + const MachineOperand &MO) const { if (MO.isReg()) - return ARMRegisterInfo::getRegisterNumbering(MO.getReg()); + return getARMRegisterNumbering(MO.getReg()); else if (MO.isImm()) return static_cast<unsigned>(MO.getImm()); else if (MO.isGlobal()) @@ -285,12 +438,8 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative); else if (MO.isMBB()) emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch); - else { -#ifndef NDEBUG - errs() << MO; -#endif - llvm_unreachable(0); - } + else + llvm_unreachable("Unable to encode MachineOperand!"); return 0; } @@ -298,7 +447,7 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, /// void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, bool MayNeedFarStub, bool Indirect, - intptr_t ACPV) { + intptr_t ACPV) const { MachineRelocation MR = Indirect ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, const_cast<GlobalValue *>(GV), @@ -312,7 +461,8 @@ void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, /// emitExternalSymbolAddress - Arrange for the address of an external symbol to /// be emitted to the current location in the function, and allow it to be PC /// relative. -void ARMCodeEmitter::emitExternalSymbolAddress(const char *ES, unsigned Reloc) { +void ARMCodeEmitter:: +emitExternalSymbolAddress(const char *ES, unsigned Reloc) const { MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), Reloc, ES)); } @@ -320,7 +470,7 @@ void ARMCodeEmitter::emitExternalSymbolAddress(const char *ES, unsigned Reloc) { /// emitConstPoolAddress - Arrange for the address of a constant pool entry /// to be emitted to the current location in the function, and allow it to be PC /// relative. -void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) { +void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const { // Tell JIT emitter we'll resolve the address. MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), Reloc, CPI, 0, true)); } @@ -329,14 +479,16 @@ void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) { /// emitJumpTableAddress - Arrange for the address of a jump table to /// be emitted to the current location in the function, and allow it to be PC /// relative.
-void ARMCodeEmitter::emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) { +void ARMCodeEmitter:: +emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const { MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), Reloc, JTIndex, 0, true)); } /// emitMachineBasicBlock - Emit the specified address basic block. void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, - unsigned Reloc, intptr_t JTBase) { + unsigned Reloc, + intptr_t JTBase) const { MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), Reloc, BB, JTBase)); } @@ -364,6 +516,14 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { llvm_unreachable("Unhandled instruction encoding format!"); break; } + case ARMII::MiscFrm: + if (MI.getOpcode() == ARM::LEApcrelJT) { + // Materialize jumptable address. + emitLEApcrelJTInstruction(MI); + break; + } + llvm_unreachable("Unhandled instruction encoding!"); + break; case ARMII::Pseudo: emitPseudoInstruction(MI); break; @@ -418,9 +578,7 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { case ARMII::VFPLdStMulFrm: emitVFPLoadStoreMultipleInstruction(MI); break; - case ARMII::VFPMiscFrm: - emitMiscInstruction(MI); - break; + // NEON instructions. case ARMII::NGetLnFrm: case ARMII::NSetLnFrm: @@ -488,7 +646,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false); emitWordLE(0); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - uint32_t Val = *(uint32_t*)CI->getValue().getRawData(); + uint32_t Val = uint32_t(*CI->getValue().getRawData()); emitWordLE(Val); } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { if (CFP->getType()->isFloatTy()) @@ -588,7 +746,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Emit the 'add' instruction. - unsigned Binary = 0x4 << 21; // add: Insts{24-31} = 0b0100 + unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100 // Set the conditional execution predicate Binary |= II->getPredicate(&MI) << ARMII::CondShift; @@ -600,7 +758,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; // Encode Rn which is PC. - Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift; // Encode the displacement. Binary |= 1 << ARMII::I_BitShift; @@ -628,7 +786,7 @@ void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) { // Encode the shift operation. switch (Opcode) { default: break; - case ARM::MOVrx: + case ARM::RRX: // rrx Binary |= 0x6 << 4; break; @@ -659,10 +817,10 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { switch (Opcode) { default: llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); - case ARM::BX: - case ARM::BMOVPCRX: - case ARM::BXr9: - case ARM::BMOVPCRXr9: { + case ARM::BX_CALL: + case ARM::BMOVPCRX_CALL: + case ARM::BXr9_CALL: + case ARM::BMOVPCRXr9_CALL: { // First emit mov lr, pc unsigned Binary = 0x01a0e00f; Binary |= II->getPredicate(&MI) << ARMII::CondShift; @@ -720,18 +878,18 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { } case ARM::MOVi32imm: - emitMOVi32immInstruction(MI); - break; - - case ARM::MOVi2pieces: // Two instructions to materialize a constant. 
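    // (movw/movt can materialize any 32-bit value in two instructions on
    // v6T2 and later; MOVi2pieces instead uses a mov of one rotated 8-bit
    // chunk followed by an orr of the other.)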
- emitMOVi2piecesInstruction(MI); + if (Subtarget->hasV6T2Ops()) + emitMOVi32immInstruction(MI); + else + emitMOVi2piecesInstruction(MI); break; + case ARM::LEApcrelJT: // Materialize jumptable address. emitLEApcrelJTInstruction(MI); break; - case ARM::MOVrx: + case ARM::RRX: case ARM::MOVsrl_flag: case ARM::MOVsra_flag: emitPseudoMoveInstruction(MI); @@ -789,8 +947,7 @@ unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, if (Rs) { // Encode Rs bit[11:8]. assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | - (ARMRegisterInfo::getRegisterNumbering(Rs) << ARMII::RegRsShift); + return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); } // Encode shift_imm bit[11:7]. @@ -841,8 +998,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; else if (ImplicitRd) // Special handling for implicit use (e.g. PC). - Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd) - << ARMII::RegRdShift); + Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift); if (TID.Opcode == ARM::MOVi16) { // Get immediate from MI. @@ -892,8 +1048,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, if (!isUnary) { if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn) - << ARMII::RegRnShift); + Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); else { Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift; ++OpIdx; @@ -910,7 +1065,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, if (MO.isReg()) { // Encode register Rm. - emitWordLE(Binary | ARMRegisterInfo::getRegisterNumbering(MO.getReg())); + emitWordLE(Binary | getARMRegisterNumbering(MO.getReg())); return; } @@ -930,6 +1085,13 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); + // If this is an LDRi12, STRi12 or LDRcp, nothing more needs be done. + if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp || + MI.getOpcode() == ARM::STRi12) { + emitWordLE(Binary); + return; + } + // Set the conditional execution predicate Binary |= II->getPredicate(&MI) << ARMII::CondShift; @@ -946,16 +1108,14 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, // Set first operand if (ImplicitRd) // Special handling for implicit use (e.g. PC). - Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd) - << ARMII::RegRdShift); + Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn) - << ARMII::RegRnShift); + Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; @@ -978,11 +1138,11 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, return; } - // Set bit I(25), because this is not in immediate enconding. + // Set bit I(25), because this is not in immediate encoding. 
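// [Editorial note: illustrative, not part of the patch] For a register
// offset form such as "ldr r0, [r1, r2, lsl #2]", bit I(25) is set, Rm (r2)
// goes in bits [3:0], the shift amount (2) in shift_imm bits [11:7], and
// the shift type (LSL) in bits [6:5], exactly as encoded below.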
Binary |= 1 << ARMII::I_BitShift; assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); // Set bit[3:0] to the corresponding Rm register - Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg()); + Binary |= getARMRegisterNumbering(MO2.getReg()); // If this instr is in scaled register offset/index instruction, set // shift_immed(bit[11:7]) and shift(bit[6:5]) fields. @@ -1026,8 +1186,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn) - << ARMII::RegRnShift); + Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; @@ -1046,7 +1205,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, // If this instr is in register offset/index encoding, set bit[3:0] // to the corresponding Rm register. if (MO2.getReg()) { - Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg()); + Binary |= getARMRegisterNumbering(MO2.getReg()); emitWordLE(Binary); return; } @@ -1100,8 +1259,8 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; // Set addressing mode by modifying bits U(23) and P(24) - const MachineOperand &MO = MI.getOperand(OpIdx++); - Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm())); + ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode()); + Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode)); // Set bit W(21) if (IsUpdating) @@ -1112,7 +1271,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) break; - unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(MO.getReg()); + unsigned RegNum = getARMRegisterNumbering(MO.getReg()); assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && RegNum < 16); Binary |= 0x1 << RegNum; @@ -1349,7 +1508,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { if (TID.Opcode == ARM::BX_RET || TID.Opcode == ARM::MOVPCLR) // The return register is LR. 
- Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR); + Binary |= getARMRegisterNumbering(ARM::LR); else // otherwise, set the return register Binary |= getMachineOpValue(MI, 0); @@ -1360,8 +1519,8 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) { unsigned RegD = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - bool isSPVFP = false; - RegD = ARMRegisterInfo::getRegisterNumbering(RegD, &isSPVFP); + bool isSPVFP = ARM::SPRRegisterClass->contains(RegD); + RegD = getARMRegisterNumbering(RegD); if (!isSPVFP) Binary |= RegD << ARMII::RegRdShift; else { @@ -1374,8 +1533,8 @@ static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) { static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) { unsigned RegN = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - bool isSPVFP = false; - RegN = ARMRegisterInfo::getRegisterNumbering(RegN, &isSPVFP); + bool isSPVFP = ARM::SPRRegisterClass->contains(RegN); + RegN = getARMRegisterNumbering(RegN); if (!isSPVFP) Binary |= RegN << ARMII::RegRnShift; else { @@ -1388,8 +1547,8 @@ static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) { static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) { unsigned RegM = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - bool isSPVFP = false; - RegM = ARMRegisterInfo::getRegisterNumbering(RegM, &isSPVFP); + bool isSPVFP = ARM::SPRRegisterClass->contains(RegM); + RegM = getARMRegisterNumbering(RegM); if (!isSPVFP) Binary |= RegM; else { @@ -1548,8 +1707,8 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; // Set addressing mode by modifying bits U(23) and P(24) - const MachineOperand &MO = MI.getOperand(OpIdx++); - Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm())); + ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode()); + Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode)); // Set bit W(21) if (IsUpdating) @@ -1576,63 +1735,10 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) { - unsigned Opcode = MI.getDesc().Opcode; - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - switch(Opcode) { - default: - llvm_unreachable("ARMCodeEmitter::emitMiscInstruction"); - - case ARM::FMSTAT: - // No further encoding needed. - break; - - case ARM::VMRS: - case ARM::VMSR: { - const MachineOperand &MO0 = MI.getOperand(0); - // Encode Rt. - Binary |= ARMRegisterInfo::getRegisterNumbering(MO0.getReg()) - << ARMII::RegRdShift; - break; - } - - case ARM::FCONSTD: - case ARM::FCONSTS: { - // Encode Dd / Sd. 
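// [Editorial note: illustrative, not part of the patch] The encoding being
// removed here packs a floating-point constant into 8 bits "abcdefgh",
// where a is the sign and bcdefgh are the leading exponent/mantissa bits.
// For FCONSTS with 1.0f (bit pattern 0x3F800000): a = 0, bcdefgh = 0x70,
// so ModifiedImm = 0x70; abcd (0b0111) lands in Insts{19-16} and efgh
// (0b0000) in Insts{3-0}.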
- Binary |= encodeVFPRd(MI, 0); - - // Encode imm., Table A7-18 VFP modified immediate constants - const MachineOperand &MO1 = MI.getOperand(1); - unsigned Imm = static_cast<unsigned>(MO1.getFPImm()->getValueAPF() - .bitcastToAPInt().getHiBits(32).getLimitedValue()); - unsigned ModifiedImm; - - if(Opcode == ARM::FCONSTS) - ModifiedImm = (Imm & 0x80000000) >> 24 | // a - (Imm & 0x03F80000) >> 19; // bcdefgh - else // Opcode == ARM::FCONSTD - ModifiedImm = (Imm & 0x80000000) >> 24 | // a - (Imm & 0x007F0000) >> 16; // bcdefgh - - // Insts{19-16} = abcd, Insts{3-0} = efgh - Binary |= ((ModifiedImm & 0xF0) >> 4) << 16; - Binary |= (ModifiedImm & 0xF); - break; - } - } - - emitWordLE(Binary); -} - static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) { unsigned RegD = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegD = ARMRegisterInfo::getRegisterNumbering(RegD); + RegD = getARMRegisterNumbering(RegD); Binary |= (RegD & 0xf) << ARMII::RegRdShift; Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift; return Binary; @@ -1641,7 +1747,7 @@ static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) { static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) { unsigned RegN = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegN = ARMRegisterInfo::getRegisterNumbering(RegN); + RegN = getARMRegisterNumbering(RegN); Binary |= (RegN & 0xf) << ARMII::RegRnShift; Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift; return Binary; @@ -1650,7 +1756,7 @@ static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) { static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) { unsigned RegM = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegM = ARMRegisterInfo::getRegisterNumbering(RegM); + RegM = getARMRegisterNumbering(RegM); Binary |= (RegM & 0xf); Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift; return Binary; @@ -1684,7 +1790,7 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; unsigned RegT = MI.getOperand(RegTOpIdx).getReg(); - RegT = ARMRegisterInfo::getRegisterNumbering(RegT); + RegT = getARMRegisterNumbering(RegT); Binary |= (RegT << ARMII::RegRdShift); Binary |= encodeNEONRn(MI, RegNOpIdx); @@ -1713,7 +1819,7 @@ void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) { Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; unsigned RegT = MI.getOperand(1).getReg(); - RegT = ARMRegisterInfo::getRegisterNumbering(RegT); + RegT = getARMRegisterNumbering(RegT); Binary |= (RegT << ARMII::RegRdShift); Binary |= encodeNEONRn(MI, 0); emitWordLE(Binary); diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 60e923b..13d1b33 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1,4 +1,4 @@ -//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===// +//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===// // // The LLVM Compiler Infrastructure // @@ -316,7 +316,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { } /// The next UID to take is the first unused one. 
- AFI->initConstPoolEntryUId(CPEMIs.size()); + AFI->initPICLabelUId(CPEMIs.size()); // Do the initial scan of the function, building up information about the // sizes of each block, the location of all the water, and finding all of the @@ -327,7 +327,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { /// Remove dead constant pool entries. - RemoveUnusedCPEntries(); + MadeChange |= RemoveUnusedCPEntries(); // Iteratively place constant pool entries and fix up branches until there // is no change. @@ -368,6 +368,14 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) MadeChange |= UndoLRSpillRestore(); + // Save the mapping between original and cloned constpool entries. + for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { + for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) { + const CPEntry & CPE = CPEntries[i][j]; + AFI->recordCPEClone(i, CPE.CPI); + } + } + DEBUG(errs() << '\n'; dumpBBs()); BBSizes.clear(); @@ -482,7 +490,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, HasInlineAsm = true; } - // Now go back through the instructions and build up our data structures + // Now go back through the instructions and build up our data structures. unsigned Offset = 0; for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); MBBI != E; ++MBBI) { @@ -603,7 +611,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, Scale = 4; break; - case ARM::LDR: + case ARM::LDRi12: case ARM::LDRcp: case ARM::t2LDRpci: Bits = 12; // +-offset_12 @@ -611,7 +619,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, break; case ARM::tLDRpci: - case ARM::tLDRcp: Bits = 8; Scale = 4; // +(offset_8*4) break; @@ -692,7 +699,7 @@ static bool CompareMBBNumbers(const MachineBasicBlock *LHS, /// machine function, it upsets all of the block numbers. Renumber the blocks /// and update the arrays that parallel this numbering. void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { - // Renumber the MBB's to keep them consequtive. + // Renumber the MBB's to keep them consecutive. NewBB->getParent()->RenumberBlocks(NewBB); // Insert a size into BBSizes to align it properly with the (newly @@ -1242,7 +1249,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, // No existing clone of this CPE is within range. // We will be generating a new clone. Get a UID for it. - unsigned ID = AFI->createConstPoolEntryUId(); + unsigned ID = AFI->createPICLabelUId(); // Look for water where we can place this CPE. MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock(); @@ -1644,7 +1651,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { unsigned DestOffset = BBOffsets[DestBB->getNumber()]; if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { MachineBasicBlock::iterator CmpMI = Br.MI; --CmpMI; - if (CmpMI->getOpcode() == ARM::tCMPzi8) { + if (CmpMI->getOpcode() == ARM::tCMPi8) { unsigned Reg = CmpMI->getOperand(0).getReg(); Pred = llvm::getInstrPredicate(CmpMI, PredReg); if (Pred == ARMCC::AL && @@ -1766,7 +1773,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { if (!OptOk) continue; - unsigned Opc = ByteOk ? ARM::t2TBB : ARM::t2TBH; + unsigned Opc = ByteOk ? 
ARM::t2TBB_JT : ARM::t2TBH_JT; MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc)) .addReg(IdxReg, getKillRegState(IdxRegKill)) .addJumpTableIndex(JTI, JTOP.getTargetFlags()) diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index f13ccc6..165a1d8 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -24,7 +24,7 @@ using namespace llvm; ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id, ARMCP::ARMCPKind K, unsigned char PCAdj, - const char *Modif, + ARMCP::ARMCPModifier Modif, bool AddCA) : MachineConstantPoolValue((const Type*)cval->getType()), CVal(cval), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj), @@ -33,17 +33,17 @@ ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id, ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id, unsigned char PCAdj, - const char *Modif, + ARMCP::ARMCPModifier Modif, bool AddCA) : MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)), CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol), PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {} ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv, - const char *Modif) + ARMCP::ARMCPModifier Modif) : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())), CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0), - Modifier(Modif) {} + Modifier(Modif), AddCurrentAddress(false) {} const GlobalValue *ARMConstantPoolValue::getGV() const { return dyn_cast_or_null<GlobalValue>(CVal); @@ -53,6 +53,14 @@ const BlockAddress *ARMConstantPoolValue::getBlockAddress() const { return dyn_cast_or_null<BlockAddress>(CVal); } +static bool CPV_streq(const char *S1, const char *S2) { + if (S1 == S2) + return true; + if (S1 && S2 && strcmp(S1, S2) == 0) + return true; + return false; +} + int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { unsigned AlignMask = Alignment - 1; @@ -65,8 +73,8 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, if (CPV->CVal == CVal && CPV->LabelId == LabelId && CPV->PCAdjust == PCAdjust && - (CPV->S == S || strcmp(CPV->S, S) == 0) && - (CPV->Modifier == Modifier || strcmp(CPV->Modifier, Modifier) == 0)) + CPV_streq(CPV->S, S) && + CPV->Modifier == Modifier) return i; } } @@ -91,8 +99,8 @@ ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) { if (ACPV->Kind == Kind && ACPV->CVal == CVal && ACPV->PCAdjust == PCAdjust && - (ACPV->S == S || strcmp(ACPV->S, S) == 0) && - (ACPV->Modifier == Modifier || strcmp(ACPV->Modifier, Modifier) == 0)) { + CPV_streq(ACPV->S, S) && + ACPV->Modifier == Modifier) { if (ACPV->LabelId == LabelId) return true; // Two PC relative constpool entries containing the same GV address or @@ -113,7 +121,7 @@ void ARMConstantPoolValue::print(raw_ostream &O) const { O << CVal->getName(); else O << S; - if (Modifier) O << "(" << Modifier << ")"; + if (Modifier) O << "(" << getModifierText() << ")"; if (PCAdjust != 0) { O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust; if (AddCurrentAddress) O << "-."; diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 3119b54..d008811 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -15,6 +15,7 @@ #define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H #include "llvm/CodeGen/MachineConstantPool.h" +#include 
"llvm/Support/ErrorHandling.h" #include <cstddef> namespace llvm { @@ -31,6 +32,15 @@ namespace ARMCP { CPBlockAddress, CPLSDA }; + + enum ARMCPModifier { + no_modifier, + TLSGD, + GOT, + GOTOFF, + GOTTPOFF, + TPOFF + }; } /// ARMConstantPoolValue - ARM specific constantpool value. This is used to @@ -43,26 +53,41 @@ class ARMConstantPoolValue : public MachineConstantPoolValue { ARMCP::ARMCPKind Kind; // Kind of constant. unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative. // 8 for ARM, 4 for Thumb. - const char *Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8)) + ARMCP::ARMCPModifier Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8)) bool AddCurrentAddress; public: ARMConstantPoolValue(const Constant *cval, unsigned id, ARMCP::ARMCPKind Kind = ARMCP::CPValue, - unsigned char PCAdj = 0, const char *Modifier = NULL, + unsigned char PCAdj = 0, + ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier, bool AddCurrentAddress = false); ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id, - unsigned char PCAdj = 0, const char *Modifier = NULL, + unsigned char PCAdj = 0, + ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier, bool AddCurrentAddress = false); - ARMConstantPoolValue(const GlobalValue *GV, const char *Modifier); + ARMConstantPoolValue(const GlobalValue *GV, ARMCP::ARMCPModifier Modifier); ARMConstantPoolValue(); ~ARMConstantPoolValue(); const GlobalValue *getGV() const; const char *getSymbol() const { return S; } const BlockAddress *getBlockAddress() const; - const char *getModifier() const { return Modifier; } - bool hasModifier() const { return Modifier != NULL; } + ARMCP::ARMCPModifier getModifier() const { return Modifier; } + const char *getModifierText() const { + switch (Modifier) { + default: llvm_unreachable("Unknown modifier!"); + // FIXME: Are these case sensitive? It'd be nice to lower-case all the + // strings if that's legal. + case ARMCP::no_modifier: return "none"; + case ARMCP::TLSGD: return "tlsgd"; + case ARMCP::GOT: return "GOT"; + case ARMCP::GOTOFF: return "GOTOFF"; + case ARMCP::GOTTPOFF: return "gottpoff"; + case ARMCP::TPOFF: return "tpoff"; + } + } + bool hasModifier() const { return Modifier != ARMCP::no_modifier; } bool mustAddCurrentAddress() const { return AddCurrentAddress; } unsigned getLabelId() const { return LabelId; } unsigned char getPCAdjustment() const { return PCAdjust; } @@ -71,11 +96,7 @@ public: bool isBlockAddress() { return Kind == ARMCP::CPBlockAddress; } bool isLSDA() { return Kind == ARMCP::CPLSDA; } - virtual unsigned getRelocationInfo() const { - // FIXME: This is conservatively claiming that these entries require a - // relocation, we may be able to do better than this. - return 2; - } + virtual unsigned getRelocationInfo() const { return 2; } virtual int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment); diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp new file mode 100644 index 0000000..51e68b4 --- /dev/null +++ b/lib/Target/ARM/ARMELFWriterInfo.cpp @@ -0,0 +1,83 @@ +//===-- ARMELFWriterInfo.cpp - ELF Writer Info for the ARM backend --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF writer information for the ARM backend. 
+// +//===----------------------------------------------------------------------===// + +#include "ARMELFWriterInfo.h" +#include "ARMRelocations.h" +#include "llvm/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ELF.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Implementation of the ARMELFWriterInfo class +//===----------------------------------------------------------------------===// + +ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM) + : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64, + TM.getTargetData()->isLittleEndian()) { +} + +ARMELFWriterInfo::~ARMELFWriterInfo() {} + +unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { + switch (MachineRelTy) { + case ARM::reloc_arm_absolute: + case ARM::reloc_arm_relative: + case ARM::reloc_arm_cp_entry: + case ARM::reloc_arm_vfp_cp_entry: + case ARM::reloc_arm_machine_cp_entry: + case ARM::reloc_arm_jt_base: + case ARM::reloc_arm_pic_jt: + assert(0 && "unsupported ARM relocation type"); break; + + case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break; + case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; break; + case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; break; + default: + llvm_unreachable("unknown ARM relocation type"); break; + } + return 0; +} + +long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier) const { + assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented"); + return 0; +} + +unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const { + assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented"); + return 0; +} + +bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { + assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented"); + return true; +} + +unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const { + assert(0 && + "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented"); + return 0; +} + +long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset, + unsigned RelOffset, + unsigned RelTy) const { + assert(0 && + "ARMELFWriterInfo::computeRelocation() not implemented"); + return 0; +} diff --git a/lib/Target/ARM/ARMELFWriterInfo.h b/lib/Target/ARM/ARMELFWriterInfo.h new file mode 100644 index 0000000..1c4e532 --- /dev/null +++ b/lib/Target/ARM/ARMELFWriterInfo.h @@ -0,0 +1,58 @@ +//===-- ARMELFWriterInfo.h - ELF Writer Info for ARM ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF writer information for the ARM backend. +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_ELF_WRITER_INFO_H +#define ARM_ELF_WRITER_INFO_H + +#include "llvm/Target/TargetELFWriterInfo.h" + +namespace llvm { + + class ARMELFWriterInfo : public TargetELFWriterInfo { + public: + ARMELFWriterInfo(TargetMachine &TM); + virtual ~ARMELFWriterInfo(); + + /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type + virtual unsigned getRelocationType(unsigned MachineRelTy) const; + + /// hasRelocationAddend - True if the target uses an addend in the + /// ELF relocation entry. + virtual bool hasRelocationAddend() const { return false; } + + /// getDefaultAddendForRelTy - Gets the default addend value for a + /// relocation entry based on the target ELF relocation type. + virtual long int getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier = 0) const; + + /// getRelocationTySize - Returns the size of the relocatable field in bits + virtual unsigned getRelocationTySize(unsigned RelTy) const; + + /// isPCRelativeRel - True if the relocation type is pc relative + virtual bool isPCRelativeRel(unsigned RelTy) const; + + /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type used + /// to reference an absolute label, e.g. for a jumptable entry. + virtual unsigned getAbsoluteLabelMachineRelTy() const; + + /// computeRelocation - Some relocatable fields can be relocated + /// directly, avoiding relocation symbol emission; compute the + /// final relocation value for this symbol. + virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, + unsigned RelTy) const; + }; + +} // end llvm namespace + +#endif // ARM_ELF_WRITER_INFO_H diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index fc2e3c3..bd753d2 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -7,36 +7,38 @@ // //===----------------------------------------------------------------------===// // -// This file contains a pass that expand pseudo instructions into target +// This file contains a pass that expands pseudo instructions into target // instructions to allow proper scheduling, if-conversion, and other late // optimizations. This pass should be run after register allocation but before -// post- regalloc scheduling pass. +// the post-regalloc scheduling pass. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "arm-pseudo" #include "ARM.h" +#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRegisterInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! using namespace llvm; namespace { class ARMExpandPseudo : public MachineFunctionPass { - // Constants for register spacing in NEON load/store instructions.
- enum NEONRegSpacing { - SingleSpc, - EvenDblSpc, - OddDblSpc - }; - public: static char ID; ARMExpandPseudo() : MachineFunctionPass(ID) {} - const TargetInstrInfo *TII; + const ARMBaseInstrInfo *TII; const TargetRegisterInfo *TRI; + const ARMSubtarget *STI; + ARMFunctionInfo *AFI; virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -47,11 +49,16 @@ namespace { private: void TransferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); + bool ExpandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); bool ExpandMBB(MachineBasicBlock &MBB); - void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc, - bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs); - void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc, - bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs); + void ExpandVLD(MachineBasicBlock::iterator &MBBI); + void ExpandVST(MachineBasicBlock::iterator &MBBI); + void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); + void ExpandVTBL(MachineBasicBlock::iterator &MBBI, + unsigned Opc, bool IsExt, unsigned NumRegs); + void ExpandMOV32BitImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI); }; char ARMExpandPseudo::ID = 0; } @@ -67,44 +74,349 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, const MachineOperand &MO = OldMI.getOperand(i); assert(MO.isReg() && MO.getReg()); if (MO.isUse()) - UseMI.addReg(MO.getReg(), getKillRegState(MO.isKill())); + UseMI.addOperand(MO); else - DefMI.addReg(MO.getReg(), - getDefRegState(true) | getDeadRegState(MO.isDead())); + DefMI.addOperand(MO); + } +} + +namespace { + // Constants for register spacing in NEON load/store instructions. + // For quad-register load-lane and store-lane pseudo instructions, the + // spacing is initially assumed to be EvenDblSpc, and that is changed to + // OddDblSpc depending on the lane number operand. + enum NEONRegSpacing { + SingleSpc, + EvenDblSpc, + OddDblSpc + }; + + // Entries for the NEON load/store information table. The table is sorted by + // PseudoOpc for fast binary-search lookups. + struct NEONLdStTableEntry { + unsigned PseudoOpc; + unsigned RealOpc; + bool IsLoad; + bool HasWriteBack; + NEONRegSpacing RegSpacing; + unsigned char NumRegs; // D registers loaded or stored + unsigned char RegElts; // elements per D register; used for lane ops + + // Comparison methods for binary search of the table.
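// [Editorial note: illustrative, not part of the patch] The mixed
// entry/opcode overloads below are what allow std::lower_bound to probe
// the sorted table with a bare opcode key, roughly:
//   const NEONLdStTableEntry *E =
//     std::lower_bound(Table, Table + NumEntries, Opcode);
//   bool Hit = E != Table + NumEntries && E->PseudoOpc == Opcode;
// mirroring the LookupNEONLdSt helper later in this patch.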
+ bool operator<(const NEONLdStTableEntry &TE) const { + return PseudoOpc < TE.PseudoOpc; + } + friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) { + return TE.PseudoOpc < PseudoOpc; + } + friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc, + const NEONLdStTableEntry &TE) { + return PseudoOpc < TE.PseudoOpc; + } + }; +} + +static const NEONLdStTableEntry NEONLdStTable[] = { +{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4}, +{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4}, +{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, SingleSpc, 2, 2}, +{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2}, +{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8}, +{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8}, + +{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 }, +{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 }, +{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 }, +{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 }, +{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 }, +{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 }, + +{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 }, +{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 }, +{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 }, +{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 }, + +{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 }, +{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 }, +{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 }, +{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 }, +{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 }, +{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 }, +{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 }, +{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 }, + +{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4}, +{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4}, +{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2}, +{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2}, +{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8}, +{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8}, + +{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 }, +{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 }, +{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 }, +{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 }, +{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 }, +{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 }, +{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 }, +{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 }, +{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 }, +{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 }, + +{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 }, +{ ARM::VLD2d16Pseudo_UPD, 
ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 }, +{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 }, +{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 }, +{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 }, +{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 }, + +{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 }, +{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 }, +{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 }, +{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 }, +{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 }, +{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 }, + +{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4}, +{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4}, +{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2}, +{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2}, +{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8}, +{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8}, + +{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 }, +{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 }, +{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 }, +{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 }, +{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 }, +{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 }, +{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 }, +{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 }, +{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 }, +{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 }, + +{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 }, +{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 }, +{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 }, +{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 }, +{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 }, +{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 }, + +{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 }, +{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 }, +{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 }, +{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 }, +{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 }, +{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 }, +{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 }, +{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 }, +{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 }, + +{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4}, +{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4}, +{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2}, +{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2}, +{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8}, +{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, 
true, SingleSpc, 4, 8}, + +{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 }, +{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 }, +{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 }, +{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 }, +{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 }, +{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 }, +{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 }, +{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 }, +{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 }, +{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 }, + +{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 }, +{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 }, +{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 }, +{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 }, +{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 }, +{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 }, + +{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 }, +{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 }, +{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 }, +{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 }, +{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 }, +{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 }, +{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 }, +{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 }, +{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 }, + +{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 }, +{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 }, +{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 }, +{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 }, +{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 }, +{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 }, + +{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 }, +{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 }, +{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 }, +{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 }, + +{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 }, +{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 }, +{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 }, +{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 }, +{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 }, +{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 }, +{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 }, +{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 }, + +{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 }, +{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 }, +{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 }, +{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, 
SingleSpc, 2, 2 }, +{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 }, +{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 }, +{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4}, +{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4}, +{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2}, +{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2}, + +{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 }, +{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 }, +{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 }, +{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 }, +{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 }, +{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 }, + +{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 }, +{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 }, +{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 }, +{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 }, +{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 }, +{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 }, + +{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 }, +{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 }, +{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 }, +{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 }, +{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 }, +{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 }, +{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4}, +{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4}, +{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2}, +{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2}, + +{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 }, +{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 }, +{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 }, +{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 }, +{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 }, +{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 }, + +{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 }, +{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 }, +{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 }, +{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 }, +{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 }, +{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 }, +{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 }, +{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 }, +{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 }, + +{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 }, +{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 }, +{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 }, +{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, 
SingleSpc, 4, 2 }, +{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 }, +{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 }, +{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4}, +{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4}, +{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2}, +{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2}, + +{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 }, +{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 }, +{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 }, +{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 }, +{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 }, +{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 }, + +{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 }, +{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 }, +{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 }, +{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 }, +{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 }, +{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 }, +{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 }, +{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 }, +{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 } +}; + +/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON +/// load or store pseudo instruction. +static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) { + unsigned NumEntries = array_lengthof(NEONLdStTable); + +#ifndef NDEBUG + // Make sure the table is sorted. + static bool TableChecked = false; + if (!TableChecked) { + for (unsigned i = 0; i != NumEntries-1; ++i) + assert(NEONLdStTable[i] < NEONLdStTable[i+1] && + "NEONLdStTable is not sorted!"); + TableChecked = true; + } +#endif + + const NEONLdStTableEntry *I = + std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode); + if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode) + return I; + return NULL; +} + +/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register, +/// corresponding to the specified register spacing. Not all of the results +/// are necessarily valid, e.g., a Q register only has 2 D subregisters. +static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc, + const TargetRegisterInfo *TRI, unsigned &D0, + unsigned &D1, unsigned &D2, unsigned &D3) { + if (RegSpc == SingleSpc) { + D0 = TRI->getSubReg(Reg, ARM::dsub_0); + D1 = TRI->getSubReg(Reg, ARM::dsub_1); + D2 = TRI->getSubReg(Reg, ARM::dsub_2); + D3 = TRI->getSubReg(Reg, ARM::dsub_3); + } else if (RegSpc == EvenDblSpc) { + D0 = TRI->getSubReg(Reg, ARM::dsub_0); + D1 = TRI->getSubReg(Reg, ARM::dsub_2); + D2 = TRI->getSubReg(Reg, ARM::dsub_4); + D3 = TRI->getSubReg(Reg, ARM::dsub_6); + } else { + assert(RegSpc == OddDblSpc && "unknown register spacing"); + D0 = TRI->getSubReg(Reg, ARM::dsub_1); + D1 = TRI->getSubReg(Reg, ARM::dsub_3); + D2 = TRI->getSubReg(Reg, ARM::dsub_5); + D3 = TRI->getSubReg(Reg, ARM::dsub_7); } } /// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register /// operands to real VLD instructions with D register operands. 
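// [Editorial note: illustrative, not part of the patch] For example, the
// table maps VLD1q8Pseudo to VLD1q8 with SingleSpc and NumRegs == 2, so a
// load of a Q register is rewritten onto its two D sub-registers:
//   VLD1q8Pseudo qD, addr  ==>  VLD1q8 dsub_0(qD), dsub_1(qD), addr, pred
// with an implicit-def of qD appended to keep liveness information correct.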
-void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI, - unsigned Opc, bool hasWriteBack, - NEONRegSpacing RegSpc, unsigned NumRegs) { +void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; MachineBasicBlock &MBB = *MI.getParent(); - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); + assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed"); + NEONRegSpacing RegSpc = TableEntry->RegSpacing; + unsigned NumRegs = TableEntry->NumRegs; + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(TableEntry->RealOpc)); unsigned OpIdx = 0; bool DstIsDead = MI.getOperand(OpIdx).isDead(); unsigned DstReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; - if (RegSpc == SingleSpc) { - D0 = TRI->getSubReg(DstReg, ARM::dsub_0); - D1 = TRI->getSubReg(DstReg, ARM::dsub_1); - D2 = TRI->getSubReg(DstReg, ARM::dsub_2); - D3 = TRI->getSubReg(DstReg, ARM::dsub_3); - } else if (RegSpc == EvenDblSpc) { - D0 = TRI->getSubReg(DstReg, ARM::dsub_0); - D1 = TRI->getSubReg(DstReg, ARM::dsub_2); - D2 = TRI->getSubReg(DstReg, ARM::dsub_4); - D3 = TRI->getSubReg(DstReg, ARM::dsub_6); - } else { - assert(RegSpc == OddDblSpc && "unknown register spacing for VLD"); - D0 = TRI->getSubReg(DstReg, ARM::dsub_1); - D1 = TRI->getSubReg(DstReg, ARM::dsub_3); - D2 = TRI->getSubReg(DstReg, ARM::dsub_5); - D3 = TRI->getSubReg(DstReg, ARM::dsub_7); - } + GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); if (NumRegs > 2) @@ -112,107 +424,373 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI, if (NumRegs > 3) MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); - if (hasWriteBack) { - bool WBIsDead = MI.getOperand(OpIdx).isDead(); - unsigned WBReg = MI.getOperand(OpIdx++).getReg(); - MIB.addReg(WBReg, RegState::Define | getDeadRegState(WBIsDead)); - } + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); + // Copy the addrmode6 operands. - bool AddrIsKill = MI.getOperand(OpIdx).isKill(); - MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill)); - MIB.addImm(MI.getOperand(OpIdx++).getImm()); - if (hasWriteBack) { - // Copy the am6offset operand. - bool OffsetIsKill = MI.getOperand(OpIdx).isKill(); - MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill)); - } + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + // Copy the am6offset operand. + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); - MIB = AddDefaultPred(MIB); - TransferImpOps(MI, MIB, MIB); - // For an instruction writing the odd subregs, add an implicit use of the - // super-register because the even subregs were loaded separately. - if (RegSpc == OddDblSpc) - MIB.addReg(DstReg, RegState::Implicit); + // For an instruction writing double-spaced subregs, the pseudo instruction + // has an extra operand that is a use of the super-register. Record the + // operand index and skip over it. + unsigned SrcOpIdx = 0; + if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc) + SrcOpIdx = OpIdx++; + + // Copy the predicate operands. 
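// [Editorial note: illustrative, not part of the patch] ARM predicate
// operands always travel as a pair, a condition-code immediate plus a CPSR
// register operand, e.g. the (ARMCC::AL, reg:0) pair that the removed
// AddDefaultPred call used to append; both halves are copied here.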
+ MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the super-register source operand used for double-spaced subregs over + // to the new instruction as an implicit operand. + if (SrcOpIdx != 0) { + MachineOperand MO = MI.getOperand(SrcOpIdx); + MO.setImplicit(true); + MIB.addOperand(MO); + } // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); + TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); } /// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register /// operands to real VST instructions with D register operands. -void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI, - unsigned Opc, bool hasWriteBack, - NEONRegSpacing RegSpc, unsigned NumRegs) { +void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; MachineBasicBlock &MBB = *MI.getParent(); - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); + assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed"); + NEONRegSpacing RegSpc = TableEntry->RegSpacing; + unsigned NumRegs = TableEntry->NumRegs; + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(TableEntry->RealOpc)); unsigned OpIdx = 0; - if (hasWriteBack) { - bool DstIsDead = MI.getOperand(OpIdx).isDead(); - unsigned DstReg = MI.getOperand(OpIdx++).getReg(); - MIB.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)); - } + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); + // Copy the addrmode6 operands. - bool AddrIsKill = MI.getOperand(OpIdx).isKill(); - MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill)); - MIB.addImm(MI.getOperand(OpIdx++).getImm()); - if (hasWriteBack) { - // Copy the am6offset operand. - bool OffsetIsKill = MI.getOperand(OpIdx).isKill(); - MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill)); - } + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + // Copy the am6offset operand. + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); - unsigned SrcReg = MI.getOperand(OpIdx).getReg(); + unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; - if (RegSpc == SingleSpc) { - D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); - D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); - D2 = TRI->getSubReg(SrcReg, ARM::dsub_2); - D3 = TRI->getSubReg(SrcReg, ARM::dsub_3); - } else if (RegSpc == EvenDblSpc) { - D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); - D1 = TRI->getSubReg(SrcReg, ARM::dsub_2); - D2 = TRI->getSubReg(SrcReg, ARM::dsub_4); - D3 = TRI->getSubReg(SrcReg, ARM::dsub_6); - } else { - assert(RegSpc == OddDblSpc && "unknown register spacing for VST"); - D0 = TRI->getSubReg(SrcReg, ARM::dsub_1); - D1 = TRI->getSubReg(SrcReg, ARM::dsub_3); - D2 = TRI->getSubReg(SrcReg, ARM::dsub_5); - D3 = TRI->getSubReg(SrcReg, ARM::dsub_7); - } - + GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); MIB.addReg(D0).addReg(D1); if (NumRegs > 2) MIB.addReg(D2); if (NumRegs > 3) MIB.addReg(D3); - MIB = AddDefaultPred(MIB); + + // Copy the predicate operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + + if (SrcIsKill) + // Add an implicit kill for the super-reg. 
+ (*MIB).addRegisterKilled(SrcReg, TRI, true); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); +} + +/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ +/// register operands to real instructions with D register operands. +void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); + assert(TableEntry && "NEONLdStTable lookup failed"); + NEONRegSpacing RegSpc = TableEntry->RegSpacing; + unsigned NumRegs = TableEntry->NumRegs; + unsigned RegElts = TableEntry->RegElts; + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(TableEntry->RealOpc)); + unsigned OpIdx = 0; + // The lane operand is always the 3rd from last operand, before the 2 + // predicate operands. + unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm(); + + // Adjust the lane and spacing as needed for Q registers. + assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane"); + if (RegSpc == EvenDblSpc && Lane >= RegElts) { + RegSpc = OddDblSpc; + Lane -= RegElts; + } + assert(Lane < RegElts && "out of range lane for VLD/VST-lane"); + + unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0; + unsigned DstReg = 0; + bool DstIsDead = false; + if (TableEntry->IsLoad) { + DstIsDead = MI.getOperand(OpIdx).isDead(); + DstReg = MI.getOperand(OpIdx++).getReg(); + GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); + MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 1) + MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 2) + MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 3) + MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); + } + + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the addrmode6 operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + // Copy the am6offset operand. + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Grab the super-register source. + MachineOperand MO = MI.getOperand(OpIdx++); + if (!TableEntry->IsLoad) + GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3); + + // Add the subregs as sources of the new instruction. + unsigned SrcFlags = (getUndefRegState(MO.isUndef()) | + getKillRegState(MO.isKill())); + MIB.addReg(D0, SrcFlags); + if (NumRegs > 1) + MIB.addReg(D1, SrcFlags); + if (NumRegs > 2) + MIB.addReg(D2, SrcFlags); + if (NumRegs > 3) + MIB.addReg(D3, SrcFlags); + + // Add the lane number operand. + MIB.addImm(Lane); + OpIdx += 1; + + // Copy the predicate operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the super-register source to be an implicit source. + MO.setImplicit(true); + MIB.addOperand(MO); + if (TableEntry->IsLoad) + // Add an implicit def for the super-register. + MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); +} + +/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ +/// register operands to real instructions with D register operands. 
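// [Editorial note: illustrative, not part of the patch] For a table lookup
// whose table lives in a Q register, e.g. (hypothetical operands)
//   VTBL2Pseudo dD, qN, dM
// the table operand is rewritten in terms of its D sub-registers:
//   VTBL2 dD, dsub_0(qN), dsub_1(qN), dM, pred
// VTBX ("IsExt") variants also copy one extra source operand, the previous
// value of the destination register.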
+void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, + unsigned Opc, bool IsExt, unsigned NumRegs) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + unsigned OpIdx = 0; + + // Transfer the destination register operand. + MIB.addOperand(MI.getOperand(OpIdx++)); + if (IsExt) + MIB.addOperand(MI.getOperand(OpIdx++)); + + bool SrcIsKill = MI.getOperand(OpIdx).isKill(); + unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); + unsigned D0, D1, D2, D3; + GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3); + MIB.addReg(D0).addReg(D1); + if (NumRegs > 2) + MIB.addReg(D2); + if (NumRegs > 3) + MIB.addReg(D3); + + // Copy the other source register operand. + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the predicate operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + if (SrcIsKill) // Add an implicit kill for the super-reg. (*MIB).addRegisterKilled(SrcReg, TRI, true); + TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); } -bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { - bool Modified = false; +void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + unsigned DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; + const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1); + MachineInstrBuilder LO16, HI16; - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - while (MBBI != E) { - MachineInstr &MI = *MBBI; - MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); + if (!STI->hasV6T2Ops() && + (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) { + // Expand into a movi + orr. 
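Before v6t2, a 32-bit immediate has to be assembled from two ARM "modified immediates": 8-bit values rotated right by an even amount. The sketch below models the arithmetic behind the ARM_AM::getSOImmTwoPartFirst/Second helpers used a few lines down; rotr32, isSOImm and splitSOImmTwoPart are local stand-ins for illustration, not LLVM APIs.

#include <cstdint>
#include <cstdio>

// Rotate right; ARM data-processing immediates are ror(imm8, even amount).
static uint32_t rotr32(uint32_t v, unsigned n) {
  n &= 31;
  return n == 0 ? v : (v >> n) | (v << (32 - n));
}

// Encodable as a single modified immediate?
static bool isSOImm(uint32_t v) {
  for (unsigned rot = 0; rot < 32; rot += 2)
    if ((v & ~rotr32(0xffu, rot)) == 0)
      return true;
  return false;
}

// Write v as first | second with both parts encodable, or fail.
static bool splitSOImmTwoPart(uint32_t v, uint32_t &first, uint32_t &second) {
  for (unsigned rot = 0; rot < 32; rot += 2) {
    uint32_t mask = rotr32(0xffu, rot);
    uint32_t lo = v & mask;      // candidate MOVi payload
    uint32_t rest = v & ~mask;   // candidate ORRri payload
    if (lo != 0 && rest != 0 && isSOImm(rest)) {
      first = lo;
      second = rest;
      return true;
    }
  }
  return false;
}

int main() {
  uint32_t first, second;
  if (splitSOImmTwoPart(0x00ab00cdu, first, second))
    printf("mov r0, #0x%x ; orr r0, r0, #0x%x\n", first, second);
  return 0;
}

On v6t2 and Thumb2 the same pseudos instead expand to a MOVW/MOVT pair, where the split is simply the low and high 16 bits, as the second half of this function shows.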
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg); + + assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!"); + unsigned ImmVal = (unsigned)MO.getImm(); + unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); + unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); + LO16 = LO16.addImm(SOImmValV1); + HI16 = HI16.addImm(SOImmValV2); + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16.addImm(Pred).addReg(PredReg).addReg(0); + HI16.addImm(Pred).addReg(PredReg).addReg(0); + TransferImpOps(MI, LO16, HI16); + MI.eraseFromParent(); + return; + } + + unsigned LO16Opc = 0; + unsigned HI16Opc = 0; + if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) { + LO16Opc = ARM::t2MOVi16; + HI16Opc = ARM::t2MOVTi16; + } else { + LO16Opc = ARM::MOVi16; + HI16Opc = ARM::MOVTi16; + } - bool ModifiedOp = true; - unsigned Opcode = MI.getOpcode(); - switch (Opcode) { + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg); + + if (MO.isImm()) { + unsigned Imm = MO.getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + LO16 = LO16.addImm(Lo16); + HI16 = HI16.addImm(Hi16); + } else { + const GlobalValue *GV = MO.getGlobal(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); + HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); + } + + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16.addImm(Pred).addReg(PredReg); + HI16.addImm(Pred).addReg(PredReg); + + TransferImpOps(MI, LO16, HI16); + MI.eraseFromParent(); +} + +bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { default: - ModifiedOp = false; - break; + return false; + case ARM::Int_eh_sjlj_dispatchsetup: { + MachineFunction &MF = *MI.getParent()->getParent(); + const ARMBaseInstrInfo *AII = + static_cast<const ARMBaseInstrInfo*>(TII); + const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); + // For functions using a base pointer, we rematerialize it (via the frame + // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it + // for us. Otherwise, expand to nothing. + if (RI.hasBasePointer(MF)) { + int32_t NumBytes = AFI->getFramePtrSpillOffset(); + unsigned FramePtr = RI.getFrameRegister(MF); + assert(MF.getTarget().getFrameLowering()->hasFP(MF) && + "base pointer without frame pointer?"); + + if (AFI->isThumb2Function()) { + llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, ARMCC::AL, 0, *TII); + } else if (AFI->isThumbFunction()) { + llvm::emitThumbRegPlusImmediate(MBB, MBBI, ARM::R6, + FramePtr, -NumBytes, + *TII, RI, MI.getDebugLoc()); + } else { + llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, ARMCC::AL, 0, + *TII); + } + // If there's dynamic realignment, adjust for it. 
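The realignment code that follows rounds the rematerialized base pointer down with BIC; because MaxAlign is a power of two, clearing the low bits is an align-down. A minimal standalone model (alignDown is a local helper, not LLVM code):

#include <cstdint>
#include <cstdio>

// bic r6, r6, #(MaxAlign-1): round down to a power-of-two boundary.
static uint32_t alignDown(uint32_t addr, uint32_t maxAlign) {
  return addr & ~(maxAlign - 1);
}

int main() {
  printf("0x%08x -> 0x%08x\n", 0x0001003bu, alignDown(0x0001003bu, 16));
  return 0;
}

BIC applies the inverted mask, which is why the expansion below passes MaxAlign-1 as the immediate.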
+ if (RI.needsStackRealignment(MF)) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned MaxAlign = MFI->getMaxAlignment(); + assert(!AFI->isThumb1OnlyFunction()); + // Emit bic r6, r6, MaxAlign + unsigned bicOpc = AFI->isThumbFunction() ? + ARM::t2BICri : ARM::BICri; + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(bicOpc), ARM::R6) + .addReg(ARM::R6, RegState::Kill) + .addImm(MaxAlign-1))); + } + + } + MI.eraseFromParent(); + return true; + } - case ARM::tLDRpci_pic: + case ARM::MOVsrl_flag: + case ARM::MOVsra_flag: { + // These are just fancy MOVs instructions. + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), + MI.getOperand(0).getReg()) + .addOperand(MI.getOperand(1)) + .addReg(0) + .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr + : ARM_AM::asr), 1))) + .addReg(ARM::CPSR, RegState::Define); + MI.eraseFromParent(); + return true; + } + case ARM::RRX: { + // This encodes as "MOVs Rd, Rm, rrx". + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), + MI.getOperand(0).getReg()) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(1)) + .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))) + .addReg(0); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); + return true; + } + case ARM::TPsoft: { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::BL)) + .addExternalSymbol("__aeabi_read_tp", 0); + + (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); + return true; + } + case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) ? ARM::tLDRpci : ARM::t2LDRpci; @@ -225,54 +803,73 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) - .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg) .addOperand(MI.getOperand(2)); TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); - break; + return true; } - case ARM::MOVi32imm: - case ARM::t2MOVi32imm: { - unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + case ARM::MOV_ga_dyn: + case ARM::MOV_ga_pcrel: + case ARM::MOV_ga_pcrel_ldr: + case ARM::t2MOV_ga_dyn: + case ARM::t2MOV_ga_pcrel: { + // Expand into movw + movt. Also "add pc" / ldr [pc] in PIC mode. + unsigned LabelId = AFI->createPICLabelUId(); unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); - const MachineOperand &MO = MI.getOperand(1); - MachineInstrBuilder LO16, HI16; - - LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(Opcode == ARM::MOVi32imm ? - ARM::MOVi16 : ARM::t2MOVi16), - DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(Opcode == ARM::MOVi32imm ?
- ARM::MOVTi16 : ARM::t2MOVTi16)) - .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) - .addReg(DstReg); - - if (MO.isImm()) { - unsigned Imm = MO.getImm(); - unsigned Lo16 = Imm & 0xffff; - unsigned Hi16 = (Imm >> 16) & 0xffff; - LO16 = LO16.addImm(Lo16); - HI16 = HI16.addImm(Hi16); - } else { - const GlobalValue *GV = MO.getGlobal(); - unsigned TF = MO.getTargetFlags(); - LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); - HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); + const MachineOperand &MO1 = MI.getOperand(1); + const GlobalValue *GV = MO1.getGlobal(); + unsigned TF = MO1.getTargetFlags(); + bool isARM = Opcode != ARM::t2MOV_ga_pcrel; + bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn); + unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel; + unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel; + unsigned LO16TF = isPIC + ? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY; + unsigned HI16TF = isPIC + ? ARMII::MO_HI16_NONLAZY_PIC : ARMII::MO_HI16_NONLAZY; + unsigned PICAddOpc = isARM + ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) + : ARM::tPICADD; + MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(LO16Opc), DstReg) + .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF) + .addImm(LabelId); + MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(HI16Opc), DstReg) + .addReg(DstReg) + .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF) + .addImm(LabelId); + if (!isPIC) { + TransferImpOps(MI, MIB1, MIB2); + MI.eraseFromParent(); + return true; } - (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - LO16.addImm(Pred).addReg(PredReg); - HI16.addImm(Pred).addReg(PredReg); - TransferImpOps(MI, LO16, HI16); + + MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(PICAddOpc)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg).addImm(LabelId); + if (isARM) { + AddDefaultPred(MIB3); + if (Opcode == ARM::MOV_ga_pcrel_ldr) + (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + } + TransferImpOps(MI, MIB1, MIB3); MI.eraseFromParent(); - break; + return true; } + case ARM::MOVi32imm: + case ARM::MOVCCi32imm: + case ARM::t2MOVi32imm: + case ARM::t2MOVCCi32imm: + ExpandMOV32BitImm(MBB, MBBI); + return true; + case ARM::VMOVQQ: { unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); @@ -285,222 +882,339 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineInstrBuilder Even = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) - .addReg(EvenDst, - getDefRegState(true) | getDeadRegState(DstIsDead)) - .addReg(EvenSrc, getKillRegState(SrcIsKill))); + .addReg(EvenDst, + RegState::Define | getDeadRegState(DstIsDead)) + .addReg(EvenSrc, getKillRegState(SrcIsKill))); MachineInstrBuilder Odd = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) - .addReg(OddDst, - getDefRegState(true) | getDeadRegState(DstIsDead)) - .addReg(OddSrc, getKillRegState(SrcIsKill))); + .addReg(OddDst, + RegState::Define | getDeadRegState(DstIsDead)) + .addReg(OddSrc, getKillRegState(SrcIsKill))); TransferImpOps(MI, Even, Odd); MI.eraseFromParent(); + return true; + } + + case ARM::VLDMQIA: + case ARM::VLDMQDB: { + unsigned NewOpc = (Opcode == ARM::VLDMQIA) ? 
ARM::VLDMDIA : ARM::VLDMDDB; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); + unsigned OpIdx = 0; + + // Grab the Q register destination. + bool DstIsDead = MI.getOperand(OpIdx).isDead(); + unsigned DstReg = MI.getOperand(OpIdx++).getReg(); + + // Copy the source register. + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the predicate operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Add the destination operands (D subregs). + unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0); + unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1); + MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); + + // Add an implicit def for the super-register. + MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); + return true; + } + + case ARM::VSTMQIA: + case ARM::VSTMQDB: { + unsigned NewOpc = (Opcode == ARM::VSTMQIA) ? ARM::VSTMDIA : ARM::VSTMDDB; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); + unsigned OpIdx = 0; + + // Grab the Q register source. + bool SrcIsKill = MI.getOperand(OpIdx).isKill(); + unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); + + // Copy the destination register. + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the predicate operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Add the source operands (D subregs). + unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); + unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); + MIB.addReg(D0).addReg(D1); + + if (SrcIsKill) + // Add an implicit kill for the Q register. + (*MIB).addRegisterKilled(SrcReg, TRI, true); + + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); + return true; + } + case ARM::VDUPfqf: + case ARM::VDUPfdf:{ + unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLNfq : ARM::VDUPLNfd; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); + unsigned OpIdx = 0; + unsigned SrcReg = MI.getOperand(1).getReg(); + unsigned Lane = getARMRegisterNumbering(SrcReg) & 1; + unsigned DReg = TRI->getMatchingSuperReg(SrcReg, + Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); + // The lane is [0,1] for the containing DReg superregister. + // Copy the dst/src register operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addReg(DReg); + ++OpIdx; + // Add the lane select operand. + MIB.addImm(Lane); + // Add the predicate operands. 
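The VDUPfqf/VDUPfdf expansion above relies on the VFP register overlay: S(2n) and S(2n+1) are the two 32-bit lanes of Dn, so the source's lane index is just the low bit of its S-register number. A standalone sketch of that mapping (containerOf is a hypothetical helper):

#include <cstdio>

// S(2n) is lane 0 of Dn, S(2n+1) is lane 1: the containing D register is
// n/2 and the lane is the low bit of the S-register number.
struct DLane { unsigned DReg, Lane; };

static DLane containerOf(unsigned SReg) {
  return DLane{SReg / 2, SReg & 1};
}

int main() {
  for (unsigned S : {0u, 1u, 7u, 30u}) {
    DLane DL = containerOf(S);
    printf("s%u -> d%u[%u]\n", S, DL.DReg, DL.Lane);
  }
  return 0;
}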
+ MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); + return true; } case ARM::VLD1q8Pseudo: - ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break; case ARM::VLD1q16Pseudo: - ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break; case ARM::VLD1q32Pseudo: - ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break; case ARM::VLD1q64Pseudo: - ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break; case ARM::VLD1q8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break; case ARM::VLD1q16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break; case ARM::VLD1q32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break; case ARM::VLD1q64Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break; - case ARM::VLD2d8Pseudo: - ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break; case ARM::VLD2d16Pseudo: - ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break; case ARM::VLD2d32Pseudo: - ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break; case ARM::VLD2q8Pseudo: - ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break; case ARM::VLD2q16Pseudo: - ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break; case ARM::VLD2q32Pseudo: - ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break; case ARM::VLD2d8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break; case ARM::VLD2d16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break; case ARM::VLD2d32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break; case ARM::VLD2q8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break; case ARM::VLD2q16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD2q16, true, SingleSpc, 4); break; case ARM::VLD2q32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break; - case ARM::VLD3d8Pseudo: - ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break; case ARM::VLD3d16Pseudo: - ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break; case ARM::VLD3d32Pseudo: - ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break; case ARM::VLD1d64TPseudo: - ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break; case ARM::VLD3d8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break; case ARM::VLD3d16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break; case ARM::VLD3d32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break; case ARM::VLD1d64TPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break; case ARM::VLD3q8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break; case ARM::VLD3q16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break; case ARM::VLD3q32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break; + case ARM::VLD3q8oddPseudo: + case ARM::VLD3q16oddPseudo: + case ARM::VLD3q32oddPseudo: case ARM::VLD3q8oddPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break; case ARM::VLD3q16oddPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break; case ARM::VLD3q32oddPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break; - case ARM::VLD4d8Pseudo: - ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break; case ARM::VLD4d16Pseudo: - ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break; case ARM::VLD4d32Pseudo: - ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break; case 
ARM::VLD1d64QPseudo: - ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break; case ARM::VLD4d8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break; case ARM::VLD4d16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break; case ARM::VLD4d32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break; case ARM::VLD1d64QPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break; case ARM::VLD4q8Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break; case ARM::VLD4q16Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break; case ARM::VLD4q32Pseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break; + case ARM::VLD4q8oddPseudo: + case ARM::VLD4q16oddPseudo: + case ARM::VLD4q32oddPseudo: case ARM::VLD4q8oddPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break; case ARM::VLD4q16oddPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break; case ARM::VLD4q32oddPseudo_UPD: - ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break; + case ARM::VLD1DUPq8Pseudo: + case ARM::VLD1DUPq16Pseudo: + case ARM::VLD1DUPq32Pseudo: + case ARM::VLD1DUPq8Pseudo_UPD: + case ARM::VLD1DUPq16Pseudo_UPD: + case ARM::VLD1DUPq32Pseudo_UPD: + case ARM::VLD2DUPd8Pseudo: + case ARM::VLD2DUPd16Pseudo: + case ARM::VLD2DUPd32Pseudo: + case ARM::VLD2DUPd8Pseudo_UPD: + case ARM::VLD2DUPd16Pseudo_UPD: + case ARM::VLD2DUPd32Pseudo_UPD: + case ARM::VLD3DUPd8Pseudo: + case ARM::VLD3DUPd16Pseudo: + case ARM::VLD3DUPd32Pseudo: + case ARM::VLD3DUPd8Pseudo_UPD: + case ARM::VLD3DUPd16Pseudo_UPD: + case ARM::VLD3DUPd32Pseudo_UPD: + case ARM::VLD4DUPd8Pseudo: + case ARM::VLD4DUPd16Pseudo: + case ARM::VLD4DUPd32Pseudo: + case ARM::VLD4DUPd8Pseudo_UPD: + case ARM::VLD4DUPd16Pseudo_UPD: + case ARM::VLD4DUPd32Pseudo_UPD: + ExpandVLD(MBBI); + return true; case ARM::VST1q8Pseudo: - ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break; case ARM::VST1q16Pseudo: - ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break; case ARM::VST1q32Pseudo: - ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break; case ARM::VST1q64Pseudo: - ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break; case ARM::VST1q8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break; case ARM::VST1q16Pseudo_UPD: - ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break; case ARM::VST1q32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break; case ARM::VST1q64Pseudo_UPD: - ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break; - case ARM::VST2d8Pseudo: - ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break; case ARM::VST2d16Pseudo: - ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break; case ARM::VST2d32Pseudo: - ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break; case ARM::VST2q8Pseudo: - ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break; case ARM::VST2q16Pseudo: - ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break; case ARM::VST2q32Pseudo: - ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break; case ARM::VST2d8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break; case ARM::VST2d16Pseudo_UPD: - ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break; case ARM::VST2d32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break; case ARM::VST2q8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break; case ARM::VST2q16Pseudo_UPD: - ExpandVST(MBBI, 
ARM::VST2q16_UPD, true, SingleSpc, 4); break; case ARM::VST2q32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break; - case ARM::VST3d8Pseudo: - ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break; case ARM::VST3d16Pseudo: - ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break; case ARM::VST3d32Pseudo: - ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break; case ARM::VST1d64TPseudo: - ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break; case ARM::VST3d8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break; case ARM::VST3d16Pseudo_UPD: - ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break; case ARM::VST3d32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break; case ARM::VST1d64TPseudo_UPD: - ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break; case ARM::VST3q8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break; case ARM::VST3q16Pseudo_UPD: - ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break; case ARM::VST3q32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break; + case ARM::VST3q8oddPseudo: + case ARM::VST3q16oddPseudo: + case ARM::VST3q32oddPseudo: case ARM::VST3q8oddPseudo_UPD: - ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break; case ARM::VST3q16oddPseudo_UPD: - ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break; case ARM::VST3q32oddPseudo_UPD: - ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break; - case ARM::VST4d8Pseudo: - ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break; case ARM::VST4d16Pseudo: - ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break; case ARM::VST4d32Pseudo: - ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break; case ARM::VST1d64QPseudo: - ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break; case ARM::VST4d8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break; case ARM::VST4d16Pseudo_UPD: - ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break; case ARM::VST4d32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break; case ARM::VST1d64QPseudo_UPD: - ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break; case ARM::VST4q8Pseudo_UPD: - ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break; case ARM::VST4q16Pseudo_UPD: - ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break; case ARM::VST4q32Pseudo_UPD: - ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break; + case ARM::VST4q8oddPseudo: + case ARM::VST4q16oddPseudo: + case ARM::VST4q32oddPseudo: case ARM::VST4q8oddPseudo_UPD: - ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break; case ARM::VST4q16oddPseudo_UPD: - ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break; case ARM::VST4q32oddPseudo_UPD: - ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break; - } + ExpandVST(MBBI); + return true; + + case ARM::VLD1LNq8Pseudo: + case ARM::VLD1LNq16Pseudo: + case ARM::VLD1LNq32Pseudo: + case ARM::VLD1LNq8Pseudo_UPD: + case ARM::VLD1LNq16Pseudo_UPD: + case ARM::VLD1LNq32Pseudo_UPD: + case ARM::VLD2LNd8Pseudo: + case ARM::VLD2LNd16Pseudo: + case ARM::VLD2LNd32Pseudo: + case ARM::VLD2LNq16Pseudo: + case ARM::VLD2LNq32Pseudo: + case ARM::VLD2LNd8Pseudo_UPD: + case ARM::VLD2LNd16Pseudo_UPD: + case ARM::VLD2LNd32Pseudo_UPD: + case ARM::VLD2LNq16Pseudo_UPD: + case ARM::VLD2LNq32Pseudo_UPD: + case ARM::VLD3LNd8Pseudo: + case ARM::VLD3LNd16Pseudo: + case ARM::VLD3LNd32Pseudo: + case ARM::VLD3LNq16Pseudo: + case 
ARM::VLD3LNq32Pseudo: + case ARM::VLD3LNd8Pseudo_UPD: + case ARM::VLD3LNd16Pseudo_UPD: + case ARM::VLD3LNd32Pseudo_UPD: + case ARM::VLD3LNq16Pseudo_UPD: + case ARM::VLD3LNq32Pseudo_UPD: + case ARM::VLD4LNd8Pseudo: + case ARM::VLD4LNd16Pseudo: + case ARM::VLD4LNd32Pseudo: + case ARM::VLD4LNq16Pseudo: + case ARM::VLD4LNq32Pseudo: + case ARM::VLD4LNd8Pseudo_UPD: + case ARM::VLD4LNd16Pseudo_UPD: + case ARM::VLD4LNd32Pseudo_UPD: + case ARM::VLD4LNq16Pseudo_UPD: + case ARM::VLD4LNq32Pseudo_UPD: + case ARM::VST1LNq8Pseudo: + case ARM::VST1LNq16Pseudo: + case ARM::VST1LNq32Pseudo: + case ARM::VST1LNq8Pseudo_UPD: + case ARM::VST1LNq16Pseudo_UPD: + case ARM::VST1LNq32Pseudo_UPD: + case ARM::VST2LNd8Pseudo: + case ARM::VST2LNd16Pseudo: + case ARM::VST2LNd32Pseudo: + case ARM::VST2LNq16Pseudo: + case ARM::VST2LNq32Pseudo: + case ARM::VST2LNd8Pseudo_UPD: + case ARM::VST2LNd16Pseudo_UPD: + case ARM::VST2LNd32Pseudo_UPD: + case ARM::VST2LNq16Pseudo_UPD: + case ARM::VST2LNq32Pseudo_UPD: + case ARM::VST3LNd8Pseudo: + case ARM::VST3LNd16Pseudo: + case ARM::VST3LNd32Pseudo: + case ARM::VST3LNq16Pseudo: + case ARM::VST3LNq32Pseudo: + case ARM::VST3LNd8Pseudo_UPD: + case ARM::VST3LNd16Pseudo_UPD: + case ARM::VST3LNd32Pseudo_UPD: + case ARM::VST3LNq16Pseudo_UPD: + case ARM::VST3LNq32Pseudo_UPD: + case ARM::VST4LNd8Pseudo: + case ARM::VST4LNd16Pseudo: + case ARM::VST4LNd32Pseudo: + case ARM::VST4LNq16Pseudo: + case ARM::VST4LNq32Pseudo: + case ARM::VST4LNd8Pseudo_UPD: + case ARM::VST4LNd16Pseudo_UPD: + case ARM::VST4LNd32Pseudo_UPD: + case ARM::VST4LNq16Pseudo_UPD: + case ARM::VST4LNq32Pseudo_UPD: + ExpandLaneOp(MBBI); + return true; + + case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true; + case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true; + case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true; + case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true; + case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true; + case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true; + } + + return false; +} - if (ModifiedOp) - Modified = true; +bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); + Modified |= ExpandMI(MBB, MBBI); MBBI = NMBBI; } @@ -508,8 +1222,11 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { } bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + const TargetMachine &TM = MF.getTarget(); + TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + TRI = TM.getRegisterInfo(); + STI = &TM.getSubtarget<ARMSubtarget>(); + AFI = MF.getInfo<ARMFunctionInfo>(); bool Modified = false; for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 4892eae..9f29530 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -15,14 +15,17 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" +#include "ARMCallingConv.h" #include "ARMRegisterInfo.h" #include "ARMTargetMachine.h" #include "ARMSubtarget.h" +#include "ARMConstantPoolValue.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" 
+#include "llvm/Module.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -30,7 +33,9 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -43,12 +48,37 @@ using namespace llvm; static cl::opt<bool> -EnableARMFastISel("arm-fast-isel", - cl::desc("Turn on experimental ARM fast-isel support"), - cl::init(false), cl::Hidden); +DisableARMFastISel("disable-arm-fast-isel", + cl::desc("Turn off experimental ARM fast-isel support"), + cl::init(false), cl::Hidden); + +extern cl::opt<bool> EnableARMLongCalls; namespace { + // All possible address modes, plus some. + typedef struct Address { + enum { + RegBase, + FrameIndexBase + } BaseType; + + union { + unsigned Reg; + int FI; + } Base; + + int Offset; + unsigned Scale; + unsigned PlusReg; + + // Innocuous defaults for our address. + Address() + : BaseType(RegBase), Offset(0), Scale(0), PlusReg(0) { + Base.Reg = 0; + } + } Address; + class ARMFastISel : public FastISel { /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can @@ -57,13 +87,14 @@ class ARMFastISel : public FastISel { const TargetMachine &TM; const TargetInstrInfo &TII; const TargetLowering &TLI; - const ARMFunctionInfo *AFI; + ARMFunctionInfo *AFI; - // Convenience variable to avoid checking all the time. + // Convenience variables to avoid some queries. bool isThumb; + LLVMContext *Context; public: - explicit ARMFastISel(FunctionLoweringInfo &funcInfo) + explicit ARMFastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo), TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()), @@ -71,6 +102,7 @@ class ARMFastISel : public FastISel { Subtarget = &TM.getSubtarget<ARMSubtarget>(); AFI = funcInfo.MF->getInfo<ARMFunctionInfo>(); isThumb = AFI->isThumbFunction(); + Context = &funcInfo.Fn->getContext(); } // Code from FastISel.cpp. @@ -102,36 +134,73 @@ class ARMFastISel : public FastISel { virtual unsigned FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx); - + // Backend specific FastISel code. virtual bool TargetSelectInstruction(const Instruction *I); virtual unsigned TargetMaterializeConstant(const Constant *C); + virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); #include "ARMGenFastISel.inc" - + // Instruction selection routines. - virtual bool ARMSelectLoad(const Instruction *I); - virtual bool ARMSelectStore(const Instruction *I); - virtual bool ARMSelectBranch(const Instruction *I); + private: + bool SelectLoad(const Instruction *I); + bool SelectStore(const Instruction *I); + bool SelectBranch(const Instruction *I); + bool SelectCmp(const Instruction *I); + bool SelectFPExt(const Instruction *I); + bool SelectFPTrunc(const Instruction *I); + bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode); + bool SelectSIToFP(const Instruction *I); + bool SelectFPToSI(const Instruction *I); + bool SelectSDiv(const Instruction *I); + bool SelectSRem(const Instruction *I); + bool SelectCall(const Instruction *I); + bool SelectSelect(const Instruction *I); + bool SelectRet(const Instruction *I); // Utility routines. 
private: - bool isTypeLegal(const Type *Ty, EVT &VT); - bool isLoadTypeLegal(const Type *Ty, EVT &VT); - bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset); - bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset); - bool ARMLoadAlloca(const Instruction *I); - bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg); - bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset); - bool ARMMaterializeConstant(const ConstantInt *Val, unsigned &Reg); - + bool isTypeLegal(const Type *Ty, MVT &VT); + bool isLoadTypeLegal(const Type *Ty, MVT &VT); + bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr); + bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); + bool ARMComputeAddress(const Value *Obj, Address &Addr); + void ARMSimplifyAddress(Address &Addr, EVT VT); + unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT); + unsigned ARMMaterializeInt(const Constant *C, EVT VT); + unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT); + unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg); + unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg); + + // Call handling routines. + private: + bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, + unsigned &ResultReg); + CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return); + bool ProcessCallArgs(SmallVectorImpl<Value*> &Args, + SmallVectorImpl<unsigned> &ArgRegs, + SmallVectorImpl<MVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, + SmallVectorImpl<unsigned> &RegArgs, + CallingConv::ID CC, + unsigned &NumBytes); + bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, + const Instruction *I, CallingConv::ID CC, + unsigned &NumBytes); + bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call); + + // OptionalDef handling routines. + private: bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); + void AddLoadStoreOperands(EVT VT, Address &Addr, + const MachineInstrBuilder &MIB); }; } // end anonymous namespace -// #include "ARMGenCallingConv.inc" +#include "ARMGenCallingConv.inc" // DefinesOptionalPredicate - This is different from DefinesPredicate in that // we don't care about implicit defs here, just places we'll need to add a @@ -153,6 +222,9 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { // If the machine is predicable go ahead and add the predicate operands, if // it needs default CC operands add those. +// TODO: If we want to support thumb1 then we'll need to deal with optional +// CPSR defs that need to be added before the remaining operands. See s_cc_out +// for descriptions why. const MachineInstrBuilder & ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { MachineInstr *MI = &*MIB; @@ -160,7 +232,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { // Do we use a predicate? if (TII.isPredicable(MI)) AddDefaultPred(MIB); - + // Do we optionally set a predicate? Preds is size > 0 iff the predicate // defines CPSR. All other OptionalDefines in ARM are the CCR register. 
bool CPSR = false; @@ -297,7 +369,7 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); - + if (II.getNumDefs() >= 1) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addImm(Imm)); @@ -323,16 +395,84 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, return ResultReg; } -unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { - EVT VT = TLI.getValueType(C->getType(), true); +// TODO: Don't worry about 64-bit now, but when this is fixed remove the +// checks from the various callers. +unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) { + if (VT == MVT::f64) return 0; - // Only handle simple types. - if (!VT.isSimple()) return 0; - - // TODO: This should be safe for fp because they're just bits from the - // Constant. - // TODO: Theoretically we could materialize fp constants with instructions - // from VFP3. + unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVRS), MoveReg) + .addReg(SrcReg)); + return MoveReg; +} + +unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) { + if (VT == MVT::i64) return 0; + + unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVSR), MoveReg) + .addReg(SrcReg)); + return MoveReg; +} + +// For double width floating point we need to materialize two constants +// (the high and the low) into integer registers then use a move to get +// the combined constant into an FP reg. +unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { + const APFloat Val = CFP->getValueAPF(); + bool is64bit = VT == MVT::f64; + + // This checks to see if we can use VFP3 instructions to materialize + // a constant, otherwise we have to go through the constant pool. + if (TLI.isFPImmLegal(Val, VT)) { + unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS; + unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), + DestReg) + .addFPImm(CFP)); + return DestReg; + } + + // Require VFP2 for loading fp constants. + if (!Subtarget->hasVFP2()) return false; + + // MachineConstantPool wants an explicit alignment. + unsigned Align = TD.getPrefTypeAlignment(CFP->getType()); + if (Align == 0) { + // TODO: Figure out if this is correct. + Align = TD.getTypeAllocSize(CFP->getType()); + } + unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); + unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS; + + // The extra reg is for addrmode5. + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), + DestReg) + .addConstantPoolIndex(Idx) + .addReg(0)); + return DestReg; +} + +unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { + + // For now 32-bit only. + if (VT != MVT::i32) return false; + + unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + + // If we can do this in a single instruction without a constant pool entry + // do so now. + const ConstantInt *CI = cast<ConstantInt>(C); + if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) { + unsigned Opc = isThumb ? 
ARM::t2MOVi16 : ARM::MOVi16; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), DestReg) + .addImm(CI->getSExtValue())); + return DestReg; + } // MachineConstantPool wants an explicit alignment. unsigned Align = TD.getPrefTypeAlignment(C->getType()); @@ -342,58 +482,144 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { } unsigned Idx = MCP.getConstantPoolIndex(C, Align); - unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); - // Different addressing modes between ARM/Thumb2 for constant pool loads. if (isThumb) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(ARM::t2LDRpci)) - .addReg(DestReg).addConstantPoolIndex(Idx)); + TII.get(ARM::t2LDRpci), DestReg) + .addConstantPoolIndex(Idx)); else + // The extra immediate is for addrmode2. AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(ARM::LDRcp)) - .addReg(DestReg).addConstantPoolIndex(Idx) - .addReg(0).addImm(0)); - + TII.get(ARM::LDRcp), DestReg) + .addConstantPoolIndex(Idx) + .addImm(0)); + return DestReg; } -bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) { - VT = TLI.getValueType(Ty, true); - +unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { + // For now 32-bit only. + if (VT != MVT::i32) return 0; + + Reloc::Model RelocM = TM.getRelocationModel(); + + // TODO: No external globals for now. + if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0; + + // TODO: Need more magic for ARM PIC. + if (!isThumb && (RelocM == Reloc::PIC_)) return 0; + + // MachineConstantPool wants an explicit alignment. + unsigned Align = TD.getPrefTypeAlignment(GV->getType()); + if (Align == 0) { + // TODO: Figure out if this is correct. + Align = TD.getTypeAllocSize(GV->getType()); + } + + // Grab index. + unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); + unsigned Id = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id, + ARMCP::CPValue, PCAdj); + unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); + + // Load value. + MachineInstrBuilder MIB; + unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + if (isThumb) { + unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + .addConstantPoolIndex(Idx); + if (RelocM == Reloc::PIC_) + MIB.addImm(Id); + } else { + // The extra immediate is for addrmode2. + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), + DestReg) + .addConstantPoolIndex(Idx) + .addImm(0); + } + AddOptionalDefs(MIB); + return DestReg; +} + +unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { + EVT VT = TLI.getValueType(C->getType(), true); + // Only handle simple types. - if (VT == MVT::Other || !VT.isSimple()) return false; - + if (!VT.isSimple()) return 0; + + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) + return ARMMaterializeFP(CFP, VT); + else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) + return ARMMaterializeGV(GV, VT); + else if (isa<ConstantInt>(C)) + return ARMMaterializeInt(C, VT); + + return 0; +} + +unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { + // Don't handle dynamic allocas. 
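ARMMaterializeInt above prefers a single MOVW when the constant fits in an unsigned 16-bit immediate on v6t2+, and otherwise falls back to a constant-pool load. A sketch of just that decision (materializeStrategy is a local stand-in; the strings are illustrative assembly, not emitted output):

#include <cstdint>
#include <cstdio>

// v6t2+ can build any 0..0xffff constant with one movw; wider or negative
// values go through the constant pool in this simplified model.
static const char *materializeStrategy(int64_t V, bool HasV6T2) {
  if (HasV6T2 && V >= 0 && V <= 0xffff)
    return "movw rd, #imm16";
  return "ldr rd, [pc, #offset]  @ constant pool";
}

int main() {
  printf("%lld -> %s\n", 42LL, materializeStrategy(42, true));
  printf("%lld -> %s\n", 0x12345LL, materializeStrategy(0x12345, true));
  return 0;
}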
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; + + MVT VT; + if (!isLoadTypeLegal(AI->getType(), VT)) return false; + + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + + // This will get lowered later into the correct offsets and registers + // via rewriteXFrameIndex. + if (SI != FuncInfo.StaticAllocaMap.end()) { + TargetRegisterClass* RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addFrameIndex(SI->second) + .addImm(0)); + return ResultReg; + } + + return 0; +} + +bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) { + EVT evt = TLI.getValueType(Ty, true); + + // Only handle simple types. + if (evt == MVT::Other || !evt.isSimple()) return false; + VT = evt.getSimpleVT(); + + // Handle all legal types, i.e. a register that will directly hold this // value. return TLI.isTypeLegal(VT); } -bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) { +bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) { if (isTypeLegal(Ty, VT)) return true; - + // If this is a type that can be sign or zero-extended to a basic operation // go ahead and accept it now. if (VT == MVT::i8 || VT == MVT::i16) return true; - + return false; } -// Computes the Reg+Offset to get to an object. -bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg, - int &Offset) { +// Computes the address to get to an object. +bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { // Some boilerplate from the X86 FastISel. const User *U = NULL; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast<Instruction>(Obj)) { - // Don't walk into other basic blocks; it's possible we haven't - // visited them yet, so the instructions may not yet be assigned - // virtual registers. - if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB) - return false; - - Opcode = I->getOpcode(); - U = I; + // Don't walk into other basic blocks unless the object is an alloca from + // another block, otherwise it may not have a virtual register assigned. + if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || + FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { + Opcode = I->getOpcode(); + U = I; + } } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { Opcode = C->getOpcode(); U = C; @@ -404,141 +630,282 @@ bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg, // Fast instruction selection doesn't support the special // address spaces. return false; - + switch (Opcode) { - default: - //errs() << "Failing Opcode is: " << *Op1 << "\n"; + default: break; + case Instruction::BitCast: { + // Look through bitcasts. + return ARMComputeAddress(U->getOperand(0), Addr); + } + case Instruction::IntToPtr: { + // Look past no-op inttoptrs. + if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + return ARMComputeAddress(U->getOperand(0), Addr); + break; + } + case Instruction::PtrToInt: { + // Look past no-op ptrtoints. + if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + return ARMComputeAddress(U->getOperand(0), Addr); + break; + } + case Instruction::GetElementPtr: { + Address SavedAddr = Addr; + int TmpOffset = Addr.Offset; + + // Iterate through the GEP folding the constants into offsets where + // we can.
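Each GEP operand the walk below visits contributes a compile-time byte offset when the index is constant: struct fields add their layout offset, and array indices add index times element size. A minimal model of that folding (foldConstantGep and the example layout are assumptions for illustration):

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Every constant GEP step folds to elementSize * index bytes; struct fields
// are the same idea with the field's layout offset as the contribution.
static int64_t
foldConstantGep(const std::vector<std::pair<int64_t, int64_t>> &Steps) {
  int64_t Offset = 0;
  for (const auto &S : Steps)
    Offset += S.first * S.second; // element size * constant index
  return Offset;
}

int main() {
  // struct S { int32_t a; int32_t b[4]; };  address of s->b[2]:
  // field 'b' begins at byte 4, then b[2] adds 2 * sizeof(int32_t).
  printf("offset = %lld\n",
         (long long)foldConstantGep({{4, 1}, {4, 2}}));
  return 0;
}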
+ gep_type_iterator GTI = gep_type_begin(U); + for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); + i != e; ++i, ++GTI) { + const Value *Op = *i; + if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + const StructLayout *SL = TD.getStructLayout(STy); + unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); + TmpOffset += SL->getElementOffset(Idx); + } else { + uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); + SmallVector<const Value *, 4> Worklist; + Worklist.push_back(Op); + do { + Op = Worklist.pop_back_val(); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + // Constant-offset addressing. + TmpOffset += CI->getSExtValue() * S; + } else if (isa<AddOperator>(Op) && + isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) { + // An add with a constant operand. Fold the constant. + ConstantInt *CI = + cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); + TmpOffset += CI->getSExtValue() * S; + // Add the other operand back to the work list. + Worklist.push_back(cast<AddOperator>(Op)->getOperand(0)); + } else + goto unsupported_gep; + } while (!Worklist.empty()); + } + } + + // Try to grab the base operand now. + Addr.Offset = TmpOffset; + if (ARMComputeAddress(U->getOperand(0), Addr)) return true; + + // We failed, restore everything and try the other options. + Addr = SavedAddr; + + unsupported_gep: + break; + } case Instruction::Alloca: { - assert(false && "Alloca should have been handled earlier!"); - return false; + const AllocaInst *AI = cast<AllocaInst>(Obj); + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + Addr.BaseType = Address::FrameIndexBase; + Addr.Base.FI = SI->second; + return true; + } + break; } } - + + // Materialize the global variable's address into a reg which can + // then be used later to load the variable. if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) { - //errs() << "Failing GV is: " << GV << "\n"; - (void)GV; - return false; + unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType())); + if (Tmp == 0) return false; + + Addr.Base.Reg = Tmp; + return true; } - + // Try to get this in a register if nothing else has worked. - Reg = getRegForValue(Obj); - if (Reg == 0) return false; + if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj); + return Addr.Base.Reg != 0; +} + +void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { - // Since the offset may be too large for the load instruction + assert(VT.isSimple() && "Non-simple types are invalid here!"); + + bool needsLowering = false; + switch (VT.getSimpleVT().SimpleTy) { + default: + assert(false && "Unhandled load/store type!"); + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + // Integer loads/stores handle 12-bit offsets. + needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); + break; + case MVT::f32: + case MVT::f64: + // Floating point operands handle 8-bit offsets. + needsLowering = ((Addr.Offset & 0xff) != Addr.Offset); + break; + } + + // If this is a stack pointer and the offset needs to be simplified then + // put the alloca address into a register, set the base type back to + // register and continue. This should almost never happen. + if (needsLowering && Addr.BaseType == Address::FrameIndexBase) { + TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass : + ARM::GPRRegisterClass; + unsigned ResultReg = createResultReg(RC); + unsigned Opc = isThumb ? 
ARM::t2ADDri : ARM::ADDri; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addFrameIndex(Addr.Base.FI) + .addImm(0)); + Addr.Base.Reg = ResultReg; + Addr.BaseType = Address::RegBase; + } + + // Since the offset is too large for the load/store instruction // get the reg+offset into a register. - // TODO: Verify the additions work, otherwise we'll need to add the - // offset instead of 0 to the instructions and do all sorts of operand - // munging. - // TODO: Optimize this somewhat. - if (Offset != 0) { + if (needsLowering) { ARMCC::CondCodes Pred = ARMCC::AL; unsigned PredReg = 0; + TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass : + ARM::GPRRegisterClass; + unsigned BaseReg = createResultReg(RC); + if (!isThumb) emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - Reg, Reg, Offset, Pred, PredReg, + BaseReg, Addr.Base.Reg, Addr.Offset, + Pred, PredReg, static_cast<const ARMBaseInstrInfo&>(TII)); else { assert(AFI->isThumb2Function()); emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - Reg, Reg, Offset, Pred, PredReg, + BaseReg, Addr.Base.Reg, Addr.Offset, Pred, PredReg, static_cast<const ARMBaseInstrInfo&>(TII)); } + Addr.Offset = 0; + Addr.Base.Reg = BaseReg; } - - return true; } -bool ARMFastISel::ARMLoadAlloca(const Instruction *I) { - Value *Op0 = I->getOperand(0); +void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, + const MachineInstrBuilder &MIB) { + // addrmode5 output depends on the selection dag addressing dividing the + // offset by 4 that it then later multiplies. Do this here as well. + if (VT.getSimpleVT().SimpleTy == MVT::f32 || + VT.getSimpleVT().SimpleTy == MVT::f64) + Addr.Offset /= 4; + + // Frame base works a bit differently. Handle it separately. + if (Addr.BaseType == Address::FrameIndexBase) { + int FI = Addr.Base.FI; + int Offset = Addr.Offset; + MachineMemOperand *MMO = + FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(FI, Offset), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + // Now add the rest of the operands. + MIB.addFrameIndex(FI); - // Verify it's an alloca. - if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) { - DenseMap<const AllocaInst*, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - - if (SI != FuncInfo.StaticAllocaMap.end()) { - TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); - unsigned ResultReg = createResultReg(RC); - TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt, - ResultReg, SI->second, RC, - TM.getRegisterInfo()); - UpdateValueMap(I, ResultReg); - return true; - } + // ARM halfword load/stores need an additional operand. + if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); + + MIB.addImm(Addr.Offset); + MIB.addMemOperand(MMO); + } else { + // Now add the rest of the operands. + MIB.addReg(Addr.Base.Reg); + + // ARM halfword load/stores need an additional operand. 
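ARMSimplifyAddress above only keeps offsets the target encodings can represent: 12 unsigned bits for integer loads/stores and 8 bits for VFP, which addrmode5 additionally scales by 4, hence the division at the start of this function. A standalone model of the check (offsetNeedsLowering is a local helper, not LLVM code):

#include <cstdio>

// Integer loads/stores carry a 12-bit unsigned byte offset, VFP ones an
// 8-bit offset; anything negative or wider must be lowered into a register.
static bool offsetNeedsLowering(int Offset, bool IsFloat) {
  int Mask = IsFloat ? 0xff : 0xfff;
  return (Offset & Mask) != Offset;
}

int main() {
  printf("int +4095: %s\n", offsetNeedsLowering(4095, false) ? "lower" : "ok");
  printf("vfp  +255: %s\n", offsetNeedsLowering(255, true) ? "lower" : "ok");
  printf("int    -4: %s\n", offsetNeedsLowering(-4, false) ? "lower" : "ok");
  return 0;
}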
+ if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); + + MIB.addImm(Addr.Offset); } - return false; + AddOptionalDefs(MIB); } -bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, - unsigned Reg, int Offset) { - +bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) { + assert(VT.isSimple() && "Non-simple types are invalid here!"); unsigned Opc; - + TargetRegisterClass *RC; switch (VT.getSimpleVT().SimpleTy) { - default: - assert(false && "Trying to emit for an unhandled type!"); - return false; + // This is mostly going to be Neon/vector support. + default: return false; case MVT::i16: - Opc = isThumb ? ARM::tLDRH : ARM::LDRH; - VT = MVT::i32; + Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH; + RC = ARM::GPRRegisterClass; break; case MVT::i8: - Opc = isThumb ? ARM::tLDRB : ARM::LDRB; - VT = MVT::i32; + Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12; + RC = ARM::GPRRegisterClass; break; case MVT::i32: - Opc = isThumb ? ARM::tLDR : ARM::LDR; + Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12; + RC = ARM::GPRRegisterClass; + break; + case MVT::f32: + Opc = ARM::VLDRS; + RC = TLI.getRegClassFor(VT); + break; + case MVT::f64: + Opc = ARM::VLDRD; + RC = TLI.getRegClassFor(VT); break; } - - ResultReg = createResultReg(TLI.getRegClassFor(VT)); - - // TODO: Fix the Addressing modes so that these can share some code. - // Since this is a Thumb1 load this will work in Thumb1 or 2 mode. - if (isThumb) - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(Opc), ResultReg) - .addReg(Reg).addImm(Offset).addReg(0)); - else - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(Opc), ResultReg) - .addReg(Reg).addReg(0).addImm(Offset)); - + // Simplify this down to something we can handle. + ARMSimplifyAddress(Addr, VT); + + // Create the base instruction, then add the operands. + ResultReg = createResultReg(RC); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg); + AddLoadStoreOperands(VT, Addr, MIB); return true; } -bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg) { - Value *Op1 = I->getOperand(1); +bool ARMFastISel::SelectLoad(const Instruction *I) { + // Verify we have a legal type before going any further. + MVT VT; + if (!isLoadTypeLegal(I->getType(), VT)) + return false; - // Verify it's an alloca. - if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) { - DenseMap<const AllocaInst*, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); + // See if we can handle this address. + Address Addr; + if (!ARMComputeAddress(I->getOperand(0), Addr)) return false; - if (SI != FuncInfo.StaticAllocaMap.end()) { - TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); - assert(SrcReg != 0 && "Nothing to store!"); - TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt, - SrcReg, true /*isKill*/, SI->second, RC, - TM.getRegisterInfo()); - return true; - } - } - return false; + unsigned ResultReg; + if (!ARMEmitLoad(VT, ResultReg, Addr)) return false; + UpdateValueMap(I, ResultReg); + return true; } -bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, - unsigned DstReg, int Offset) { +bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { unsigned StrOpc; switch (VT.getSimpleVT().SimpleTy) { + // This is mostly going to be Neon/vector support. default: return false; - case MVT::i1: - case MVT::i8: StrOpc = isThumb ? ARM::tSTRB : ARM::STRB; break; - case MVT::i16: StrOpc = isThumb ? 
ARM::tSTRH : ARM::STRH; break; - case MVT::i32: StrOpc = isThumb ? ARM::tSTR : ARM::STR; break; + case MVT::i1: { + unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass : + ARM::GPRRegisterClass); + unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), Res) + .addReg(SrcReg).addImm(1)); + SrcReg = Res; + } // Fallthrough here. + case MVT::i8: + StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12; + break; + case MVT::i16: + StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH; + break; + case MVT::i32: + StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12; + break; case MVT::f32: if (!Subtarget->hasVFP2()) return false; StrOpc = ARM::VSTRS; @@ -548,91 +915,162 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, StrOpc = ARM::VSTRD; break; } - - if (isThumb) - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(StrOpc), SrcReg) - .addReg(DstReg).addImm(Offset).addReg(0)); - else - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(StrOpc), SrcReg) - .addReg(DstReg).addReg(0).addImm(Offset)); - + // Simplify this down to something we can handle. + ARMSimplifyAddress(Addr, VT); + + // Create the base instruction, then add the operands. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(StrOpc)) + .addReg(SrcReg, getKillRegState(true)); + AddLoadStoreOperands(VT, Addr, MIB); return true; } -bool ARMFastISel::ARMSelectStore(const Instruction *I) { +bool ARMFastISel::SelectStore(const Instruction *I) { Value *Op0 = I->getOperand(0); unsigned SrcReg = 0; - // Yay type legalization - EVT VT; + // Verify we have a legal type before going any further. + MVT VT; if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT)) return false; // Get the value to be stored into a register. SrcReg = getRegForValue(Op0); - if (SrcReg == 0) - return false; - - // If we're an alloca we know we have a frame index and can emit the store - // quickly. - if (ARMStoreAlloca(I, SrcReg)) - return true; - - // Our register and offset with innocuous defaults. - unsigned Reg = 0; - int Offset = 0; - - // See if we can handle this as Reg + Offset - if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset)) - return false; - - if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false; - - return false; - -} + if (SrcReg == 0) return false; -bool ARMFastISel::ARMSelectLoad(const Instruction *I) { - // If we're an alloca we know we have a frame index and can emit the load - // directly in short order. - if (ARMLoadAlloca(I)) - return true; - - // Verify we have a legal type before going any further. - EVT VT; - if (!isLoadTypeLegal(I->getType(), VT)) - return false; - - // Our register and offset with innocuous defaults. - unsigned Reg = 0; - int Offset = 0; - - // See if we can handle this as Reg + Offset - if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset)) + // See if we can handle this address. + Address Addr; + if (!ARMComputeAddress(I->getOperand(1), Addr)) return false; - - unsigned ResultReg; - if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false; - - UpdateValueMap(I, ResultReg); + + if (!ARMEmitStore(VT, SrcReg, Addr)) return false; return true; } -bool ARMFastISel::ARMSelectBranch(const Instruction *I) { +static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) { + switch (Pred) { + // Needs two compares... + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UEQ: + default: + // AL is our "false" for now. The other two need more compares. 
+ return ARMCC::AL; + case CmpInst::ICMP_EQ: + case CmpInst::FCMP_OEQ: + return ARMCC::EQ; + case CmpInst::ICMP_SGT: + case CmpInst::FCMP_OGT: + return ARMCC::GT; + case CmpInst::ICMP_SGE: + case CmpInst::FCMP_OGE: + return ARMCC::GE; + case CmpInst::ICMP_UGT: + case CmpInst::FCMP_UGT: + return ARMCC::HI; + case CmpInst::FCMP_OLT: + return ARMCC::MI; + case CmpInst::ICMP_ULE: + case CmpInst::FCMP_OLE: + return ARMCC::LS; + case CmpInst::FCMP_ORD: + return ARMCC::VC; + case CmpInst::FCMP_UNO: + return ARMCC::VS; + case CmpInst::FCMP_UGE: + return ARMCC::PL; + case CmpInst::ICMP_SLT: + case CmpInst::FCMP_ULT: + return ARMCC::LT; + case CmpInst::ICMP_SLE: + case CmpInst::FCMP_ULE: + return ARMCC::LE; + case CmpInst::FCMP_UNE: + case CmpInst::ICMP_NE: + return ARMCC::NE; + case CmpInst::ICMP_UGE: + return ARMCC::HS; + case CmpInst::ICMP_ULT: + return ARMCC::LO; + } +} + +bool ARMFastISel::SelectBranch(const Instruction *I) { const BranchInst *BI = cast<BranchInst>(I); MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - + // Simple branch support. - unsigned CondReg = getRegForValue(BI->getCondition()); - if (CondReg == 0) return false; - - unsigned CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr; - unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; + + // If we can, avoid recomputing the compare - redoing it could lead to wonky + // behavior. + // TODO: Factor this out. + if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { + if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { + MVT VT; + const Type *Ty = CI->getOperand(0)->getType(); + if (!isTypeLegal(Ty, VT)) + return false; + + bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); + if (isFloat && !Subtarget->hasVFP2()) + return false; + + unsigned CmpOpc; + switch (VT.SimpleTy) { + default: return false; + // TODO: Verify compares. + case MVT::f32: + CmpOpc = ARM::VCMPES; + break; + case MVT::f64: + CmpOpc = ARM::VCMPED; + break; + case MVT::i32: + CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr; + break; + } + + // Get the compare predicate. + ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); + + // We may not handle every CC for now. + if (ARMPred == ARMCC::AL) return false; + + unsigned Arg1 = getRegForValue(CI->getOperand(0)); + if (Arg1 == 0) return false; + + unsigned Arg2 = getRegForValue(CI->getOperand(1)); + if (Arg2 == 0) return false; + + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CmpOpc)) + .addReg(Arg1).addReg(Arg2)); + + // For floating point we need to move the result to a comparison register + // that we can then use for branches. + if (isFloat) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::FMSTAT))); + + unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) + .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR); + FastEmitBranch(FBB, DL); + FuncInfo.MBB->addSuccessor(TBB); + return true; + } + } + + unsigned CmpReg = getRegForValue(BI->getCondition()); + if (CmpReg == 0) return false; + + // Re-set the flags just in case. + unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) - .addReg(CondReg).addReg(CondReg)); + .addReg(CmpReg).addImm(0)); + + unsigned BrOpc = isThumb ? 
ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR); FastEmitBranch(FBB, DL); @@ -640,18 +1078,809 @@ bool ARMFastISel::ARMSelectBranch(const Instruction *I) { return true; } +bool ARMFastISel::SelectCmp(const Instruction *I) { + const CmpInst *CI = cast<CmpInst>(I); + + MVT VT; + const Type *Ty = CI->getOperand(0)->getType(); + if (!isTypeLegal(Ty, VT)) + return false; + + bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); + if (isFloat && !Subtarget->hasVFP2()) + return false; + + unsigned CmpOpc; + unsigned CondReg; + switch (VT.SimpleTy) { + default: return false; + // TODO: Verify compares. + case MVT::f32: + CmpOpc = ARM::VCMPES; + CondReg = ARM::FPSCR; + break; + case MVT::f64: + CmpOpc = ARM::VCMPED; + CondReg = ARM::FPSCR; + break; + case MVT::i32: + CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr; + CondReg = ARM::CPSR; + break; + } + + // Get the compare predicate. + ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); + + // We may not handle every CC for now. + if (ARMPred == ARMCC::AL) return false; + + unsigned Arg1 = getRegForValue(CI->getOperand(0)); + if (Arg1 == 0) return false; + + unsigned Arg2 = getRegForValue(CI->getOperand(1)); + if (Arg2 == 0) return false; + + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) + .addReg(Arg1).addReg(Arg2)); + + // For floating point we need to move the result to a comparison register + // that we can then use for branches. + if (isFloat) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::FMSTAT))); + + // Now set a register based on the comparison. Explicitly set the predicates + // here. + unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi; + TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass + : ARM::GPRRegisterClass; + unsigned DestReg = createResultReg(RC); + Constant *Zero + = ConstantInt::get(Type::getInt32Ty(*Context), 0); + unsigned ZeroReg = TargetMaterializeConstant(Zero); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg) + .addReg(ZeroReg).addImm(1) + .addImm(ARMPred).addReg(CondReg); + + UpdateValueMap(I, DestReg); + return true; +} + +bool ARMFastISel::SelectFPExt(const Instruction *I) { + // Make sure we have VFP and that we're extending float to double. + if (!Subtarget->hasVFP2()) return false; + + Value *V = I->getOperand(0); + if (!I->getType()->isDoubleTy() || + !V->getType()->isFloatTy()) return false; + + unsigned Op = getRegForValue(V); + if (Op == 0) return false; + + unsigned Result = createResultReg(ARM::DPRRegisterClass); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VCVTDS), Result) + .addReg(Op)); + UpdateValueMap(I, Result); + return true; +} + +bool ARMFastISel::SelectFPTrunc(const Instruction *I) { + // Make sure we have VFP and that we're truncating double to float. + if (!Subtarget->hasVFP2()) return false; + + Value *V = I->getOperand(0); + if (!(I->getType()->isFloatTy() && + V->getType()->isDoubleTy())) return false; + + unsigned Op = getRegForValue(V); + if (Op == 0) return false; + + unsigned Result = createResultReg(ARM::SPRRegisterClass); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VCVTSD), Result) + .addReg(Op)); + UpdateValueMap(I, Result); + return true; +} + +bool ARMFastISel::SelectSIToFP(const Instruction *I) { + // Make sure we have VFP. 
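+ // (VSITOS/VSITOD below are VFP instructions, hence the subtarget check.)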
+ if (!Subtarget->hasVFP2()) return false; + + MVT DstVT; + const Type *Ty = I->getType(); + if (!isTypeLegal(Ty, DstVT)) + return false; + + unsigned Op = getRegForValue(I->getOperand(0)); + if (Op == 0) return false; + + // The conversion routine works on fp-reg to fp-reg, and the operand above + // was an integer; move it to the fp registers if possible. + unsigned FP = ARMMoveToFPReg(MVT::f32, Op); + if (FP == 0) return false; + + unsigned Opc; + if (Ty->isFloatTy()) Opc = ARM::VSITOS; + else if (Ty->isDoubleTy()) Opc = ARM::VSITOD; + else return false; + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), + ResultReg) + .addReg(FP)); + UpdateValueMap(I, ResultReg); + return true; +} + +bool ARMFastISel::SelectFPToSI(const Instruction *I) { + // Make sure we have VFP. + if (!Subtarget->hasVFP2()) return false; + + MVT DstVT; + const Type *RetTy = I->getType(); + if (!isTypeLegal(RetTy, DstVT)) + return false; + + unsigned Op = getRegForValue(I->getOperand(0)); + if (Op == 0) return false; + + unsigned Opc; + const Type *OpTy = I->getOperand(0)->getType(); + if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS; + else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD; + else return false; + + // f64->s32 or f32->s32 both need an intermediate f32 reg. + unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), + ResultReg) + .addReg(Op)); + + // This result needs to be in an integer register, but the conversion only + // takes place in fp-regs. + unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg); + if (IntReg == 0) return false; + + UpdateValueMap(I, IntReg); + return true; +} + +bool ARMFastISel::SelectSelect(const Instruction *I) { + MVT VT; + if (!isTypeLegal(I->getType(), VT)) + return false; + + // Things need to be register sized for register moves. + if (VT != MVT::i32) return false; + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + + unsigned CondReg = getRegForValue(I->getOperand(0)); + if (CondReg == 0) return false; + unsigned Op1Reg = getRegForValue(I->getOperand(1)); + if (Op1Reg == 0) return false; + unsigned Op2Reg = getRegForValue(I->getOperand(2)); + if (Op2Reg == 0) return false; + + unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) + .addReg(CondReg).addImm(1)); + unsigned ResultReg = createResultReg(RC); + unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) + .addReg(Op1Reg).addReg(Op2Reg) + .addImm(ARMCC::EQ).addReg(ARM::CPSR); + UpdateValueMap(I, ResultReg); + return true; +} + +bool ARMFastISel::SelectSDiv(const Instruction *I) { + MVT VT; + const Type *Ty = I->getType(); + if (!isTypeLegal(Ty, VT)) + return false; + + // If we have integer div support we should have selected this automagically. + // In case we have a real miss go ahead and return false and we'll pick + // it up later. + if (Subtarget->hasDivide()) return false; + + // Otherwise emit a libcall.
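+ // (Pick the runtime routine matching the operand width; the assert below catches anything unsupported.)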
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i8) + LC = RTLIB::SDIV_I8; + else if (VT == MVT::i16) + LC = RTLIB::SDIV_I16; + else if (VT == MVT::i32) + LC = RTLIB::SDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::SDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::SDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); + + return ARMEmitLibcall(I, LC); +} + +bool ARMFastISel::SelectSRem(const Instruction *I) { + MVT VT; + const Type *Ty = I->getType(); + if (!isTypeLegal(Ty, VT)) + return false; + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i8) + LC = RTLIB::SREM_I8; + else if (VT == MVT::i16) + LC = RTLIB::SREM_I16; + else if (VT == MVT::i32) + LC = RTLIB::SREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::SREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::SREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); + + return ARMEmitLibcall(I, LC); +} + +bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { + EVT VT = TLI.getValueType(I->getType(), true); + + // We can get here in the case when we want to use NEON for our fp + // operations, but can't figure out how to. Just use the vfp instructions + // if we have them. + // FIXME: It'd be nice to use NEON instructions. + const Type *Ty = I->getType(); + bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); + if (isFloat && !Subtarget->hasVFP2()) + return false; + + unsigned Op1 = getRegForValue(I->getOperand(0)); + if (Op1 == 0) return false; + + unsigned Op2 = getRegForValue(I->getOperand(1)); + if (Op2 == 0) return false; + + unsigned Opc; + bool is64bit = VT == MVT::f64 || VT == MVT::i64; + switch (ISDOpcode) { + default: return false; + case ISD::FADD: + Opc = is64bit ? ARM::VADDD : ARM::VADDS; + break; + case ISD::FSUB: + Opc = is64bit ? ARM::VSUBD : ARM::VSUBS; + break; + case ISD::FMUL: + Opc = is64bit ? ARM::VMULD : ARM::VMULS; + break; + } + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addReg(Op1).addReg(Op2)); + UpdateValueMap(I, ResultReg); + return true; +} + +// Call Handling Code + +bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, + EVT SrcVT, unsigned &ResultReg) { + unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, + Src, /*TODO: Kill=*/false); + + if (RR != 0) { + ResultReg = RR; + return true; + } else + return false; +} + +// This is largely taken directly from CCAssignFnForNode - we don't support +// varargs in FastISel so that part has been removed. +// TODO: We may not support all of this. +CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) { + switch (CC) { + default: + llvm_unreachable("Unsupported calling convention"); + case CallingConv::Fast: + // Ignore fastcc. Silence compiler warnings. + (void)RetFastCC_ARM_APCS; + (void)FastCC_ARM_APCS; + // Fallthrough + case CallingConv::C: + // Use target triple & subtarget features to do actual dispatch. + if (Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && + FloatABIType == FloatABI::Hard) + return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + else + return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + } else + return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + case CallingConv::ARM_APCS: + return (Return ? 
RetCC_ARM_APCS: CC_ARM_APCS); + } +} + +bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, + SmallVectorImpl<unsigned> &ArgRegs, + SmallVectorImpl<MVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, + SmallVectorImpl<unsigned> &RegArgs, + CallingConv::ID CC, + unsigned &NumBytes) { + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, false, TM, ArgLocs, *Context); + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false)); + + // Get a count of how many bytes are to be pushed on the stack. + NumBytes = CCInfo.getNextStackOffset(); + + // Issue CALLSEQ_START + unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(AdjStackDown)) + .addImm(NumBytes)); + + // Process the args. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + unsigned Arg = ArgRegs[VA.getValNo()]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // We don't handle NEON/vector parameters yet. + if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64) + return false; + + // Handle arg promotion, etc. + switch (VA.getLocInfo()) { + case CCValAssign::Full: break; + case CCValAssign::SExt: { + bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + assert(Emitted && "Failed to emit a sext!"); (void)Emitted; + Emitted = true; + ArgVT = VA.getLocVT(); + break; + } + case CCValAssign::ZExt: { + bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + assert(Emitted && "Failed to emit a zext!"); (void)Emitted; + Emitted = true; + ArgVT = VA.getLocVT(); + break; + } + case CCValAssign::AExt: { + bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + if (!Emitted) + Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + if (!Emitted) + Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + + assert(Emitted && "Failed to emit an aext!"); (void)Emitted; + ArgVT = VA.getLocVT(); + break; + } + case CCValAssign::BCvt: { + unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg, + /*TODO: Kill=*/false); + assert(BC != 0 && "Failed to emit a bitcast!"); + Arg = BC; + ArgVT = VA.getLocVT(); + break; + } + default: llvm_unreachable("Unknown arg promotion!"); + } + + // Now copy/store arg to correct locations. + if (VA.isRegLoc() && !VA.needsCustom()) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + VA.getLocReg()) + .addReg(Arg); + RegArgs.push_back(VA.getLocReg()); + } else if (VA.needsCustom()) { + // TODO: We need custom lowering for vector (v2f64) args. + if (VA.getLocVT() != MVT::f64) return false; + + CCValAssign &NextVA = ArgLocs[++i]; + + // TODO: Only handle register args for now. + if (!(VA.isRegLoc() && NextVA.isRegLoc())) return false; + + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVRRD), VA.getLocReg()) + .addReg(NextVA.getLocReg(), RegState::Define) + .addReg(Arg)); + RegArgs.push_back(VA.getLocReg()); + RegArgs.push_back(NextVA.getLocReg()); + } else { + assert(VA.isMemLoc()); + // Need to store on the stack.
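+ // (Builds an SP-relative address for the outgoing argument slot so the store emitter can be reused.)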
+ Address Addr; + Addr.BaseType = Address::RegBase; + Addr.Base.Reg = ARM::SP; + Addr.Offset = VA.getLocMemOffset(); + + if (!ARMEmitStore(ArgVT, Arg, Addr)) return false; + } + } + return true; +} + +bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, + const Instruction *I, CallingConv::ID CC, + unsigned &NumBytes) { + // Issue CALLSEQ_END + unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(AdjStackUp)) + .addImm(NumBytes).addImm(0)); + + // Now the return value. + if (RetVT != MVT::isVoid) { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CC, false, TM, RVLocs, *Context); + CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true)); + + // Copy all of the result registers out of their specified physreg. + if (RVLocs.size() == 2 && RetVT == MVT::f64) { + // For this move we copy into two registers and then move into the + // double fp reg we want. + EVT DestVT = RVLocs[0].getValVT(); + TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); + unsigned ResultReg = createResultReg(DstRC); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVDRR), ResultReg) + .addReg(RVLocs[0].getLocReg()) + .addReg(RVLocs[1].getLocReg())); + + UsedRegs.push_back(RVLocs[0].getLocReg()); + UsedRegs.push_back(RVLocs[1].getLocReg()); + + // Finally update the result. + UpdateValueMap(I, ResultReg); + } else { + assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!"); + EVT CopyVT = RVLocs[0].getValVT(); + TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); + + unsigned ResultReg = createResultReg(DstRC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(RVLocs[0].getLocReg()); + UsedRegs.push_back(RVLocs[0].getLocReg()); + + // Finally update the result. + UpdateValueMap(I, ResultReg); + } + } + + return true; +} + +bool ARMFastISel::SelectRet(const Instruction *I) { + const ReturnInst *Ret = cast<ReturnInst>(I); + const Function &F = *I->getParent()->getParent(); + + if (!FuncInfo.CanLowerReturn) + return false; + + if (F.isVarArg()) + return false; + + CallingConv::ID CC = F.getCallingConv(); + if (Ret->getNumOperands() > 0) { + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), + Outs, TLI); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ValLocs; + CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext()); + CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */)); + + const Value *RV = Ret->getOperand(0); + unsigned Reg = getRegForValue(RV); + if (Reg == 0) + return false; + + // Only handle a single return value for now. + if (ValLocs.size() != 1) + return false; + + CCValAssign &VA = ValLocs[0]; + + // Don't bother handling odd stuff for now. + if (VA.getLocInfo() != CCValAssign::Full) + return false; + // Only handle register returns for now. + if (!VA.isRegLoc()) + return false; + // TODO: For now, don't try to handle cases where getLocInfo() + // says Full but the types don't match. + if (TLI.getValueType(RV->getType()) != VA.getValVT()) + return false; + + // Make the copy. + unsigned SrcReg = Reg + VA.getValNo(); + unsigned DstReg = VA.getLocReg(); + const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); + // Avoid a cross-class copy. This is very unlikely. 
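+ // (Returning false here falls back to SelectionDAG instead of emitting an illegal copy.)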
+ if (!SrcRC->contains(DstReg)) + return false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + DstReg).addReg(SrcReg); + + // Mark the register as live out of the function. + MRI.addLiveOut(VA.getLocReg()); + } + + unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(RetOpc))); + return true; +} + +// A quick function that will emit a call for a named libcall in F with the +// vector of passed arguments for the Instruction in I. We can assume that we +// can emit a call for any libcall we can produce. This is an abridged version +// of the full call infrastructure since we won't need to worry about things +// like computed function pointers or strange arguments at call sites. +// TODO: Try to unify this and the normal call bits for ARM, then try to unify +// with X86. +bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { + CallingConv::ID CC = TLI.getLibcallCallingConv(Call); + + // Handle *simple* calls for now. + const Type *RetTy = I->getType(); + MVT RetVT; + if (RetTy->isVoidTy()) + RetVT = MVT::isVoid; + else if (!isTypeLegal(RetTy, RetVT)) + return false; + + // For now we're using BLX etc on the assumption that we have v5t ops. + if (!Subtarget->hasV5TOps()) return false; + + // TODO: For now if we have long calls specified we don't handle the call. + if (EnableARMLongCalls) return false; + + // Set up the argument vectors. + SmallVector<Value*, 8> Args; + SmallVector<unsigned, 8> ArgRegs; + SmallVector<MVT, 8> ArgVTs; + SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; + Args.reserve(I->getNumOperands()); + ArgRegs.reserve(I->getNumOperands()); + ArgVTs.reserve(I->getNumOperands()); + ArgFlags.reserve(I->getNumOperands()); + for (unsigned i = 0; i < I->getNumOperands(); ++i) { + Value *Op = I->getOperand(i); + unsigned Arg = getRegForValue(Op); + if (Arg == 0) return false; + + const Type *ArgTy = Op->getType(); + MVT ArgVT; + if (!isTypeLegal(ArgTy, ArgVT)) return false; + + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); + Flags.setOrigAlign(OriginalAlignment); + + Args.push_back(Op); + ArgRegs.push_back(Arg); + ArgVTs.push_back(ArgVT); + ArgFlags.push_back(Flags); + } + + // Handle the arguments now that we've gotten them. + SmallVector<unsigned, 4> RegArgs; + unsigned NumBytes; + if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) + return false; + + // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. + // TODO: Turn this into the table of arm call ops. + MachineInstrBuilder MIB; + unsigned CallOpc; + if(isThumb) { + CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi; + // Explicitly adding the predicate here. + MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc))) + .addExternalSymbol(TLI.getLibcallName(Call)); + } else { + CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL; + // Explicitly adding the predicate here. + MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc)) + .addExternalSymbol(TLI.getLibcallName(Call))); + } + + // Add implicit physical register uses to the call. + for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) + MIB.addReg(RegArgs[i]); + + // Finish off the call including any return values. + SmallVector<unsigned, 4> UsedRegs; + if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; + + // Set all unused physreg defs as dead. 
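+ // (UsedRegs was filled in by FinishCall with the registers that hold the return value.)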
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + + return true; +} + +bool ARMFastISel::SelectCall(const Instruction *I) { + const CallInst *CI = cast<CallInst>(I); + const Value *Callee = CI->getCalledValue(); + + // Can't handle inline asm or worry about intrinsics yet. + if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false; + + // Only handle global variable Callees that are direct calls. + const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); + if (!GV || Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel())) + return false; + + // Check the calling convention. + ImmutableCallSite CS(CI); + CallingConv::ID CC = CS.getCallingConv(); + + // TODO: Avoid some calling conventions? + + // Let SDISel handle vararg functions. + const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + if (FTy->isVarArg()) + return false; + + // Handle *simple* calls for now. + const Type *RetTy = I->getType(); + MVT RetVT; + if (RetTy->isVoidTy()) + RetVT = MVT::isVoid; + else if (!isTypeLegal(RetTy, RetVT)) + return false; + + // For now we're using BLX etc on the assumption that we have v5t ops. + // TODO: Maybe? + if (!Subtarget->hasV5TOps()) return false; + + // TODO: For now if we have long calls specified we don't handle the call. + if (EnableARMLongCalls) return false; + + // Set up the argument vectors. + SmallVector<Value*, 8> Args; + SmallVector<unsigned, 8> ArgRegs; + SmallVector<MVT, 8> ArgVTs; + SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; + Args.reserve(CS.arg_size()); + ArgRegs.reserve(CS.arg_size()); + ArgVTs.reserve(CS.arg_size()); + ArgFlags.reserve(CS.arg_size()); + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + unsigned Arg = getRegForValue(*i); + + if (Arg == 0) + return false; + ISD::ArgFlagsTy Flags; + unsigned AttrInd = i - CS.arg_begin() + 1; + if (CS.paramHasAttr(AttrInd, Attribute::SExt)) + Flags.setSExt(); + if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) + Flags.setZExt(); + + // FIXME: Only handle *easy* calls for now. + if (CS.paramHasAttr(AttrInd, Attribute::InReg) || + CS.paramHasAttr(AttrInd, Attribute::StructRet) || + CS.paramHasAttr(AttrInd, Attribute::Nest) || + CS.paramHasAttr(AttrInd, Attribute::ByVal)) + return false; + + const Type *ArgTy = (*i)->getType(); + MVT ArgVT; + if (!isTypeLegal(ArgTy, ArgVT)) + return false; + unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); + Flags.setOrigAlign(OriginalAlignment); + + Args.push_back(*i); + ArgRegs.push_back(Arg); + ArgVTs.push_back(ArgVT); + ArgFlags.push_back(Flags); + } + + // Handle the arguments now that we've gotten them. + SmallVector<unsigned, 4> RegArgs; + unsigned NumBytes; + if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) + return false; + + // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. + // TODO: Turn this into the table of arm call ops. + MachineInstrBuilder MIB; + unsigned CallOpc; + // Explicitly adding the predicate here. + if(isThumb) { + CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi; + // Explicitly adding the predicate here. + MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc))) + .addGlobalAddress(GV, 0, 0); + } else { + CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL; + // Explicitly adding the predicate here. 
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc)) + .addGlobalAddress(GV, 0, 0)); + } + + // Add implicit physical register uses to the call. + for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) + MIB.addReg(RegArgs[i]); + + // Finish off the call including any return values. + SmallVector<unsigned, 4> UsedRegs; + if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; + + // Set all unused physreg defs as dead. + static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + + return true; + +} + // TODO: SoftFP support. bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { - // No Thumb-1 for now. - if (isThumb && !AFI->isThumb2Function()) return false; - + switch (I->getOpcode()) { case Instruction::Load: - return ARMSelectLoad(I); + return SelectLoad(I); case Instruction::Store: - return ARMSelectStore(I); + return SelectStore(I); case Instruction::Br: - return ARMSelectBranch(I); + return SelectBranch(I); + case Instruction::ICmp: + case Instruction::FCmp: + return SelectCmp(I); + case Instruction::FPExt: + return SelectFPExt(I); + case Instruction::FPTrunc: + return SelectFPTrunc(I); + case Instruction::SIToFP: + return SelectSIToFP(I); + case Instruction::FPToSI: + return SelectFPToSI(I); + case Instruction::FAdd: + return SelectBinaryOp(I, ISD::FADD); + case Instruction::FSub: + return SelectBinaryOp(I, ISD::FSUB); + case Instruction::FMul: + return SelectBinaryOp(I, ISD::FMUL); + case Instruction::SDiv: + return SelectSDiv(I); + case Instruction::SRem: + return SelectSRem(I); + case Instruction::Call: + return SelectCall(I); + case Instruction::Select: + return SelectSelect(I); + case Instruction::Ret: + return SelectRet(I); default: break; } return false; @@ -659,7 +1888,14 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { namespace llvm { llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { - if (EnableARMFastISel) return new ARMFastISel(funcInfo); + // Completely untested on non-darwin. + const TargetMachine &TM = funcInfo.MF->getTarget(); + + // Darwin and thumb1 only for now. + const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); + if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() && + !DisableARMFastISel) + return new ARMFastISel(funcInfo); return 0; } } diff --git a/lib/Target/ARM/ARMFixupKinds.h b/lib/Target/ARM/ARMFixupKinds.h new file mode 100644 index 0000000..3d175e3 --- /dev/null +++ b/lib/Target/ARM/ARMFixupKinds.h @@ -0,0 +1,97 @@ +//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ARM_ARMFIXUPKINDS_H +#define LLVM_ARM_ARMFIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace ARM { +enum Fixups { + // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol + // addresses + fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind, + + // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with + // the 16-bit halfwords reordered. + fixup_t2_ldst_pcrel_12, + + // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses + // used in VFP instructions where the lower 2 bits are not encoded + // (so it's encoded as an 8-bit immediate). 
+ fixup_arm_pcrel_10, + // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for + // the short-swapped encoding of Thumb2 instructions. + fixup_t2_pcrel_10, + // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol + // addresses where the lower 2 bits are not encoded (so it's encoded as an + // 8-bit immediate). + fixup_thumb_adr_pcrel_10, + // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR + // instruction. + fixup_arm_adr_pcrel_12, + // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR + // instruction. + fixup_t2_adr_pcrel_12, + // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch + // instructions. + fixup_arm_condbranch, + // fixup_arm_uncondbranch - 24-bit PC relative relocation for unconditional + // branch instructions. + fixup_arm_uncondbranch, + // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct + // conditional branch instructions. + fixup_t2_condbranch, + // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct + // unconditional branch instructions. + fixup_t2_uncondbranch, + + // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions. + fixup_arm_thumb_br, + + // fixup_arm_thumb_bl - Fixup for Thumb BL instructions. + fixup_arm_thumb_bl, + + // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions. + fixup_arm_thumb_blx, + + // fixup_arm_thumb_cb - Fixup for Thumb CBZ/CBNZ compare-and-branch + // instructions. + fixup_arm_thumb_cb, + + // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs. + fixup_arm_thumb_cp, + + // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions. + fixup_arm_thumb_bcc, + + // The next four are for the movt/movw pairs; + // the 16-bit imm field is split into imm{15-12} and imm{11-0} + fixup_arm_movt_hi16, // :upper16: + fixup_arm_movw_lo16, // :lower16: + fixup_t2_movt_hi16, // :upper16: + fixup_t2_movw_lo16, // :lower16: + + // It is possible to create an "immediate" that happens to be pcrel. + // movw r0, :lower16:Foo-(Bar+8) and movt r0, :upper16:Foo-(Bar+8) + // result in different reloc tags than the above two. + // Needed to support ELF::R_ARM_MOVT_PREL and ELF::R_ARM_MOVW_PREL_NC + fixup_arm_movt_hi16_pcrel, // :upper16: + fixup_arm_movw_lo16_pcrel, // :lower16: + fixup_t2_movt_hi16_pcrel, // :upper16: + fixup_t2_movw_lo16_pcrel, // :lower16: + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +} +} + +#endif diff --git a/lib/Target/ARM/ARMFrameInfo.h b/lib/Target/ARM/ARMFrameInfo.h deleted file mode 100644 index d5dae24..0000000 --- a/lib/Target/ARM/ARMFrameInfo.h +++ /dev/null @@ -1,32 +0,0 @@ -//===-- ARMTargetFrameInfo.h - Define TargetFrameInfo for ARM ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details.
-// -//===----------------------------------------------------------------------===// -// -// -// -//===----------------------------------------------------------------------===// - -#ifndef ARM_FRAMEINFO_H -#define ARM_FRAMEINFO_H - -#include "ARM.h" -#include "ARMSubtarget.h" -#include "llvm/Target/TargetFrameInfo.h" - -namespace llvm { - -class ARMFrameInfo : public TargetFrameInfo { -public: - explicit ARMFrameInfo(const ARMSubtarget &ST) - : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0, 4) { - } -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp new file mode 100644 index 0000000..f42c6db --- /dev/null +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -0,0 +1,1021 @@ +//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of the TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "ARMFrameLowering.h" +#include "ARMAddressingModes.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; + +/// hasFP - Return true if the specified function should have a dedicated frame +/// pointer register. This is true if the function has variable sized allocas +/// or if frame pointer elimination is disabled. +bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + + // Mac OS X requires FP not to be clobbered for backtracing purposes. + if (STI.isTargetDarwin()) + return true; + + const MachineFrameInfo *MFI = MF.getFrameInfo(); + // Always eliminate non-leaf frame pointers. + return ((DisableFramePointerElim(MF) && MFI->hasCalls()) || + RegInfo->needsStackRealignment(MF) || + MFI->hasVarSizedObjects() || + MFI->isFrameAddressTaken()); +} + +/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is +/// not required, we reserve argument space for call sites in the function +/// immediately on entry to the current function. This eliminates the need for +/// add/sub sp brackets around call sites. Returns true if the call frame is +/// included as part of the stack frame. +bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + unsigned CFSize = FFI->getMaxCallFrameSize(); + // It's not always a good idea to include the call frame as part of the + // stack frame. ARM (especially Thumb) has only small immediate offsets with + // which to address the stack frame, so a large call frame can cause poor + // codegen and may even make it impossible to scavenge a register. + if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 + return false; + + return !MF.getFrameInfo()->hasVarSizedObjects(); +} + +/// canSimplifyCallFramePseudos - If there is a reserved call frame, the +/// call frame pseudos can be simplified.
Unlike most targets, having a FP +/// is not sufficient here since we still may reference some objects via SP +/// even when FP is available in Thumb2 mode. +bool +ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { + return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); +} + +static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { + for (unsigned i = 0; CSRegs[i]; ++i) + if (Reg == CSRegs[i]) + return true; + return false; +} + +static bool isCSRestore(MachineInstr *MI, + const ARMBaseInstrInfo &TII, + const unsigned *CSRegs) { + // Integer spill area is handled with "pop". + if (MI->getOpcode() == ARM::LDMIA_RET || + MI->getOpcode() == ARM::t2LDMIA_RET || + MI->getOpcode() == ARM::LDMIA_UPD || + MI->getOpcode() == ARM::t2LDMIA_UPD || + MI->getOpcode() == ARM::VLDMDIA_UPD) { + // The first two operands are predicates. The last two are + // imp-def and imp-use of SP. Check everything in between. + for (int i = 5, e = MI->getNumOperands(); i != e; ++i) + if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) + return false; + return true; + } + if ((MI->getOpcode() == ARM::LDR_POST || + MI->getOpcode() == ARM::t2LDR_POST) && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) && + MI->getOperand(1).getReg() == ARM::SP) + return true; + + return false; +} + +static void +emitSPUpdate(bool isARM, + MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + DebugLoc dl, const ARMBaseInstrInfo &TII, + int NumBytes, + ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { + if (isARM) + emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, + Pred, PredReg, TII); + else + emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, + Pred, PredReg, TII); +} + +void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const ARMBaseRegisterInfo *RegInfo = + static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseInstrInfo &TII = + *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + assert(!AFI->isThumb1OnlyFunction() && + "This emitPrologue does not support Thumb1!"); + bool isARM = !AFI->isThumbFunction(); + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned NumBytes = MFI->getStackSize(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + + // Determine the sizes of each callee-save spill areas and record which frame + // belongs to which callee-save spill areas. + unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + int FramePtrSpillFI = 0; + + // Allocate the vararg register save area. This is not counted in NumBytes. 
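+ // (Passing a negative byte count to emitSPUpdate allocates the space, since the stack grows down.)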
+ if (VARegSaveSize) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); + return; + } + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + int FI = CSI[i].getFrameIdx(); + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + break; + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + if (STI.isTargetDarwin()) { + AFI->addGPRCalleeSavedArea2Frame(FI); + GPRCS2Size += 4; + } else { + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + } + break; + default: + AFI->addDPRCalleeSavedAreaFrame(FI); + DPRCSSize += 8; + } + } + + // Move past area 1. + if (GPRCS1Size > 0) MBBI++; + + // Set FP to point to the stack slot that contains the previous FP. + // For Darwin, FP is R7, which has now been stored in spill area 1. + // Otherwise, if this is not Darwin, all the callee-saved registers go + // into spill area 1, including the FP in R11. In either case, it is + // now safe to emit this assignment. + bool HasFP = hasFP(MF); + if (HasFP) { + unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) + .addFrameIndex(FramePtrSpillFI).addImm(0); + AddDefaultCC(AddDefaultPred(MIB)); + } + + // Move past area 2. + if (GPRCS2Size > 0) MBBI++; + + // Determine starting offsets of spill areas. + unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; + unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; + if (HasFP) + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + + NumBytes); + AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); + AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); + AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); + + // Move past area 3. + if (DPRCSSize > 0) MBBI++; + + NumBytes = DPRCSOffset; + if (NumBytes) { + // Adjust SP after all the callee-save spills. + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); + if (HasFP && isARM) + // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 + // Note it's not safe to do this in Thumb2 mode because it would have + // taken two instructions: + // mov sp, r7 + // sub sp, #24 + // If an interrupt is taken between the two instructions, then sp is in + // an inconsistent state (pointing to the middle of callee-saved area). + // The interrupt handler can end up clobbering the registers. + AFI->setShouldRestoreSPFromFP(true); + } + + if (STI.isTargetELF() && hasFP(MF)) + MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - + AFI->getFramePtrSpillOffset()); + + AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); + AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setDPRCalleeSavedAreaSize(DPRCSSize); + + // If we need dynamic stack realignment, do it here. Be paranoid and make + // sure if we also have VLAs, we have a base pointer for frame access. 
+ if (RegInfo->needsStackRealignment(MF)) { + unsigned MaxAlign = MFI->getMaxAlignment(); + assert (!AFI->isThumb1OnlyFunction()); + if (!AFI->isThumbFunction()) { + // Emit bic sp, sp, MaxAlign + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, + TII.get(ARM::BICri), ARM::SP) + .addReg(ARM::SP, RegState::Kill) + .addImm(MaxAlign-1))); + } else { + // We cannot use sp as source/dest register here, thus we're emitting the + // following sequence: + // mov r4, sp + // bic r4, r4, MaxAlign + // mov sp, r4 + // FIXME: It will be better just to find spare register here. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4) + .addReg(ARM::SP, RegState::Kill); + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, + TII.get(ARM::t2BICri), ARM::R4) + .addReg(ARM::R4, RegState::Kill) + .addImm(MaxAlign-1))); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) + .addReg(ARM::R4, RegState::Kill); + } + + AFI->setShouldRestoreSPFromFP(true); + } + + // If we need a base pointer, set it up here. It's whatever the value + // of the stack pointer is at this point. Any variable size objects + // will be allocated after this, so we can still use the base pointer + // to reference locals. + if (RegInfo->hasBasePointer(MF)) { + if (isARM) + BuildMI(MBB, MBBI, dl, + TII.get(ARM::MOVr), RegInfo->getBaseRegister()) + .addReg(ARM::SP) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + else + BuildMI(MBB, MBBI, dl, + TII.get(ARM::tMOVgpr2gpr), RegInfo->getBaseRegister()) + .addReg(ARM::SP); + } + + // If the frame has variable sized objects then the epilogue must restore + // the sp from fp. We can assume there's an FP here since hasFP already + // checks for hasVarSizedObjects. + if (MFI->hasVarSizedObjects()) + AFI->setShouldRestoreSPFromFP(true); +} + +void ARMFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + assert(MBBI->getDesc().isReturn() && + "Can only insert epilog into returning blocks"); + unsigned RetOpcode = MBBI->getOpcode(); + DebugLoc dl = MBBI->getDebugLoc(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const ARMBaseInstrInfo &TII = + *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + assert(!AFI->isThumb1OnlyFunction() && + "This emitEpilogue does not support Thumb1!"); + bool isARM = !AFI->isThumbFunction(); + + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + int NumBytes = (int)MFI->getStackSize(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); + } else { + // Unwind MBBI to point to first LDR / VLDRD. + const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + if (MBBI != MBB.begin()) { + do + --MBBI; + while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); + if (!isCSRestore(MBBI, TII, CSRegs)) + ++MBBI; + } + + // Move SP to start of FP callee save spill area. + NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedAreaSize()); + + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. 
+ if (AFI->shouldRestoreSPFromFP()) { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + if (NumBytes) { + if (isARM) + emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + else { + // It's not possible to restore SP from FP in a single instruction. + // For Darwin, this looks like: + // mov sp, r7 + // sub sp, #24 + // This is bad, if an interrupt is taken after the mov, sp is in an + // inconsistent state. + // Use the first callee-saved register as a scratch register. + assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && + "No scratch register to restore SP from FP!"); + emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) + .addReg(ARM::R4); + } + } else { + // Thumb2 or ARM. + if (isARM) + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) + .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) + .addReg(FramePtr); + } + } else if (NumBytes) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); + + // Increment past our save areas. + if (AFI->getDPRCalleeSavedAreaSize()) MBBI++; + if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; + if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; + } + + if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || + RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) { + // Tail call return: adjust the stack pointer and jump to callee. + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + + // Jump to label or value in register. + if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) { + unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi) + ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd) + : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); + if (JumpTarget.isGlobal()) + MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + else { + assert(JumpTarget.isSymbol()); + MIB.addExternalSymbol(JumpTarget.getSymbolName(), + JumpTarget.getTargetFlags()); + } + } else if (RetOpcode == ARM::TCRETURNri) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)). + addReg(JumpTarget.getReg(), RegState::Kill); + } else if (RetOpcode == ARM::TCRETURNriND) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)). + addReg(JumpTarget.getReg(), RegState::Kill); + } + + MachineInstr *NewMI = prior(MBBI); + for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) + NewMI->addOperand(MBBI->getOperand(i)); + + // Delete the pseudo instruction TCRETURN. + MBB.erase(MBBI); + } + + if (VARegSaveSize) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); +} + +/// getFrameIndexReference - Provide a base+offset reference to an FI slot for +/// debug info. It's the same as what we use for resolving the code-gen +/// references for now. FIXME: This can go wrong when references are +/// SP-relative and simple call frames aren't used. 
+int +ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const { + return ResolveFrameIndexReference(MF, FI, FrameReg, 0); +} + +int +ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg, + int SPAdj) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const ARMBaseRegisterInfo *RegInfo = + static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); + int FPOffset = Offset - AFI->getFramePtrSpillOffset(); + bool isFixed = MFI->isFixedObjectIndex(FI); + + FrameReg = ARM::SP; + Offset += SPAdj; + if (AFI->isGPRCalleeSavedArea1Frame(FI)) + return Offset - AFI->getGPRCalleeSavedArea1Offset(); + else if (AFI->isGPRCalleeSavedArea2Frame(FI)) + return Offset - AFI->getGPRCalleeSavedArea2Offset(); + else if (AFI->isDPRCalleeSavedAreaFrame(FI)) + return Offset - AFI->getDPRCalleeSavedAreaOffset(); + + // When dynamically realigning the stack, use the frame pointer for + // parameters, and the stack/base pointer for locals. + if (RegInfo->needsStackRealignment(MF)) { + assert (hasFP(MF) && "dynamic stack realignment without a FP!"); + if (isFixed) { + FrameReg = RegInfo->getFrameRegister(MF); + Offset = FPOffset; + } else if (MFI->hasVarSizedObjects()) { + assert(RegInfo->hasBasePointer(MF) && + "VLAs and dynamic stack alignment, but missing base pointer!"); + FrameReg = RegInfo->getBaseRegister(); + } + return Offset; + } + + // If there is a frame pointer, use it when we can. + if (hasFP(MF) && AFI->hasStackFrame()) { + // Use frame pointer to reference fixed objects. Use it for locals if + // there are VLAs (and thus the SP isn't reliable as a base). + if (isFixed || (MFI->hasVarSizedObjects() && + !RegInfo->hasBasePointer(MF))) { + FrameReg = RegInfo->getFrameRegister(MF); + return FPOffset; + } else if (MFI->hasVarSizedObjects()) { + assert(RegInfo->hasBasePointer(MF) && "missing base pointer!"); + // Try to use the frame pointer if we can, else use the base pointer + // since it's available. This is handy for the emergency spill slot, in + // particular. + if (AFI->isThumb2Function()) { + if (FPOffset >= -255 && FPOffset < 0) { + FrameReg = RegInfo->getFrameRegister(MF); + return FPOffset; + } + } else + FrameReg = RegInfo->getBaseRegister(); + } else if (AFI->isThumb2Function()) { + // In Thumb2 mode, the negative offset is very limited. Try to avoid + // out of range references. + if (FPOffset >= -255 && FPOffset < 0) { + FrameReg = RegInfo->getFrameRegister(MF); + return FPOffset; + } + } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) { + // Otherwise, use SP or FP, whichever is closer to the stack slot. + FrameReg = RegInfo->getFrameRegister(MF); + return FPOffset; + } + } + // Use the base pointer if we have one. 
+ if (RegInfo->hasBasePointer(MF)) + FrameReg = RegInfo->getBaseRegister(); + return Offset; +} + +int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + unsigned FrameReg; + return getFrameIndexReference(MF, FI, FrameReg); +} + +void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + unsigned StmOpc, unsigned StrOpc, + bool NoGap, + bool(*Func)(unsigned, bool)) const { + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + DebugLoc DL; + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + SmallVector<std::pair<unsigned,bool>, 4> Regs; + unsigned i = CSI.size(); + while (i != 0) { + unsigned LastReg = 0; + for (; i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (!(Func)(Reg, STI.isTargetDarwin())) continue; + + // Add the callee-saved register as live-in unless it's LR and + // @llvm.returnaddress is called. If LR is returned for + // @llvm.returnaddress then it's already added to the function and + // entry block live-in sets. + bool isKill = true; + if (Reg == ARM::LR) { + if (MF.getFrameInfo()->isReturnAddressTaken() && + MF.getRegInfo().isLiveIn(Reg)) + isKill = false; + } + + if (isKill) + MBB.addLiveIn(Reg); + + // If NoGap is true, push consecutive registers and then leave the rest + // for other instructions. e.g. + // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11} + if (NoGap && LastReg && LastReg != Reg-1) + break; + LastReg = Reg; + Regs.push_back(std::make_pair(Reg, isKill)); + } + + if (Regs.empty()) + continue; + if (Regs.size() > 1 || StrOpc== 0) { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) + .addReg(ARM::SP)); + for (unsigned i = 0, e = Regs.size(); i < e; ++i) + MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); + } else if (Regs.size() == 1) { + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), + ARM::SP) + .addReg(Regs[0].first, getKillRegState(Regs[0].second)) + .addReg(ARM::SP); + // ARM mode needs an extra reg0 here due to addrmode2. Will go away once + // that refactoring is complete (eventually). + if (StrOpc == ARM::STR_PRE) { + MIB.addReg(0); + MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift)); + } else + MIB.addImm(-4); + AddDefaultPred(MIB); + } + Regs.clear(); + } +} + +void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + unsigned LdmOpc, unsigned LdrOpc, + bool isVarArg, bool NoGap, + bool(*Func)(unsigned, bool)) const { + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc DL = MI->getDebugLoc(); + unsigned RetOpcode = MI->getOpcode(); + bool isTailCall = (RetOpcode == ARM::TCRETURNdi || + RetOpcode == ARM::TCRETURNdiND || + RetOpcode == ARM::TCRETURNri || + RetOpcode == ARM::TCRETURNriND); + + SmallVector<unsigned, 4> Regs; + unsigned i = CSI.size(); + while (i != 0) { + unsigned LastReg = 0; + bool DeleteRet = false; + for (; i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (!(Func)(Reg, STI.isTargetDarwin())) continue; + + if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) { + Reg = ARM::PC; + LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; + // Fold the return instruction into the LDM. 
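+ // (Popping the saved LR into PC performs the return itself; the original return instruction is erased below.)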
+ DeleteRet = true; + } + + // If NoGap is true, pop consecutive registers and then leave the rest + // for other instructions. e.g. + // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11} + if (NoGap && LastReg && LastReg != Reg-1) + break; + + LastReg = Reg; + Regs.push_back(Reg); + } + + if (Regs.empty()) + continue; + if (Regs.size() > 1 || LdrOpc == 0) { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) + .addReg(ARM::SP)); + for (unsigned i = 0, e = Regs.size(); i < e; ++i) + MIB.addReg(Regs[i], getDefRegState(true)); + if (DeleteRet) + MI->eraseFromParent(); + MI = MIB; + } else if (Regs.size() == 1) { + // If we adjusted the reg to PC from LR above, switch it back here. We + // only do that for LDM. + if (Regs[0] == ARM::PC) + Regs[0] = ARM::LR; + MachineInstrBuilder MIB = + BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0]) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP); + // ARM mode needs an extra reg0 here due to addrmode2. Will go away once + // that refactoring is complete (eventually). + if (LdrOpc == ARM::LDR_POST) { + MIB.addReg(0); + MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); + } else + MIB.addImm(4); + AddDefaultPred(MIB); + } + Regs.clear(); + } +} + +bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD; + unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE; + unsigned FltOpc = ARM::VSTMDDB_UPD; + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register); + emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register); + + return true; +} + +bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + + unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; + unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST; + unsigned FltOpc = ARM::VLDMDIA_UPD; + emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register); + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isARMArea2Register); + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isARMArea1Register); + + return true; +} + +// FIXME: Make generic? +static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, + const ARMBaseInstrInfo &TII) { + unsigned FnSize = 0; + for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + const MachineBasicBlock &MBB = *MBBI; + for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); + I != E; ++I) + FnSize += TII.GetInstSizeInBytes(I); + } + return FnSize; +} + +/// estimateStackSize - Estimate and return the size of the frame. +/// FIXME: Make generic? 
+static unsigned estimateStackSize(MachineFunction &MF) {
+  const MachineFrameInfo *FFI = MF.getFrameInfo();
+  int Offset = 0;
+  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+    int FixedOff = -FFI->getObjectOffset(i);
+    if (FixedOff > Offset) Offset = FixedOff;
+  }
+  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+    if (FFI->isDeadObjectIndex(i))
+      continue;
+    Offset += FFI->getObjectSize(i);
+    unsigned Align = FFI->getObjectAlignment(i);
+    // Adjust to alignment boundary
+    Offset = (Offset+Align-1)/Align*Align;
+  }
+  return (unsigned)Offset;
+}
+
+/// estimateRSStackSizeLimit - Look at each instruction that references stack
+/// frames and return the stack size limit beyond which some of these
+/// instructions will require a scratch register during their expansion later.
+// FIXME: Move to TII?
+static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
+                                         const TargetFrameLowering *TFI) {
+  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  unsigned Limit = (1 << 12) - 1;
+  for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+         I != E; ++I) {
+      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+        if (!I->getOperand(i).isFI()) continue;
+
+        // When using ADDri to get the address of a stack object, 255 is the
+        // largest offset guaranteed to fit in the immediate offset.
+        if (I->getOpcode() == ARM::ADDri) {
+          Limit = std::min(Limit, (1U << 8) - 1);
+          break;
+        }
+
+        // Otherwise check the addressing mode.
+        switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+        case ARMII::AddrMode3:
+        case ARMII::AddrModeT2_i8:
+          Limit = std::min(Limit, (1U << 8) - 1);
+          break;
+        case ARMII::AddrMode5:
+        case ARMII::AddrModeT2_i8s4:
+          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
+          break;
+        case ARMII::AddrModeT2_i12:
+          // i12 supports only positive offset so these will be converted to
+          // i8 opcodes. See llvm::rewriteT2FrameIndex.
+          if (TFI->hasFP(MF) && AFI->hasStackFrame())
+            Limit = std::min(Limit, (1U << 8) - 1);
+          break;
+        case ARMII::AddrMode4:
+        case ARMII::AddrMode6:
+          // Addressing modes 4 & 6 (load/store) instructions can't encode an
+          // immediate offset for stack references.
+          return 0;
+        default:
+          break;
+        }
+        break; // At most one FI per instruction
+      }
+    }
+  }
+
+  return Limit;
+}
+
+void
+ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                       RegScavenger *RS) const {
+  // This tells PEI to spill the FP as if it is any other callee-save register
+  // to take advantage of the eliminateFrameIndex machinery. This also ensures
+  // it is spilled in the order specified by getCalleeSavedRegs() to make it
+  // easier to combine multiple loads / stores.
+  bool CanEliminateFrame = true;
+  bool CS1Spilled = false;
+  bool LRSpilled = false;
+  unsigned NumGPRSpills = 0;
+  SmallVector<unsigned, 4> UnspilledCS1GPRs;
+  SmallVector<unsigned, 4> UnspilledCS2GPRs;
+  const ARMBaseRegisterInfo *RegInfo =
+    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+  const ARMBaseInstrInfo &TII =
+    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+  // Spill R4 if Thumb2 function requires stack realignment - it will be used
+  // as a scratch register. Also spill R4 if Thumb2 function has variable-sized
+  // objects, since it's not always possible to restore sp from fp in a single
+  // instruction.
+  // FIXME: It will be better just to find a spare register here.
+  if (AFI->isThumb2Function() &&
+      (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
+    MF.getRegInfo().setPhysRegUsed(ARM::R4);
+
+  if (AFI->isThumb1OnlyFunction()) {
+    // Spill LR if Thumb1 function uses variable length argument lists.
+    if (AFI->getVarArgsRegSaveSize() > 0)
+      MF.getRegInfo().setPhysRegUsed(ARM::LR);
+
+    // Spill R4 if the Thumb1 epilogue has to restore SP from FP.
+    // FIXME: It will be better just to find a spare register here.
+    if (MFI->hasVarSizedObjects())
+      MF.getRegInfo().setPhysRegUsed(ARM::R4);
+  }
+
+  // Spill the BasePtr if it's used.
+  if (RegInfo->hasBasePointer(MF))
+    MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
+
+  // Don't spill FP if the frame can be eliminated. This is determined
+  // by scanning the callee-save registers to see if any is used.
+  const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    unsigned Reg = CSRegs[i];
+    bool Spilled = false;
+    if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+      Spilled = true;
+      CanEliminateFrame = false;
+    } else {
+      // Check alias registers too.
+      for (const unsigned *Aliases =
+             RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) {
+        if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
+          Spilled = true;
+          CanEliminateFrame = false;
+        }
+      }
+    }
+
+    if (!ARM::GPRRegisterClass->contains(Reg))
+      continue;
+
+    if (Spilled) {
+      NumGPRSpills++;
+
+      if (!STI.isTargetDarwin()) {
+        if (Reg == ARM::LR)
+          LRSpilled = true;
+        CS1Spilled = true;
+        continue;
+      }
+
+      // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
+      switch (Reg) {
+      case ARM::LR:
+        LRSpilled = true;
+        // Fallthrough
+      case ARM::R4: case ARM::R5:
+      case ARM::R6: case ARM::R7:
+        CS1Spilled = true;
+        break;
+      default:
+        break;
+      }
+    } else {
+      if (!STI.isTargetDarwin()) {
+        UnspilledCS1GPRs.push_back(Reg);
+        continue;
+      }
+
+      switch (Reg) {
+      case ARM::R4: case ARM::R5:
+      case ARM::R6: case ARM::R7:
+      case ARM::LR:
+        UnspilledCS1GPRs.push_back(Reg);
+        break;
+      default:
+        UnspilledCS2GPRs.push_back(Reg);
+        break;
+      }
+    }
+  }
+
+  bool ForceLRSpill = false;
+  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
+    unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
+    // Force LR to be spilled if the Thumb function size is > 2048. This
+    // enables the use of BL to implement a far jump. If it turns out that it's
+    // not needed, the branch fixup path will undo it.
+    if (FnSize >= (1 << 11)) {
+      CanEliminateFrame = false;
+      ForceLRSpill = true;
+    }
+  }
+
+  // If any of the stack slot references may be out of range of an immediate
+  // offset, make sure a register (or a spill slot) is available for the
+  // register scavenger. Note that if we're indexing off the frame pointer, the
+  // effective stack size is 4 bytes larger since the FP points to the stack
+  // slot of the previous FP. Also, if we have variable sized objects in the
+  // function, stack slot references will often be negative, and some of
+  // our instructions are positive-offset only, so conservatively consider
+  // that case to want a spill slot (or register) as well. Similarly, if
+  // the function adjusts the stack pointer during execution and the
+  // adjustments aren't already part of our stack size estimate, our offset
+  // calculations may be off, so be conservative.
+  // FIXME: We could add logic to be more precise about negative offsets
+  // and which instructions will need a scratch register for them. Is it
+  // worth the effort and added fragility?
+  bool BigStack =
+    (RS &&
+     (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+      estimateRSStackSizeLimit(MF, this)))
+    || MFI->hasVarSizedObjects()
+    || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+
+  bool ExtraCSSpill = false;
+  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
+    AFI->setHasStackFrame(true);
+
+    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
+    // spill LR as well so we can fold BX_RET into the register restore (LDM).
+    if (!LRSpilled && CS1Spilled) {
+      MF.getRegInfo().setPhysRegUsed(ARM::LR);
+      NumGPRSpills++;
+      UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+                                       UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+      ForceLRSpill = false;
+      ExtraCSSpill = true;
+    }
+
+    if (hasFP(MF)) {
+      MF.getRegInfo().setPhysRegUsed(FramePtr);
+      NumGPRSpills++;
+    }
+
+    // If the stack and doubles are 8-byte aligned and we are spilling an odd
+    // number of GPRs, spill one extra callee save GPR so we won't have to pad
+    // between the integer and double callee save areas.
+    unsigned TargetAlign = getStackAlignment();
+    if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+        for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+          unsigned Reg = UnspilledCS1GPRs[i];
+          // Don't spill a high register if the function is Thumb1.
+          if (!AFI->isThumb1OnlyFunction() ||
+              isARMLowRegister(Reg) || Reg == ARM::LR) {
+            MF.getRegInfo().setPhysRegUsed(Reg);
+            if (!RegInfo->isReservedReg(MF, Reg))
+              ExtraCSSpill = true;
+            break;
+          }
+        }
+      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
+        unsigned Reg = UnspilledCS2GPRs.front();
+        MF.getRegInfo().setPhysRegUsed(Reg);
+        if (!RegInfo->isReservedReg(MF, Reg))
+          ExtraCSSpill = true;
+      }
+    }
+
+    // Estimate if we might need to scavenge a register at some point in order
+    // to materialize a stack offset. If so, either spill one additional
+    // callee-saved register or reserve a special spill slot to facilitate
+    // register scavenging. Thumb1 needs a spill slot for stack pointer
+    // adjustments also, even when the frame itself is small.
+    if (BigStack && !ExtraCSSpill) {
+      // If any non-reserved CS register isn't spilled, just spill one or two
+      // extra. That should take care of it!
+      unsigned NumExtras = TargetAlign / 4;
+      SmallVector<unsigned, 2> Extras;
+      while (NumExtras && !UnspilledCS1GPRs.empty()) {
+        unsigned Reg = UnspilledCS1GPRs.back();
+        UnspilledCS1GPRs.pop_back();
+        if (!RegInfo->isReservedReg(MF, Reg) &&
+            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+             Reg == ARM::LR)) {
+          Extras.push_back(Reg);
+          NumExtras--;
+        }
+      }
+      // For non-Thumb1 functions, also check for hi-reg CS registers.
+      if (!AFI->isThumb1OnlyFunction()) {
+        while (NumExtras && !UnspilledCS2GPRs.empty()) {
+          unsigned Reg = UnspilledCS2GPRs.back();
+          UnspilledCS2GPRs.pop_back();
+          if (!RegInfo->isReservedReg(MF, Reg)) {
+            Extras.push_back(Reg);
+            NumExtras--;
+          }
+        }
+      }
+      if (Extras.size() && NumExtras == 0) {
+        for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+          MF.getRegInfo().setPhysRegUsed(Extras[i]);
+        }
+      } else if (!AFI->isThumb1OnlyFunction()) {
+        // Note: Thumb1 functions spill to R12, not the stack. Reserve a slot
+        // closest to SP or frame pointer.
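+        // The register scavenger spills a live register into this slot when
+        // it needs a scratch register to materialize an out-of-range stack
+        // offset.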
+ const TargetRegisterClass *RC = ARM::GPRRegisterClass; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + } + } + } + + if (ForceLRSpill) { + MF.getRegInfo().setPhysRegUsed(ARM::LR); + AFI->setLRIsSpilledForFarJump(true); + } +} diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h new file mode 100644 index 0000000..1288b70 --- /dev/null +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -0,0 +1,74 @@ +//==-- ARMTargetFrameLowering.h - Define frame lowering for ARM --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_FRAMEINFO_H +#define ARM_FRAMEINFO_H + +#include "ARM.h" +#include "ARMSubtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + class ARMSubtarget; + +class ARMFrameLowering : public TargetFrameLowering { +protected: + const ARMSubtarget &STI; + +public: + explicit ARMFrameLowering(const ARMSubtarget &sti) + : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4), + STI(sti) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool hasFP(const MachineFunction &MF) const; + bool hasReservedCallFrame(const MachineFunction &MF) const; + bool canSimplifyCallFramePseudos(const MachineFunction &MF) const; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const; + int ResolveFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg, int SPAdj) const; + int getFrameIndexOffset(const MachineFunction &MF, int FI) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; + + private: + void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc, + unsigned StrOpc, bool NoGap, + bool(*Func)(unsigned, bool)) const; + void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, + unsigned LdrOpc, bool isVarArg, bool NoGap, + bool(*Func)(unsigned, bool)) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp index 85b0c6c..3f02383 100644 --- a/lib/Target/ARM/ARMGlobalMerge.cpp +++ b/lib/Target/ARM/ARMGlobalMerge.cpp @@ -12,7 +12,8 @@ // global). Such a transformation can significantly reduce the register pressure // when many globals are involved. 
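The doMerge hunk below caps each merged struct at the target's MaxOffset: it accumulates TD->getTypeAllocSize(Ty) first and stops before including a global that would push the running size past the limit. A minimal standalone model of that loop, assuming illustrative names (countMergeable is not part of the patch) and ignoring alignment padding:

    #include <cstdint>
    #include <vector>

    // Starting at index i, accept consecutive globals while the running merged
    // size stays within MaxOffset, mirroring the capped loop in doMerge.
    static size_t countMergeable(const std::vector<uint64_t> &Sizes, size_t i,
                                 uint64_t MaxOffset) {
      uint64_t MergedSize = 0;
      size_t j = i;
      for (size_t e = Sizes.size(); j != e; ++j) {
        MergedSize += Sizes[j];
        if (MergedSize > MaxOffset)
          break;      // Sizes[j] does not fit; globals [i, j) form this group.
      }
      return j - i;   // Number of globals folded into one merged struct.
    }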
 //
-// For example, consider the code which touches several global variables at once:
+// For example, consider the code which touches several global variables at
+// once:
 //
 //    static int foo[N], bar[N], baz[N];
 //
@@ -48,7 +49,7 @@
 //   str     r0, [r5], #4
 //
 // note that we saved 2 registers here almost "for free".
-// ===----------------------------------------------------------------------===//
+// ===---------------------------------------------------------------------===//

 #define DEBUG_TYPE "arm-global-merge"
 #include "ARM.h"
@@ -64,16 +65,17 @@
 #include "llvm/Pass.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
 using namespace llvm;

 namespace {
-  class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass {
+  class ARMGlobalMerge : public FunctionPass {
     /// TLI - Keep a pointer of a TargetLowering to consult for determining
     /// target type sizes.
     const TargetLowering *TLI;

     bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
-                 Module &M, bool) const;
+                 Module &M, bool isConst) const;

   public:
     static char ID;             // Pass identification, replacement for typeid.
@@ -81,7 +83,7 @@ namespace {
       : FunctionPass(ID), TLI(tli) {}

     virtual bool doInitialization(Module &M);
-    virtual bool runOnFunction(Function& F);
+    virtual bool runOnFunction(Function &F);

     const char *getPassName() const {
       return "Merge internal globals";
@@ -95,13 +97,11 @@ namespace {
   struct GlobalCmp {
     const TargetData *TD;

-    GlobalCmp(const TargetData *td):
-      TD(td) { }
+    GlobalCmp(const TargetData *td) : TD(td) { }

-    bool operator() (const GlobalVariable* GV1,
-                     const GlobalVariable* GV2) {
-      const Type* Ty1 = cast<PointerType>(GV1->getType())->getElementType();
-      const Type* Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+    bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
+      const Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+      const Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();

       return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
     }
@@ -130,27 +130,27 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
     uint64_t MergedSize = 0;
     std::vector<const Type*> Tys;
     std::vector<Constant*> Inits;
-    for (j = i; MergedSize < MaxOffset && j != e; ++j) {
-      const Type* Ty = Globals[j]->getType()->getElementType();
+    for (j = i; j != e; ++j) {
+      const Type *Ty = Globals[j]->getType()->getElementType();
+      MergedSize += TD->getTypeAllocSize(Ty);
+      if (MergedSize > MaxOffset) {
+        break;
+      }
       Tys.push_back(Ty);
       Inits.push_back(Globals[j]->getInitializer());
-      MergedSize += TD->getTypeAllocSize(Ty);
     }

-    StructType* MergedTy = StructType::get(M.getContext(), Tys);
-    Constant* MergedInit = ConstantStruct::get(MergedTy, Inits);
-    GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst,
+    StructType *MergedTy = StructType::get(M.getContext(), Tys);
+    Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+    GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
                                                   GlobalValue::InternalLinkage,
-                                                  MergedInit, "merged");
+                                                  MergedInit, "_MergedGlobals");
     for (size_t k = i; k < j; ++k) {
-      SmallVector<Constant*, 2> Idx;
-      Idx.push_back(ConstantInt::get(Int32Ty, 0));
-      Idx.push_back(ConstantInt::get(Int32Ty, k-i));
-
-      Constant* GEP =
-        ConstantExpr::getInBoundsGetElementPtr(MergedGV,
-                                               &Idx[0], Idx.size());
-
+      Constant *Idx[2] = {
+        ConstantInt::get(Int32Ty, 0),
+        ConstantInt::get(Int32Ty, k-i)
+      };
+      Constant *GEP =
+        ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx, 2);
       Globals[k]->replaceAllUsesWith(GEP);
       Globals[k]->eraseFromParent();
     }
@@ -161,8 +161,8 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
 }

-bool ARMGlobalMerge::doInitialization(Module& M) {
-  SmallVector<GlobalVariable*, 16> Globals, ConstGlobals;
+bool ARMGlobalMerge::doInitialization(Module &M) {
+  SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
   const TargetData *TD = TLI->getTargetData();
   unsigned MaxOffset = TLI->getMaximalGlobalOffset();
   bool Changed = false;
@@ -183,8 +183,11 @@ bool ARMGlobalMerge::doInitialization(Module& M) {
         I->getName().startswith(".llvm."))
       continue;

-    if (TD->getTypeAllocSize(I->getType()) < MaxOffset) {
-      if (I->isConstant())
+    if (TD->getTypeAllocSize(I->getType()->getElementType()) < MaxOffset) {
+      const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
+      if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
+        BSSGlobals.push_back(I);
+      else if (I->isConstant())
         ConstGlobals.push_back(I);
       else
         Globals.push_back(I);
@@ -193,17 +196,19 @@ bool ARMGlobalMerge::doInitialization(Module& M) {
   if (Globals.size() > 1)
     Changed |= doMerge(Globals, M, false);

+  if (BSSGlobals.size() > 1)
+    Changed |= doMerge(BSSGlobals, M, false);
+
   // FIXME: This currently breaks the EH processing due to the way the
   // typeinfo detection works. We might want to detect the TIs and ignore
   // them in the future.
-
   // if (ConstGlobals.size() > 1)
   //   Changed |= doMerge(ConstGlobals, M, true);

   return Changed;
 }

-bool ARMGlobalMerge::runOnFunction(Function& F) {
+bool ARMGlobalMerge::runOnFunction(Function &F) {
   return false;
 }
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
new file mode 100644
index 0000000..676b01e
--- /dev/null
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -0,0 +1,121 @@
+//===-- ARMHazardRecognizer.cpp - ARM postra hazard recognizer ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMHazardRecognizer.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
+                         const TargetRegisterInfo &TRI) {
+  // FIXME: Detect integer instructions properly.
+  const TargetInstrDesc &TID = MI->getDesc();
+  unsigned Domain = TID.TSFlags & ARMII::DomainMask;
+  if (Domain == ARMII::DomainVFP) {
+    unsigned Opcode = MI->getOpcode();
+    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
+        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+      return false;
+  } else if (Domain == ARMII::DomainNEON) {
+    if (MI->getDesc().mayStore() || MI->getDesc().mayLoad())
+      return false;
+  } else
+    return false;
+  return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+  assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead");
+
+  MachineInstr *MI = SU->getInstr();
+
+  if (!MI->isDebugValue()) {
+    if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
+      return Hazard;
+
+    // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
+    // a VMLA / VMLS will cause a 4 cycle stall.
+    const TargetInstrDesc &TID = MI->getDesc();
+    if (LastMI && (TID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
+      MachineInstr *DefMI = LastMI;
+      const TargetInstrDesc &LastTID = LastMI->getDesc();
+      // Skip over one non-VFP / NEON instruction.
+      if (!LastTID.isBarrier() &&
+          (LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
+        MachineBasicBlock::iterator I = LastMI;
+        if (I != LastMI->getParent()->begin()) {
+          I = llvm::prior(I);
+          DefMI = &*I;
+        }
+      }
+
+      if (TII.isFpMLxInstruction(DefMI->getOpcode()) &&
+          (TII.canCauseFpMLxStall(MI->getOpcode()) ||
+           hasRAWHazard(DefMI, MI, TRI))) {
+        // Try to schedule another instruction for the next 4 cycles.
+        if (FpMLxStalls == 0)
+          FpMLxStalls = 4;
+        return Hazard;
+      }
+    }
+  }
+
+  return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+void ARMHazardRecognizer::Reset() {
+  LastMI = 0;
+  FpMLxStalls = 0;
+  ITBlockSize = 0;
+  ScoreboardHazardRecognizer::Reset();
+}
+
+void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
+  MachineInstr *MI = SU->getInstr();
+  unsigned Opcode = MI->getOpcode();
+  if (ITBlockSize) {
+    --ITBlockSize;
+  } else if (Opcode == ARM::t2IT) {
+    unsigned Mask = MI->getOperand(1).getImm();
+    unsigned NumTZ = CountTrailingZeros_32(Mask);
+    assert(NumTZ <= 3 && "Invalid IT mask!");
+    ITBlockSize = 4 - NumTZ;
+    MachineBasicBlock::iterator I = MI;
+    for (unsigned i = 0; i < ITBlockSize; ++i) {
+      // Advance to the next instruction, skipping any dbg_value instructions.
+      do {
+        ++I;
+      } while (I->isDebugValue());
+      ITBlockMIs[ITBlockSize-1-i] = &*I;
+    }
+  }
+
+  if (!MI->isDebugValue()) {
+    LastMI = MI;
+    FpMLxStalls = 0;
+  }
+
+  ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+void ARMHazardRecognizer::AdvanceCycle() {
+  if (FpMLxStalls && --FpMLxStalls == 0)
+    // Stalled for 4 cycles but still can't schedule any other instructions.
+    LastMI = 0;
+  ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void ARMHazardRecognizer::RecedeCycle() {
+  llvm_unreachable("reverse ARM hazard checking unsupported");
+}
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
new file mode 100644
index 0000000..2bc218d
--- /dev/null
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -0,0 +1,54 @@
+//===-- ARMHazardRecognizer.h - ARM Hazard Recognizers ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling ARM functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMHAZARDRECOGNIZER_H
+#define ARMHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+
+namespace llvm {
+
+class ARMBaseInstrInfo;
+class ARMBaseRegisterInfo;
+class ARMSubtarget;
+class MachineInstr;
+
+class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
+  const ARMBaseInstrInfo &TII;
+  const ARMBaseRegisterInfo &TRI;
+  const ARMSubtarget &STI;
+
+  MachineInstr *LastMI;
+  unsigned FpMLxStalls;
+  unsigned ITBlockSize;  // No. of MIs in current IT block yet to be scheduled.
+ MachineInstr *ITBlockMIs[4]; + +public: + ARMHazardRecognizer(const InstrItineraryData *ItinData, + const ARMBaseInstrInfo &tii, + const ARMBaseRegisterInfo &tri, + const ARMSubtarget &sti, + const ScheduleDAG *DAG) : + ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii), + TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {} + + virtual HazardType getHazardType(SUnit *SU, int Stalls); + virtual void Reset(); + virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + virtual void RecedeCycle(); +}; + +} // end namespace llvm + +#endif // ARMHAZARDRECOGNIZER_H diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 51a30c1..a506cff 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "arm-isel" #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMAddressingModes.h" #include "ARMTargetMachine.h" #include "llvm/CallingConv.h" @@ -41,13 +42,25 @@ DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), cl::init(false)); +static cl::opt<bool> +CheckVMLxHazard("check-vmlx-hazard", cl::Hidden, + cl::desc("Check fp vmla / vmls hazard at isel time"), + cl::init(false)); + //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. /// namespace { + +enum AddrMode2Type { + AM2_BASE, // Simple AM2 (+-imm12) + AM2_SHOP // Shifter-op AM2 +}; + class ARMDAGToDAGISel : public SelectionDAGISel { ARMBaseTargetMachine &TM; + const ARMBaseInstrInfo *TII; /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. @@ -57,7 +70,8 @@ public: explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), TM(tm), - Subtarget(&TM.getSubtarget<ARMSubtarget>()) { + TII(static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo())), + Subtarget(&TM.getSubtarget<ARMSubtarget>()) { } virtual const char *getPassName() const { @@ -72,60 +86,101 @@ public: SDNode *Select(SDNode *N); - bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A, + + bool hasNoVMLxHazardUse(SDNode *N) const; + bool isShifterOpProfitable(const SDValue &Shift, + ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); + bool SelectShifterOperandReg(SDValue N, SDValue &A, SDValue &B, SDValue &C); - bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Offset, SDValue &Opc); + bool SelectShiftShifterOperandReg(SDValue N, SDValue &A, + SDValue &B, SDValue &C); + bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); + bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); + + AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Opc) { + return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE; + } + + bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Opc) { + return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP; + } + + bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Opc) { + SelectAddrMode2Worker(N, Base, Offset, Opc); +// return SelectAddrMode2ShOp(N, Base, Offset, Opc); + // This always matches one way or another. 
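+    // Either way the worker filled in Base/Offset/Opc: AM2_BASE is the plain
+    // base-register form, AM2_SHOP the shifter-op form, so the classification
+    // result can be ignored here.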
+ return true; + } + bool SelectAddrMode2Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base, + bool SelectAddrMode3(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); bool SelectAddrMode3Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode4(SDNode *Op, SDValue N, SDValue &Addr, - SDValue &Mode); - bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base, + bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset); - bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align); - - bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset, - SDValue &Label); - - bool SelectThumbAddrModeRR(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Offset); - bool SelectThumbAddrModeRI5(SDNode *Op, SDValue N, unsigned Scale, - SDValue &Base, SDValue &OffImm, - SDValue &Offset); - bool SelectThumbAddrModeS1(SDNode *Op, SDValue N, SDValue &Base, - SDValue &OffImm, SDValue &Offset); - bool SelectThumbAddrModeS2(SDNode *Op, SDValue N, SDValue &Base, - SDValue &OffImm, SDValue &Offset); - bool SelectThumbAddrModeS4(SDNode *Op, SDValue N, SDValue &Base, - SDValue &OffImm, SDValue &Offset); - bool SelectThumbAddrModeSP(SDNode *Op, SDValue N, SDValue &Base, - SDValue &OffImm); - - bool SelectT2ShifterOperandReg(SDNode *Op, SDValue N, + bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); + + bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); + + // Thumb Addressing Modes: + bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); + bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset, + unsigned Scale); + bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset); + bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset); + bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset); + bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, + SDValue &OffImm); + bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, + SDValue &OffImm); + bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, + SDValue &OffImm); + bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, + SDValue &OffImm); + bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); + + // Thumb 2 Addressing Modes: + bool SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg, SDValue &Opc); - bool SelectT2AddrModeImm12(SDNode *Op, SDValue N, SDValue &Base, - SDValue &OffImm); - bool SelectT2AddrModeImm8(SDNode *Op, SDValue N, SDValue &Base, + bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); + bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, SDValue &OffImm); - bool SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, SDValue &Base, - SDValue &OffImm); - bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base, + bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm); + inline bool is_so_imm(unsigned Imm) const { + return ARM_AM::getSOImmVal(Imm) != -1; + } + + inline bool is_so_imm_not(unsigned Imm) const { + return ARM_AM::getSOImmVal(~Imm) != -1; + } + + inline bool is_t2_so_imm(unsigned Imm) const { + return ARM_AM::getT2SOImmVal(Imm) != -1; + } + + inline bool is_t2_so_imm_not(unsigned Imm) const { + return ARM_AM::getT2SOImmVal(~Imm) != -1; + } + inline bool Pred_so_imm(SDNode *inN) const { ConstantSDNode *N = cast<ConstantSDNode>(inN); 
- return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; + return is_so_imm(N->getZExtValue()); } inline bool Pred_t2_so_imm(SDNode *inN) const { ConstantSDNode *N = cast<ConstantSDNode>(inN); - return ARM_AM::getT2SOImmVal(N->getZExtValue()) != -1; + return is_t2_so_imm(N->getZExtValue()); } // Include the pieces autogenerated from the target description. @@ -141,22 +196,30 @@ private: /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for /// loads of D registers and even subregs and odd subregs of Q registers. /// For NumVecs <= 2, QOpcodes1 is not used. - SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, + SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, + unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); /// SelectVST - Select NEON store intrinsics. NumVecs should /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for /// stores of D registers and even subregs and odd subregs of Q registers. /// For NumVecs <= 2, QOpcodes1 is not used. - SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, + SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, + unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should /// be 2, 3 or 4. The opcode arrays specify the instructions used for - /// load/store of D registers and even subregs and odd subregs of Q registers. - SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs, - unsigned *DOpcodes, unsigned *QOpcodes0, - unsigned *QOpcodes1); + /// load/store of D registers and Q registers. + SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, + bool isUpdating, unsigned NumVecs, + unsigned *DOpcodes, unsigned *QOpcodes); + + /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs + /// should be 2, 3 or 4. The opcode array specifies the instructions used + /// for loading D registers. (Q registers are not supported.) + SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, + unsigned *Opcodes); /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be @@ -174,10 +237,10 @@ private: SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); - SDNode *SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, + SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); - SDNode *SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, + SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); @@ -199,9 +262,8 @@ private: SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - // Form sequences of 8 consecutive D registers. - SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3, - SDValue V4, SDValue V5, SDValue V6, SDValue V7); + // Get the alignment operand for a NEON VLD or VST instruction. 
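+  // (The value is clamped to the largest alignment the instruction can
+  // actually encode.)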
+  SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
 };
 }

@@ -229,9 +291,85 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
          isInt32Immediate(N->getOperand(1).getNode(), Imm);
 }

+/// \brief Check whether a particular node is a constant value representable as
+/// (N * Scale) where N is in [\arg RangeMin, \arg RangeMax).
+///
+/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
+static bool isScaledConstantInRange(SDValue Node, unsigned Scale,
+                                    int RangeMin, int RangeMax,
+                                    int &ScaledConstant) {
+  assert(Scale && "Invalid scale!");
+
+  // Check that this is a constant.
+  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
+  if (!C)
+    return false;
+
+  ScaledConstant = (int) C->getZExtValue();
+  if ((ScaledConstant % Scale) != 0)
+    return false;
+
+  ScaledConstant /= Scale;
+  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
+}
+
+/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
+/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
+/// least on current ARM implementations) which should be avoided.
+bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
+  if (OptLevel == CodeGenOpt::None)
+    return true;
+
+  if (!CheckVMLxHazard)
+    return true;
+
+  if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9())
+    return true;
+
+  if (!N->hasOneUse())
+    return false;
+
+  SDNode *Use = *N->use_begin();
+  if (Use->getOpcode() == ISD::CopyToReg)
+    return true;
+  if (Use->isMachineOpcode()) {
+    const TargetInstrDesc &TID = TII->get(Use->getMachineOpcode());
+    if (TID.mayStore())
+      return true;
+    unsigned Opcode = TID.getOpcode();
+    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+      return true;
+    // vmlx feeding into another vmlx. We actually want to unfold
+    // the use later in the MLxExpansion pass. e.g.
+    // vmla
+    // vmla (stall 8 cycles)
+    //
+    // vmul (5 cycles)
+    // vadd (5 cycles)
+    // vmla
+    // This adds up to about 18 - 19 cycles.
+    //
+    // vmla
+    // vmul (stall 4 cycles)
+    // vadd
+    // This adds up to about 14 cycles.
+    return TII->isFpMLxInstruction(Opcode);
+  }
+
+  return false;
+}
+
+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+                                            ARM_AM::ShiftOpc ShOpcVal,
+                                            unsigned ShAmt) {
+  if (!Subtarget->isCortexA9())
+    return true;
+  if (Shift.hasOneUse())
+    return true;
+  // R << 2 is free.
+  return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
 }

-bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
-                                              SDValue N,
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
                                               SDValue &BaseReg,
                                               SDValue &ShReg,
                                               SDValue &Opc) {
@@ -251,16 +389,92 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
     ShImmVal = RHS->getZExtValue() & 31;
   } else {
     ShReg = N.getOperand(1);
+    if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+      return false;
   }
   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                   MVT::i32);
   return true;
 }

-bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
-                                      SDValue &Base, SDValue &Offset,
+bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N,
+                                                   SDValue &BaseReg,
+                                                   SDValue &ShReg,
+                                                   SDValue &Opc) {
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+  // Don't match base register only case. That is matched to a separate
+  // lower complexity pattern with explicit register operand.
+  if (ShOpcVal == ARM_AM::no_shift) return false;
+
+  BaseReg = N.getOperand(0);
+  unsigned ShImmVal = 0;
+  // Do not check isShifterOpProfitable. This must return true.
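+  // Callers use this only for operands that really are shifts, so there is no
+  // cheaper base-register pattern to fall back to here.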
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + ShReg = CurDAG->getRegister(0, MVT::i32); + ShImmVal = RHS->getZExtValue() & 31; + } else { + ShReg = N.getOperand(1); + } + Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), + MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, + SDValue &Base, + SDValue &OffImm) { + // Match simple R + imm12 operands. + + // Base only. + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && + !CurDAG->isBaseWithConstantOffset(N)) { + if (N.getOpcode() == ISD::FrameIndex) { + // Match frame index. + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + if (N.getOpcode() == ARMISD::Wrapper && + !(Subtarget->useMovt() && + N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + Base = N.getOperand(0); + } else + Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (N.getOpcode() == ISD::SUB) + RHSC = -RHSC; + + if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + + // Base only. + Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + + + +bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { - if (N.getOpcode() == ISD::MUL) { + if (N.getOpcode() == ISD::MUL && + (!Subtarget->isCortexA9() || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { // X * [3,5,9] -> X + X * [2,4,8] etc. int RHSC = (int)RHS->getZExtValue(); @@ -283,7 +497,114 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N, } } - if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) { + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && + // ISD::OR that is equivalent to an ISD::ADD. + !CurDAG->isBaseWithConstantOffset(N)) + return false; + + // Leave simple R +/- imm12 operands for LDRi12 + if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, + -0x1000+1, 0x1000, RHSC)) // 12 bits. + return false; + } + + if (Subtarget->isCortexA9() && !N.hasOneUse()) + // Compute R +/- (R << N) and reuse it. + return false; + + // Otherwise this is R +/- [possibly shifted] R. + ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1)); + unsigned ShAmt = 0; + + Base = N.getOperand(0); + Offset = N.getOperand(1); + + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't fold + // it. + if (ConstantSDNode *Sh = + dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { + ShAmt = Sh->getZExtValue(); + if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) + Offset = N.getOperand(1).getOperand(0); + else { + ShAmt = 0; + ShOpcVal = ARM_AM::no_shift; + } + } else { + ShOpcVal = ARM_AM::no_shift; + } + } + + // Try matching (R shl C) + (R). 
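+  // If the shift is on the LHS instead, swap the roles below: the RHS becomes
+  // the base and the shifted LHS is folded in as the offset.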
+ if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && + !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { + ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0)); + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't + // fold it. + if (ConstantSDNode *Sh = + dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { + ShAmt = Sh->getZExtValue(); + if (!Subtarget->isCortexA9() || + (N.hasOneUse() && + isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) { + Offset = N.getOperand(0).getOperand(0); + Base = N.getOperand(1); + } else { + ShAmt = 0; + ShOpcVal = ARM_AM::no_shift; + } + } else { + ShOpcVal = ARM_AM::no_shift; + } + } + } + + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), + MVT::i32); + return true; +} + + + + +//----- + +AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, + SDValue &Base, + SDValue &Offset, + SDValue &Opc) { + if (N.getOpcode() == ISD::MUL && + (!Subtarget->isCortexA9() || N.hasOneUse())) { + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + // X * [3,5,9] -> X + X * [2,4,8] etc. + int RHSC = (int)RHS->getZExtValue(); + if (RHSC & 1) { + RHSC = RHSC & ~1; + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + if (isPowerOf2_32(RHSC)) { + unsigned ShAmt = Log2_32(RHSC); + Base = Offset = N.getOperand(0); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, + ARM_AM::lsl), + MVT::i32); + return AM2_SHOP; + } + } + } + } + + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && + // ISD::OR that is equivalent to an ADD. + !CurDAG->isBaseWithConstantOffset(N)) { Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); @@ -297,36 +618,45 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N, Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, ARM_AM::no_shift), MVT::i32); - return true; + return AM2_BASE; } // Match simple R +/- imm12 operands. - if (N.getOpcode() == ISD::ADD) - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int RHSC = (int)RHS->getZExtValue(); - if ((RHSC >= 0 && RHSC < 0x1000) || - (RHSC < 0 && RHSC > -0x1000)) { // 12 bits. - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); - } - Offset = CurDAG->getRegister(0, MVT::i32); + if (N.getOpcode() != ISD::SUB) { + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, + -0x1000+1, 0x1000, RHSC)) { // 12 bits. + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + Offset = CurDAG->getRegister(0, MVT::i32); - ARM_AM::AddrOpc AddSub = ARM_AM::add; - if (RHSC < 0) { - AddSub = ARM_AM::sub; - RHSC = - RHSC; - } - Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC, - ARM_AM::no_shift), - MVT::i32); - return true; + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; } + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC, + ARM_AM::no_shift), + MVT::i32); + return AM2_BASE; } + } + + if (Subtarget->isCortexA9() && !N.hasOneUse()) { + // Compute R +/- (R << N) and reuse it. 
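+    // With multiple uses on Cortex-A9 it is cheaper to materialize the address
+    // once and share it than to re-fold the shift into every access, so fall
+    // back to a plain base register here.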
+ Base = N; + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, + ARM_AM::no_shift), + MVT::i32); + return AM2_BASE; + } // Otherwise this is R +/- [possibly shifted] R. - ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub; + ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub; ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1)); unsigned ShAmt = 0; @@ -339,14 +669,20 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N, if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { ShAmt = Sh->getZExtValue(); - Offset = N.getOperand(1).getOperand(0); + if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) + Offset = N.getOperand(1).getOperand(0); + else { + ShAmt = 0; + ShOpcVal = ARM_AM::no_shift; + } } else { ShOpcVal = ARM_AM::no_shift; } } // Try matching (R shl C) + (R). - if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) { + if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && + !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0)); if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't @@ -354,8 +690,15 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N, if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { ShAmt = Sh->getZExtValue(); - Offset = N.getOperand(0).getOperand(0); - Base = N.getOperand(1); + if (!Subtarget->isCortexA9() || + (N.hasOneUse() && + isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) { + Offset = N.getOperand(0).getOperand(0); + Base = N.getOperand(1); + } else { + ShAmt = 0; + ShOpcVal = ARM_AM::no_shift; + } } else { ShOpcVal = ARM_AM::no_shift; } @@ -364,7 +707,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N, Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), MVT::i32); - return true; + return AM2_SHOP; } bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N, @@ -375,15 +718,13 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N, : cast<StoreSDNode>(Op)->getAddressingMode(); ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) ? ARM_AM::add : ARM_AM::sub; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { - int Val = (int)C->getZExtValue(); - if (Val >= 0 && Val < 0x1000) { // 12 bits. - Offset = CurDAG->getRegister(0, MVT::i32); - Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, - ARM_AM::no_shift), - MVT::i32); - return true; - } + int Val; + if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, + ARM_AM::no_shift), + MVT::i32); + return true; } Offset = N; @@ -394,7 +735,12 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N, // it. 
       if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
         ShAmt = Sh->getZExtValue();
-        Offset = N.getOperand(0);
+        if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+          Offset = N.getOperand(0);
+        else {
+          ShAmt = 0;
+          ShOpcVal = ARM_AM::no_shift;
+        }
       } else {
         ShOpcVal = ARM_AM::no_shift;
       }
@@ -406,7 +752,7 @@
 }

-bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                       SDValue &Base, SDValue &Offset,
                                       SDValue &Opc) {
   if (N.getOpcode() == ISD::SUB) {
@@ -417,7 +763,7 @@
     return true;
   }

-  if (N.getOpcode() != ISD::ADD) {
+  if (!CurDAG->isBaseWithConstantOffset(N)) {
     Base = N;
     if (N.getOpcode() == ISD::FrameIndex) {
       int FI = cast<FrameIndexSDNode>(N)->getIndex();
@@ -429,25 +775,23 @@
   }

   // If the RHS is +/- imm8, fold into addr mode.
-  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-    int RHSC = (int)RHS->getZExtValue();
-    if ((RHSC >= 0 && RHSC < 256) ||
-        (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed.
-      Base = N.getOperand(0);
-      if (Base.getOpcode() == ISD::FrameIndex) {
-        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
-        Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
-      }
-      Offset = CurDAG->getRegister(0, MVT::i32);
+  int RHSC;
+  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+                              -256 + 1, 256, RHSC)) { // 8 bits.
+    Base = N.getOperand(0);
+    if (Base.getOpcode() == ISD::FrameIndex) {
+      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    }
+    Offset = CurDAG->getRegister(0, MVT::i32);

-      ARM_AM::AddrOpc AddSub = ARM_AM::add;
-      if (RHSC < 0) {
-        AddSub = ARM_AM::sub;
-        RHSC = - RHSC;
-      }
-      Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
-      return true;
+    ARM_AM::AddrOpc AddSub = ARM_AM::add;
+    if (RHSC < 0) {
+      AddSub = ARM_AM::sub;
+      RHSC = -RHSC;
     }
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32);
+    return true;
   }

   Base = N.getOperand(0);
@@ -464,13 +808,11 @@
                     : cast<StoreSDNode>(Op)->getAddressingMode();
   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
     ? ARM_AM::add : ARM_AM::sub;
-  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
-    int Val = (int)C->getZExtValue();
-    if (Val >= 0 && Val < 256) {
-      Offset = CurDAG->getRegister(0, MVT::i32);
-      Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
-      return true;
-    }
+  int Val;
+  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
+ Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32); + return true; } Offset = N; @@ -478,16 +820,9 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N, - SDValue &Addr, SDValue &Mode) { - Addr = N; - Mode = CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32); - return true; -} - -bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset) { - if (N.getOpcode() != ISD::ADD) { + if (!CurDAG->isBaseWithConstantOffset(N)) { Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); @@ -503,28 +838,23 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N, } // If the RHS is +/- imm8, fold into addr mode. - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int RHSC = (int)RHS->getZExtValue(); - if ((RHSC & 3) == 0) { // The constant is implicitly multiplied by 4. - RHSC >>= 2; - if ((RHSC >= 0 && RHSC < 256) || - (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed. - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); - } + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, + -256 + 1, 256, RHSC)) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } - ARM_AM::AddrOpc AddSub = ARM_AM::add; - if (RHSC < 0) { - AddSub = ARM_AM::sub; - RHSC = - RHSC; - } - Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), - MVT::i32); - return true; - } + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = -RHSC; } + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), + MVT::i32); + return true; } Base = N; @@ -533,30 +863,50 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N, - SDValue &Addr, SDValue &Align) { +bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, + SDValue &Align) { Addr = N; - // Default to no alignment. - Align = CurDAG->getTargetConstant(0, MVT::i32); + + unsigned Alignment = 0; + if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) { + // This case occurs only for VLD1-lane/dup and VST1-lane instructions. + // The maximum alignment is equal to the memory size being referenced. + unsigned LSNAlign = LSN->getAlignment(); + unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8; + if (LSNAlign > MemSize && MemSize > 1) + Alignment = MemSize; + } else { + // All other uses of addrmode6 are for intrinsics. For now just record + // the raw alignment value; it will be refined later based on the legal + // alignment operands for the intrinsic. 
+ Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment(); + } + + Align = CurDAG->getTargetConstant(Alignment, MVT::i32); return true; } -bool ARMDAGToDAGISel::SelectAddrModePC(SDNode *Op, SDValue N, +bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label) { if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { Offset = N.getOperand(0); SDValue N1 = N.getOperand(1); - Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), - MVT::i32); + Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), + MVT::i32); return true; } + return false; } -bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N, + +//===----------------------------------------------------------------------===// +// Thumb Addressing Modes +//===----------------------------------------------------------------------===// + +bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset){ - // FIXME dl should come from the parent load or store, not the address - if (N.getOpcode() != ISD::ADD) { + if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); if (!NC || !NC->isNullValue()) return false; @@ -571,82 +921,137 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N, } bool -ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDNode *Op, SDValue N, - unsigned Scale, SDValue &Base, - SDValue &OffImm, SDValue &Offset) { +ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base, + SDValue &Offset, unsigned Scale) { + if (Scale == 4) { + SDValue TmpBase, TmpOffImm; + if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm)) + return false; // We want to select tLDRspi / tSTRspi instead. + + if (N.getOpcode() == ARMISD::Wrapper && + N.getOperand(0).getOpcode() == ISD::TargetConstantPool) + return false; // We want to select tLDRpci instead. + } + + if (!CurDAG->isBaseWithConstantOffset(N)) + return false; + + // Thumb does not have [sp, r] address mode. + RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); + RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1)); + if ((LHSR && LHSR->getReg() == ARM::SP) || + (RHSR && RHSR->getReg() == ARM::SP)) + return false; + + // FIXME: Why do we explicitly check for a match here and then return false? + // Presumably to allow something else to match, but shouldn't this be + // documented? + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) + return false; + + Base = N.getOperand(0); + Offset = N.getOperand(1); + return true; +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N, + SDValue &Base, + SDValue &Offset) { + return SelectThumbAddrModeRI(N, Base, Offset, 1); +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N, + SDValue &Base, + SDValue &Offset) { + return SelectThumbAddrModeRI(N, Base, Offset, 2); +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N, + SDValue &Base, + SDValue &Offset) { + return SelectThumbAddrModeRI(N, Base, Offset, 4); +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, + SDValue &Base, SDValue &OffImm) { if (Scale == 4) { SDValue TmpBase, TmpOffImm; - if (SelectThumbAddrModeSP(Op, N, TmpBase, TmpOffImm)) + if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm)) return false; // We want to select tLDRspi / tSTRspi instead. + if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() == ISD::TargetConstantPool) return false; // We want to select tLDRpci instead. 
} - if (N.getOpcode() != ISD::ADD) { + if (!CurDAG->isBaseWithConstantOffset(N)) { if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { Base = N.getOperand(0); - } else + } else { Base = N; + } - Offset = CurDAG->getRegister(0, MVT::i32); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } - // Thumb does not have [sp, r] address mode. RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1)); if ((LHSR && LHSR->getReg() == ARM::SP) || (RHSR && RHSR->getReg() == ARM::SP)) { + ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0)); + ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); + unsigned LHSC = LHS ? LHS->getZExtValue() : 0; + unsigned RHSC = RHS ? RHS->getZExtValue() : 0; + + // Thumb does not have [sp, #imm5] address mode for non-zero imm5. + if (LHSC != 0 || RHSC != 0) return false; + Base = N; - Offset = CurDAG->getRegister(0, MVT::i32); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } // If the RHS is + imm5 * scale, fold into addr mode. - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int RHSC = (int)RHS->getZExtValue(); - if ((RHSC & (Scale-1)) == 0) { // The constant is implicitly multiplied. - RHSC /= Scale; - if (RHSC >= 0 && RHSC < 32) { - Base = N.getOperand(0); - Offset = CurDAG->getRegister(0, MVT::i32); - OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); - return true; - } - } + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; } Base = N.getOperand(0); - Offset = N.getOperand(1); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } -bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDNode *Op, SDValue N, - SDValue &Base, SDValue &OffImm, - SDValue &Offset) { - return SelectThumbAddrModeRI5(Op, N, 1, Base, OffImm, Offset); +bool +ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, + SDValue &OffImm) { + return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); } -bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDNode *Op, SDValue N, - SDValue &Base, SDValue &OffImm, - SDValue &Offset) { - return SelectThumbAddrModeRI5(Op, N, 2, Base, OffImm, Offset); +bool +ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, + SDValue &OffImm) { + return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); } -bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDNode *Op, SDValue N, - SDValue &Base, SDValue &OffImm, - SDValue &Offset) { - return SelectThumbAddrModeRI5(Op, N, 4, Base, OffImm, Offset); +bool +ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, + SDValue &OffImm) { + return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); } -bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N, - SDValue &Base, SDValue &OffImm) { +bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, + SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); @@ -654,35 +1059,35 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N, return true; } - if (N.getOpcode() != ISD::ADD) + if (!CurDAG->isBaseWithConstantOffset(N)) return false; RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); if (N.getOperand(0).getOpcode() == ISD::FrameIndex || (LHSR && LHSR->getReg() == 
ARM::SP)) { // If the RHS is + imm8 * scale, fold into addr mode. - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int RHSC = (int)RHS->getZExtValue(); - if ((RHSC & 3) == 0) { // The constant is implicitly multiplied. - RHSC >>= 2; - if (RHSC >= 0 && RHSC < 256) { - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); - } - OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); - return true; - } + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); } + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; } } return false; } -bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N, - SDValue &BaseReg, + +//===----------------------------------------------------------------------===// +// Thumb 2 Addressing Modes +//===----------------------------------------------------------------------===// + + +bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg, SDValue &Opc) { if (DisableShifterOp) return false; @@ -704,19 +1109,22 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N, return false; } -bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDNode *Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm) { // Match simple R + imm12 operands. // Base only. - if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) { + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && + !CurDAG->isBaseWithConstantOffset(N)) { if (N.getOpcode() == ISD::FrameIndex) { - // Match frame index... + // Match frame index. int FI = cast<FrameIndexSDNode>(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; - } else if (N.getOpcode() == ARMISD::Wrapper && + } + + if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { Base = N.getOperand(0); @@ -729,7 +1137,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDNode *Op, SDValue N, } if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - if (SelectT2AddrModeImm8(Op, N, Base, OffImm)) + if (SelectT2AddrModeImm8(N, Base, OffImm)) // Let t2LDRi8 handle (R - imm8). return false; @@ -754,24 +1162,26 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDNode *Op, SDValue N, return true; } -bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDNode *Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm) { // Match simple R - imm8 operands. 
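
// How the two Thumb-2 immediate forms split the offset space, distilled from
// the range checks in this routine and in SelectT2AddrModeImm12 above (the
// enum and function are illustrative, not part of the tree): non-negative
// offsets below 4096 go to the i12 form, small negative offsets to the i8
// form, and everything else needs a register offset or explicit arithmetic.
enum T2OffsetForm { T2_Imm12, T2_Imm8, T2_NoImmForm };
static T2OffsetForm classifyT2Offset(int Offset) {
  if (Offset >= 0 && Offset < 0x1000)
    return T2_Imm12;   // t2LDRi12 / t2STRi12
  if (Offset >= -255 && Offset < 0)
    return T2_Imm8;    // t2LDRi8 / t2STRi8; negative offsets only here
  return T2_NoImmForm;
}
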
- if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::SUB) { - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int RHSC = (int)RHS->getSExtValue(); - if (N.getOpcode() == ISD::SUB) - RHSC = -RHSC; - - if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); - } - OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); - return true; + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && + !CurDAG->isBaseWithConstantOffset(N)) + return false; + + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getSExtValue(); + if (N.getOpcode() == ISD::SUB) + RHSC = -RHSC; + + if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); } + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; } } @@ -784,52 +1194,22 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) ? cast<LoadSDNode>(Op)->getAddressingMode() : cast<StoreSDNode>(Op)->getAddressingMode(); - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N)) { - int RHSC = (int)RHS->getZExtValue(); - if (RHSC >= 0 && RHSC < 0x100) { // 8 bits. - OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) - ? CurDAG->getTargetConstant(RHSC, MVT::i32) - : CurDAG->getTargetConstant(-RHSC, MVT::i32); - return true; - } - } - - return false; -} - -bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, - SDValue &Base, SDValue &OffImm) { - if (N.getOpcode() == ISD::ADD) { - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int RHSC = (int)RHS->getZExtValue(); - // 8 bits. - if (((RHSC & 0x3) == 0) && - ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) { - Base = N.getOperand(0); - OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); - return true; - } - } - } else if (N.getOpcode() == ISD::SUB) { - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int RHSC = (int)RHS->getZExtValue(); - // 8 bits. - if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { - Base = N.getOperand(0); - OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32); - return true; - } - } + int RHSC; + if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. + OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) + ? CurDAG->getTargetConstant(RHSC, MVT::i32) + : CurDAG->getTargetConstant(-RHSC, MVT::i32); + return true; } return false; } -bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N, +bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm) { // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. - if (N.getOpcode() != ISD::ADD) + if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) return false; // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. @@ -841,6 +1221,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N, return false; } + if (Subtarget->isCortexA9() && !N.hasOneUse()) { + // Compute R + (R << [1,2,3]) and reuse it. + Base = N; + return false; + } + // Look for (R + R) or (R + (R << [1,2,3])). 
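
// The fold decision made below, restated as a sketch (names illustrative;
// the real check also consults isShifterOpProfitable, which this sketch
// takes as a flag): only a left shift by at most 3 can ride along in the
// Thumb-2 register-offset form, anything else is demoted to a plain
// register offset with no shift.
static unsigned foldableT2ShiftAmount(bool IsLSL, unsigned ShAmt,
                                      bool Profitable) {
  if (IsLSL && ShAmt < 4 && Profitable)
    return ShAmt; // becomes the instruction's ShImm operand
  return 0;       // no shift folded; offset register used as-is
}
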
unsigned ShAmt = 0; Base = N.getOperand(0); @@ -859,11 +1245,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N, // it. if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { ShAmt = Sh->getZExtValue(); - if (ShAmt >= 4) { + if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) + OffReg = OffReg.getOperand(0); + else { ShAmt = 0; ShOpcVal = ARM_AM::no_shift; - } else - OffReg = OffReg.getOperand(0); + } } else { ShOpcVal = ARM_AM::no_shift; } @@ -1045,52 +1432,43 @@ SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1, return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); } -/// OctoDRegs - Form 8 consecutive D registers. -/// -SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1, - SDValue V2, SDValue V3, - SDValue V4, SDValue V5, - SDValue V6, SDValue V7) { - DebugLoc dl = V0.getNode()->getDebugLoc(); - SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); - SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); - SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32); - SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32); - SDValue SubReg4 = CurDAG->getTargetConstant(ARM::dsub_4, MVT::i32); - SDValue SubReg5 = CurDAG->getTargetConstant(ARM::dsub_5, MVT::i32); - SDValue SubReg6 = CurDAG->getTargetConstant(ARM::dsub_6, MVT::i32); - SDValue SubReg7 = CurDAG->getTargetConstant(ARM::dsub_7, MVT::i32); - const SDValue Ops[] ={ V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3, - V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16); -} - -/// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type -/// for a 64-bit subregister of the vector. -static EVT GetNEONSubregVT(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("unhandled NEON type"); - case MVT::v16i8: return MVT::v8i8; - case MVT::v8i16: return MVT::v4i16; - case MVT::v4f32: return MVT::v2f32; - case MVT::v4i32: return MVT::v2i32; - case MVT::v2i64: return MVT::v1i64; - } +/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand +/// of a NEON VLD or VST instruction. The supported values depend on the +/// number of registers being loaded. +SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs, + bool is64BitVector) { + unsigned NumRegs = NumVecs; + if (!is64BitVector && NumVecs < 3) + NumRegs *= 2; + + unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); + if (Alignment >= 32 && NumRegs == 4) + Alignment = 32; + else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) + Alignment = 16; + else if (Alignment >= 8) + Alignment = 8; + else + Alignment = 0; + + return CurDAG->getTargetConstant(Alignment, MVT::i32); } -SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, +SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) + unsigned AddrOpIdx = isUpdating ? 
1 : 2; + if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); EVT VT = N->getValueType(0); bool is64BitVector = VT.is64BitVector(); + Align = GetVLDSTAlign(Align, NumVecs, is64BitVector); unsigned OpcodeIndex; switch (VT.getSimpleVT().SimpleTy) { @@ -1120,88 +1498,97 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, ResTyElts *= 2; ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); } + std::vector<EVT> ResTys; + ResTys.push_back(ResTy); + if (isUpdating) + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::Other); SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SDValue SuperReg; - if (is64BitVector) { - unsigned Opc = DOpcodes[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5); - if (NumVecs == 1) - return VLd; - - SuperReg = SDValue(VLd, 0); - assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec, - dl, VT, SuperReg); - ReplaceUses(SDValue(N, Vec), D); - } - ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); - return NULL; - } - - if (NumVecs <= 2) { - // Quad registers are directly supported for VLD1 and VLD2, - // loading pairs of D regs. - unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5); - if (NumVecs == 1) - return VLd; + SDNode *VLd; + SmallVector<SDValue, 7> Ops; - SuperReg = SDValue(VLd, 0); - Chain = SDValue(VLd, 1); + // Double registers and VLD1/VLD2 quad registers are directly supported. + if (is64BitVector || NumVecs <= 2) { + unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : + QOpcodes0[OpcodeIndex]); + Ops.push_back(MemAddr); + Ops.push_back(Align); + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); + } + Ops.push_back(Pred); + Ops.push_back(Reg0); + Ops.push_back(Chain); + VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); } else { // Otherwise, quad registers are loaded with two separate instructions, // where one loads the even registers and the other loads the odd registers. EVT AddrTy = MemAddr.getValueType(); - // Load the even subregs. - unsigned Opc = QOpcodes0[OpcodeIndex]; + // Load the even subregs. This is always an updating load, so that it + // provides the address to the second load for the odd subregs. SDValue ImplDef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; - SDNode *VLdA = - CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7); + SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, + ResTy, AddrTy, MVT::Other, OpsA, 7); Chain = SDValue(VLdA, 2); // Load the odd subregs. - Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0), - Pred, Reg0, Chain }; - SDNode *VLdB = - CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7); - SuperReg = SDValue(VLdB, 0); - Chain = SDValue(VLdB, 2); - } - - // Extract out the Q registers. 
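
// Worked example of the GetVLDSTAlign clamp defined above (a stand-alone
// copy of the same arithmetic, for illustration only): a VLD2 of Q registers
// touches four D registers, so NumRegs = 4 and a 64-byte-aligned address
// still encodes only 32-byte alignment, while a single-D-register VLD1
// encodes at most 8.
static unsigned vldstAlignmentFor(unsigned AlignBytes, unsigned NumVecs,
                                  bool Is64BitVector) {
  unsigned NumRegs = NumVecs;
  if (!Is64BitVector && NumVecs < 3)
    NumRegs *= 2; // each Q register counts as two D registers
  if (AlignBytes >= 32 && NumRegs == 4)
    return 32;
  if (AlignBytes >= 16 && (NumRegs == 2 || NumRegs == 4))
    return 16;
  if (AlignBytes >= 8)
    return 8;
  return 0;
}
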
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, - dl, VT, SuperReg); - ReplaceUses(SDValue(N, Vec), Q); - } - ReplaceUses(SDValue(N, NumVecs), Chain); + Ops.push_back(SDValue(VLdA, 1)); + Ops.push_back(Align); + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + assert(isa<ConstantSDNode>(Inc.getNode()) && + "only constant post-increment update allowed for VLD3/4"); + (void)Inc; + Ops.push_back(Reg0); + } + Ops.push_back(SDValue(VLdA, 0)); + Ops.push_back(Pred); + Ops.push_back(Reg0); + Ops.push_back(Chain); + VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, + Ops.data(), Ops.size()); + } + + if (NumVecs == 1) + return VLd; + + // Extract out the subregisters. + SDValue SuperReg = SDValue(VLd, 0); + assert(ARM::dsub_7 == ARM::dsub_0+7 && + ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); + if (isUpdating) + ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); return NULL; } -SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, +SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) + unsigned AddrOpIdx = isUpdating ? 1 : 2; + unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) + if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); - EVT VT = N->getOperand(3).getValueType(); + EVT VT = N->getOperand(Vec0Idx).getValueType(); bool is64BitVector = VT.is64BitVector(); + Align = GetVLDSTAlign(Align, NumVecs, is64BitVector); unsigned OpcodeIndex; switch (VT.getSimpleVT().SimpleTy) { @@ -1222,119 +1609,128 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, break; } + std::vector<EVT> ResTys; + if (isUpdating) + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::Other); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector<SDValue, 7> Ops; - Ops.push_back(MemAddr); - Ops.push_back(Align); - if (is64BitVector) { + // Double registers and VST1/VST2 quad registers are directly supported. + if (is64BitVector || NumVecs <= 2) { + SDValue SrcReg; if (NumVecs == 1) { - Ops.push_back(N->getOperand(3)); - } else { - SDValue RegSeq; - SDValue V0 = N->getOperand(0+3); - SDValue V1 = N->getOperand(1+3); - + SrcReg = N->getOperand(Vec0Idx); + } else if (is64BitVector) { // Form a REG_SEQUENCE to force register allocation. + SDValue V0 = N->getOperand(Vec0Idx + 0); + SDValue V1 = N->getOperand(Vec0Idx + 1); if (NumVecs == 2) - RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); else { - SDValue V2 = N->getOperand(2+3); - // If it's a vld3, form a quad D-register and leave the last part as + SDValue V2 = N->getOperand(Vec0Idx + 2); + // If it's a vst3, form a quad D-register and leave the last part as // an undef. SDValue V3 = (NumVecs == 3) ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : N->getOperand(3+3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + : N->getOperand(Vec0Idx + 3); + SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); } - Ops.push_back(RegSeq); - } - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - unsigned Opc = DOpcodes[OpcodeIndex]; - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6); - } - - if (NumVecs <= 2) { - // Quad registers are directly supported for VST1 and VST2. - unsigned Opc = QOpcodes0[OpcodeIndex]; - if (NumVecs == 1) { - Ops.push_back(N->getOperand(3)); } else { // Form a QQ register. - SDValue Q0 = N->getOperand(3); - SDValue Q1 = N->getOperand(4); - Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0)); + SDValue Q0 = N->getOperand(Vec0Idx); + SDValue Q1 = N->getOperand(Vec0Idx + 1); + SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0); + } + + unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : + QOpcodes0[OpcodeIndex]); + Ops.push_back(MemAddr); + Ops.push_back(Align); + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); } + Ops.push_back(SrcReg); Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register + Ops.push_back(Reg0); Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); } // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. // Form the QQQQ REG_SEQUENCE. - SDValue V0 = N->getOperand(0+3); - SDValue V1 = N->getOperand(1+3); - SDValue V2 = N->getOperand(2+3); + SDValue V0 = N->getOperand(Vec0Idx + 0); + SDValue V1 = N->getOperand(Vec0Idx + 1); + SDValue V2 = N->getOperand(Vec0Idx + 2); SDValue V3 = (NumVecs == 3) ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) - : N->getOperand(3+3); + : N->getOperand(Vec0Idx + 3); SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); - // Store the even D registers. - Ops.push_back(Reg0); // post-access address offset - Ops.push_back(RegSeq); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - unsigned Opc = QOpcodes0[OpcodeIndex]; - SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), 7); + // Store the even D registers. This is always an updating store, so that it + // provides the address to the second store for the odd subregs. + const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; + SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, + MemAddr.getValueType(), + MVT::Other, OpsA, 7); Chain = SDValue(VStA, 1); // Store the odd D registers. 
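
// Summary sketch of how the source super-register was formed above (the enum
// is illustrative; the code uses PairDRegs/QuadDRegs/PairQRegs/QuadQRegs
// directly): a single vector is stored as-is, pairs and quads are wrapped in
// a REG_SEQUENCE, and vst3 pads its fourth slot with an IMPLICIT_DEF.
enum VSTSourceKind { VST_Direct, VST_PairD, VST_QuadD, VST_PairQ, VST_QuadQ };
static VSTSourceKind classifyVSTSource(unsigned NumVecs, bool Is64BitVector) {
  if (NumVecs == 1)
    return VST_Direct;
  if (Is64BitVector)
    return NumVecs == 2 ? VST_PairD : VST_QuadD; // 3 vecs pad with undef
  return NumVecs == 2 ? VST_PairQ : VST_QuadQ;   // quad Q path splits in two
}
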
- Ops[0] = SDValue(VStA, 0); // MemAddr - Ops[6] = Chain; - Opc = QOpcodes1[OpcodeIndex]; - SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), 7); - Chain = SDValue(VStB, 1); - ReplaceUses(SDValue(N, 0), Chain); - return NULL; + Ops.push_back(SDValue(VStA, 0)); + Ops.push_back(Align); + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + assert(isa<ConstantSDNode>(Inc.getNode()) && + "only constant post-increment update allowed for VST3/4"); + (void)Inc; + Ops.push_back(Reg0); + } + Ops.push_back(RegSeq); + Ops.push_back(Pred); + Ops.push_back(Reg0); + Ops.push_back(Chain); + return CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, + Ops.data(), Ops.size()); } SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, - unsigned NumVecs, unsigned *DOpcodes, - unsigned *QOpcodes0, - unsigned *QOpcodes1) { + bool isUpdating, unsigned NumVecs, + unsigned *DOpcodes, + unsigned *QOpcodes) { assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) + unsigned AddrOpIdx = isUpdating ? 1 : 2; + unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) + if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); unsigned Lane = - cast<ConstantSDNode>(N->getOperand(NumVecs+3))->getZExtValue(); - EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType(); + cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); + EVT VT = N->getOperand(Vec0Idx).getValueType(); bool is64BitVector = VT.is64BitVector(); - // Quad registers are handled by load/store of subregs. Find the subreg info. - unsigned NumElts = 0; - bool Even = false; - EVT RegVT = VT; - if (!is64BitVector) { - RegVT = GetNEONSubregVT(VT); - NumElts = RegVT.getVectorNumElements(); - Even = Lane < NumElts; - } + unsigned Alignment = 0; + if (NumVecs != 3) { + Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); + unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8; + if (Alignment > NumBytes) + Alignment = NumBytes; + if (Alignment < 8 && Alignment < NumBytes) + Alignment = 0; + // Alignment must be a power of two; make sure of that. + Alignment = (Alignment & -Alignment); + if (Alignment == 1) + Alignment = 0; + } + Align = CurDAG->getTargetConstant(Alignment, MVT::i32); unsigned OpcodeIndex; switch (VT.getSimpleVT().SimpleTy) { @@ -1350,124 +1746,144 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, case MVT::v4i32: OpcodeIndex = 1; break; } + std::vector<EVT> ResTys; + if (IsLoad) { + unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; + if (!is64BitVector) + ResTyElts *= 2; + ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), + MVT::i64, ResTyElts)); + } + if (isUpdating) + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::Other); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector<SDValue, 10> Ops; + SmallVector<SDValue, 8> Ops; Ops.push_back(MemAddr); Ops.push_back(Align); + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? 
Reg0 : Inc); + } - unsigned Opc = 0; - if (is64BitVector) { - Opc = DOpcodes[OpcodeIndex]; - SDValue RegSeq; - SDValue V0 = N->getOperand(0+3); - SDValue V1 = N->getOperand(1+3); - if (NumVecs == 2) { - RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); - } else { - SDValue V2 = N->getOperand(2+3); - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : N->getOperand(3+3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); - } - - // Now extract the D registers back out. - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq)); - if (NumVecs > 2) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,RegSeq)); - if (NumVecs > 3) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,RegSeq)); + SDValue SuperReg; + SDValue V0 = N->getOperand(Vec0Idx + 0); + SDValue V1 = N->getOperand(Vec0Idx + 1); + if (NumVecs == 2) { + if (is64BitVector) + SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + else + SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); } else { - // Check if this is loading the even or odd subreg of a Q register. - if (Lane < NumElts) { - Opc = QOpcodes0[OpcodeIndex]; - } else { - Lane -= NumElts; - Opc = QOpcodes1[OpcodeIndex]; - } - - SDValue RegSeq; - SDValue V0 = N->getOperand(0+3); - SDValue V1 = N->getOperand(1+3); - if (NumVecs == 2) { - RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); - } else { - SDValue V2 = N->getOperand(2+3); - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : N->getOperand(3+3); - RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); - } - - // Extract the subregs of the input vector. - unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT, - RegSeq)); + SDValue V2 = N->getOperand(Vec0Idx + 2); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) + : N->getOperand(Vec0Idx + 3); + if (is64BitVector) + SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + else + SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); } + Ops.push_back(SuperReg); Ops.push_back(getI32Imm(Lane)); Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); + unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : + QOpcodes[OpcodeIndex]); + SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, + Ops.data(), Ops.size()); if (!IsLoad) - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6); + return VLdLn; - std::vector<EVT> ResTys(NumVecs, RegVT); - ResTys.push_back(MVT::Other); - SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6); + // Extract the subregisters. + SuperReg = SDValue(VLdLn, 0); + assert(ARM::dsub_7 == ARM::dsub_0+7 && + ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); + if (isUpdating) + ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); + return NULL; +} - // Form a REG_SEQUENCE to force register allocation. 
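
// The lane alignment computed near the top of SelectVLDSTLane (and repeated
// in SelectVLDDup below), as a stand-alone copy of the same arithmetic: cap
// at the total bytes accessed, keep only a power of two, and drop values the
// encodings cannot use. vld3/vst3-lane take no alignment operand at all.
static unsigned laneAlignmentFor(unsigned Align, unsigned NumVecs,
                                 unsigned ElemBytes) {
  if (NumVecs == 3)
    return 0;
  unsigned NumBytes = NumVecs * ElemBytes;
  if (Align > NumBytes)
    Align = NumBytes;    // alignment beyond the access size is useless
  if (Align < 8 && Align < NumBytes)
    Align = 0;           // too weak to be worth encoding
  Align &= ~Align + 1;   // lowest set bit: a conservative power of two
  return Align == 1 ? 0 : Align;
}
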
- SDValue RegSeq; - if (is64BitVector) { - SDValue V0 = SDValue(VLdLn, 0); - SDValue V1 = SDValue(VLdLn, 1); - if (NumVecs == 2) { - RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); - } else { - SDValue V2 = SDValue(VLdLn, 2); - // If it's a vld3, form a quad D-register but discard the last part. - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : SDValue(VLdLn, 3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); - } - } else { - // For 128-bit vectors, take the 64-bit results of the load and insert - // them as subregs into the result. - SDValue V[8]; - for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { - if (Even) { - V[i] = SDValue(VLdLn, Vec); - V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - } else { - V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - V[i+1] = SDValue(VLdLn, Vec); - } - } - if (NumVecs == 3) - V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); +SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, + unsigned NumVecs, unsigned *Opcodes) { + assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); + DebugLoc dl = N->getDebugLoc(); - if (NumVecs == 2) - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0); - else - RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], - V[4], V[5], V[6], V[7]), 0); + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) + return NULL; + + SDValue Chain = N->getOperand(0); + EVT VT = N->getValueType(0); + + unsigned Alignment = 0; + if (NumVecs != 3) { + Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); + unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8; + if (Alignment > NumBytes) + Alignment = NumBytes; + if (Alignment < 8 && Alignment < NumBytes) + Alignment = 0; + // Alignment must be a power of two; make sure of that. + Alignment = (Alignment & -Alignment); + if (Alignment == 1) + Alignment = 0; + } + Align = CurDAG->getTargetConstant(Alignment, MVT::i32); + + unsigned OpcodeIndex; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vld-dup type"); + case MVT::v8i8: OpcodeIndex = 0; break; + case MVT::v4i16: OpcodeIndex = 1; break; + case MVT::v2f32: + case MVT::v2i32: OpcodeIndex = 2; break; + } + + SDValue Pred = getAL(CurDAG); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue SuperReg; + unsigned Opc = Opcodes[OpcodeIndex]; + SmallVector<SDValue, 6> Ops; + Ops.push_back(MemAddr); + Ops.push_back(Align); + if (isUpdating) { + SDValue Inc = N->getOperand(2); + Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); } + Ops.push_back(Pred); + Ops.push_back(Reg0); + Ops.push_back(Chain); + unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; + std::vector<EVT> ResTys; + ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts)); + if (isUpdating) + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::Other); + SDNode *VLdDup = + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + SuperReg = SDValue(VLdDup, 0); + + // Extract the subregisters. assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); - assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); - unsigned SubIdx = is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0; + unsigned SubIdx = ARM::dsub_0; for (unsigned Vec = 0; Vec < NumVecs; ++Vec) ReplaceUses(SDValue(N, Vec), - CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq)); - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs)); + CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); + if (isUpdating) + ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); return NULL; } @@ -1486,7 +1902,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); else { SDValue V2 = N->getOperand(FirstTblReg + 2); - // If it's a vtbl3, form a quad D-register and leave the last part as + // If it's a vtbl3, form a quad D-register and leave the last part as // an undef. SDValue V3 = (NumVecs == 3) ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) @@ -1494,17 +1910,10 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); } - // Now extract the D registers back out. SmallVector<SDValue, 6> Ops; if (IsExt) Ops.push_back(N->getOperand(1)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq)); - if (NumVecs > 2) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, RegSeq)); - if (NumVecs > 3) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, RegSeq)); - + Ops.push_back(RegSeq); Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); Ops.push_back(getAL(CurDAG)); // predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register @@ -1574,7 +1983,7 @@ SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { SDValue CPTmp0; SDValue CPTmp1; - if (SelectT2ShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1)) { + if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) { unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue(); unsigned SOShOp = ARM_AM::getSORegShOp(SOVal); unsigned Opc = 0; @@ -1602,7 +2011,7 @@ SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, SDValue CPTmp0; SDValue CPTmp1; SDValue CPTmp2; - if (SelectShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1, CPTmp2)) { + if (SelectShifterOperandReg(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag }; return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7); @@ -1611,36 +2020,66 @@ SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, } SDNode *ARMDAGToDAGISel:: -SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { +SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); if (!T) return 0; - if (Pred_t2_so_imm(TrueVal.getNode())) { - SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); + unsigned Opc = 0; + unsigned TrueImm = T->getZExtValue(); + if (is_t2_so_imm(TrueImm)) { + Opc = ARM::t2MOVCCi; + } else if (TrueImm <= 0xffff) { + Opc = ARM::t2MOVCCi16; + } else if (is_t2_so_imm_not(TrueImm)) { + TrueImm = ~TrueImm; + Opc = ARM::t2MVNCCi; + } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) { + // Large immediate. 
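
// The full opcode ladder of this routine, restated as a sketch (the enum
// stands in for the ARM::t2* opcodes; the predicate results are passed in as
// flags since is_t2_so_imm and friends are defined elsewhere in this file):
enum T2CCMoveKind { T2CC_None, T2CC_MOVi, T2CC_MOVi16, T2CC_MVNi,
                    T2CC_i32imm };
static T2CCMoveKind pickT2CCMove(unsigned Imm, bool IsSoImm, bool IsSoImmNot,
                                 bool HasOneUse, bool HasV6T2) {
  if (IsSoImm)
    return T2CC_MOVi;    // shifter-operand immediate: cheapest form
  if (Imm <= 0xffff)
    return T2CC_MOVi16;  // fits a 16-bit MOVW-style move
  if (IsSoImmNot)
    return T2CC_MVNi;    // ~Imm is a shifter operand: use MVN instead
  if (HasOneUse && HasV6T2)
    return T2CC_i32imm;  // worth materializing the full 32-bit value
  return T2CC_None;      // fall back to a regular conditional move
}
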
+ Opc = ARM::t2MOVCCi32imm; + } + + if (Opc) { + SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, - ARM::t2MOVCCi, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); } + return 0; } SDNode *ARMDAGToDAGISel:: -SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { +SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); if (!T) return 0; - if (Pred_so_imm(TrueVal.getNode())) { - SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); + unsigned Opc = 0; + unsigned TrueImm = T->getZExtValue(); + bool isSoImm = is_so_imm(TrueImm); + if (isSoImm) { + Opc = ARM::MOVCCi; + } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) { + Opc = ARM::MOVCCi16; + } else if (is_so_imm_not(TrueImm)) { + TrueImm = ~TrueImm; + Opc = ARM::MVNCCi; + } else if (TrueVal.getNode()->hasOneUse() && + (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) { + // Large immediate. + Opc = ARM::MOVCCi32imm; + } + + if (Opc) { + SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, - ARM::MOVCCi, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); } + return 0; } @@ -1688,18 +2127,18 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) // Pattern complexity = 10 cost = 1 size = 0 if (Subtarget->isThumb()) { - SDNode *Res = SelectT2CMOVSoImmOp(N, FalseVal, TrueVal, + SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal, CCVal, CCR, InFlag); if (!Res) - Res = SelectT2CMOVSoImmOp(N, TrueVal, FalseVal, + Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal, ARMCC::getOppositeCondition(CCVal), CCR, InFlag); if (Res) return Res; } else { - SDNode *Res = SelectARMCMOVSoImmOp(N, FalseVal, TrueVal, + SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal, CCVal, CCR, InFlag); if (!Res) - Res = SelectARMCMOVSoImmOp(N, TrueVal, FalseVal, + Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal, ARMCC::getOppositeCondition(CCVal), CCR, InFlag); if (Res) return Res; @@ -1742,13 +2181,7 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { EVT VT = N->getValueType(0); if (!VT.is128BitVector() || N->getNumOperands() != 2) llvm_unreachable("unexpected CONCAT_VECTORS"); - DebugLoc dl = N->getDebugLoc(); - SDValue V0 = N->getOperand(0); - SDValue V1 = N->getOperand(1); - SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); - SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); - const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); + return PairDRegs(VT, N->getOperand(0), N->getOperand(1)); } SDNode *ARMDAGToDAGISel::Select(SDNode *N) { @@ -1788,19 +2221,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; - ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other, + ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, Ops, 4); } else { SDValue Ops[] = { CPIdx, - 
CurDAG->getRegister(0, MVT::i32), CurDAG->getTargetConstant(0, MVT::i32), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getEntryNode() }; ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, - Ops, 6); + Ops, 5); } ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); return NULL; @@ -1930,7 +2362,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5); + return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? + ARM::UMULL : ARM::UMULLv5, + dl, MVT::i32, MVT::i32, Ops, 5); } } case ISD::SMUL_LOHI: { @@ -1944,7 +2378,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5); + return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? + ARM::SMULL : ARM::SMULLv5, + dl, MVT::i32, MVT::i32, Ops, 5); } } case ISD::LOAD: { @@ -1987,7 +2423,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { MVT::i32); SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, - MVT::Flag, Ops, 5); + MVT::Glue, Ops, 5); Chain = SDValue(ResNode, 0); if (N->getNumValues() == 2) { InFlag = SDValue(ResNode, 1); @@ -2088,12 +2524,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { EVT VecVT = N->getValueType(0); EVT EltVT = VecVT.getVectorElementType(); unsigned NumElts = VecVT.getVectorNumElements(); - if (EltVT.getSimpleVT() == MVT::f64) { + if (EltVT == MVT::f64) { assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1)); } - assert(EltVT.getSimpleVT() == MVT::f32 && - "unexpected type for BUILD_VECTOR"); + assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); if (NumElts == 2) return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1)); assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); @@ -2101,6 +2536,170 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { N->getOperand(2), N->getOperand(3)); } + case ARMISD::VLD2DUP: { + unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo, + ARM::VLD2DUPd32Pseudo }; + return SelectVLDDup(N, false, 2, Opcodes); + } + + case ARMISD::VLD3DUP: { + unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo, + ARM::VLD3DUPd32Pseudo }; + return SelectVLDDup(N, false, 3, Opcodes); + } + + case ARMISD::VLD4DUP: { + unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo, + ARM::VLD4DUPd32Pseudo }; + return SelectVLDDup(N, false, 4, Opcodes); + } + + case ARMISD::VLD2DUP_UPD: { + unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD, + ARM::VLD2DUPd32Pseudo_UPD }; + return SelectVLDDup(N, true, 2, Opcodes); + } + + case ARMISD::VLD3DUP_UPD: { + unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD, + ARM::VLD3DUPd32Pseudo_UPD }; + return SelectVLDDup(N, true, 3, Opcodes); + } + + case ARMISD::VLD4DUP_UPD: { + unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD, + ARM::VLD4DUPd32Pseudo_UPD }; + return SelectVLDDup(N, true, 4, Opcodes); + } + + case ARMISD::VLD1_UPD: { + unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD, + ARM::VLD1d32_UPD, ARM::VLD1d64_UPD }; + unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD, + 
ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD }; + return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0); + } + + case ARMISD::VLD2_UPD: { + unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD, + ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD, + ARM::VLD2q32Pseudo_UPD }; + return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0); + } + + case ARMISD::VLD3_UPD: { + unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD, + ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD }; + unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, + ARM::VLD3q16Pseudo_UPD, + ARM::VLD3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, + ARM::VLD3q16oddPseudo_UPD, + ARM::VLD3q32oddPseudo_UPD }; + return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); + } + + case ARMISD::VLD4_UPD: { + unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, + ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD }; + unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, + ARM::VLD4q16Pseudo_UPD, + ARM::VLD4q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, + ARM::VLD4q16oddPseudo_UPD, + ARM::VLD4q32oddPseudo_UPD }; + return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); + } + + case ARMISD::VLD2LN_UPD: { + unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD, + ARM::VLD2LNd32Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, + ARM::VLD2LNq32Pseudo_UPD }; + return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); + } + + case ARMISD::VLD3LN_UPD: { + unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD, + ARM::VLD3LNd32Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, + ARM::VLD3LNq32Pseudo_UPD }; + return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); + } + + case ARMISD::VLD4LN_UPD: { + unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD, + ARM::VLD4LNd32Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, + ARM::VLD4LNq32Pseudo_UPD }; + return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); + } + + case ARMISD::VST1_UPD: { + unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD, + ARM::VST1d32_UPD, ARM::VST1d64_UPD }; + unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD, + ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD }; + return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0); + } + + case ARMISD::VST2_UPD: { + unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD, + ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD, + ARM::VST2q32Pseudo_UPD }; + return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0); + } + + case ARMISD::VST3_UPD: { + unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD, + ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD }; + unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, + ARM::VST3q16Pseudo_UPD, + ARM::VST3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, + ARM::VST3q16oddPseudo_UPD, + ARM::VST3q32oddPseudo_UPD }; + return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); + } + + case ARMISD::VST4_UPD: { + unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, + ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD }; + unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, + ARM::VST4q16Pseudo_UPD, + ARM::VST4q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, + 
ARM::VST4q16oddPseudo_UPD, + ARM::VST4q32oddPseudo_UPD }; + return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); + } + + case ARMISD::VST2LN_UPD: { + unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD, + ARM::VST2LNd32Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, + ARM::VST2LNq32Pseudo_UPD }; + return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); + } + + case ARMISD::VST3LN_UPD: { + unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD, + ARM::VST3LNd32Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, + ARM::VST3LNq32Pseudo_UPD }; + return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); + } + + case ARMISD::VST4LN_UPD: { + unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD, + ARM::VST4LNd32Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, + ARM::VST4LNq32Pseudo_UPD }; + return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); + } + case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); @@ -2113,7 +2712,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD1d32, ARM::VLD1d64 }; unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo, ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo }; - return SelectVLD(N, 1, DOpcodes, QOpcodes, 0); + return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld2: { @@ -2121,7 +2720,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo }; unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, ARM::VLD2q32Pseudo }; - return SelectVLD(N, 2, DOpcodes, QOpcodes, 0); + return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld3: { @@ -2130,10 +2729,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q16Pseudo_UPD, ARM::VLD3q32Pseudo_UPD }; - unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, - ARM::VLD3q16oddPseudo_UPD, - ARM::VLD3q32oddPseudo_UPD }; - return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1); + unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo, + ARM::VLD3q16oddPseudo, + ARM::VLD3q32oddPseudo }; + return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4: { @@ -2142,31 +2741,31 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q16Pseudo_UPD, ARM::VLD4q32Pseudo_UPD }; - unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, - ARM::VLD4q16oddPseudo_UPD, - ARM::VLD4q32oddPseudo_UPD }; - return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1); + unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo, + ARM::VLD4q16oddPseudo, + ARM::VLD4q32oddPseudo }; + return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld2lane: { - unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 }; - unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd }; - return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1); + unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo, + ARM::VLD2LNd32Pseudo }; + unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo }; + return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); } case Intrinsic::arm_neon_vld3lane: { - unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD3LNq16, 
ARM::VLD3LNq32 }; - unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd }; - return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); + unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo, + ARM::VLD3LNd32Pseudo }; + unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo }; + return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); } case Intrinsic::arm_neon_vld4lane: { - unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 }; - unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd }; - return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); + unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo, + ARM::VLD4LNd32Pseudo }; + unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo }; + return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); } case Intrinsic::arm_neon_vst1: { @@ -2174,7 +2773,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST1d32, ARM::VST1d64 }; unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo, ARM::VST1q32Pseudo, ARM::VST1q64Pseudo }; - return SelectVST(N, 1, DOpcodes, QOpcodes, 0); + return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst2: { @@ -2182,7 +2781,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST2d32Pseudo, ARM::VST1q64Pseudo }; unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, ARM::VST2q32Pseudo }; - return SelectVST(N, 2, DOpcodes, QOpcodes, 0); + return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst3: { @@ -2191,10 +2790,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, ARM::VST3q16Pseudo_UPD, ARM::VST3q32Pseudo_UPD }; - unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, - ARM::VST3q16oddPseudo_UPD, - ARM::VST3q32oddPseudo_UPD }; - return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1); + unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo, + ARM::VST3q16oddPseudo, + ARM::VST3q32oddPseudo }; + return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4: { @@ -2203,31 +2802,31 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, ARM::VST4q16Pseudo_UPD, ARM::VST4q32Pseudo_UPD }; - unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, - ARM::VST4q16oddPseudo_UPD, - ARM::VST4q32oddPseudo_UPD }; - return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1); + unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo, + ARM::VST4q16oddPseudo, + ARM::VST4q32oddPseudo }; + return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst2lane: { - unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 }; - unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 }; - unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd }; - return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1); + unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo, + ARM::VST2LNd32Pseudo }; + unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo }; + return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); } case Intrinsic::arm_neon_vst3lane: { - unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 }; - unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 }; - unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd }; - return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); + unsigned 
DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo, + ARM::VST3LNd32Pseudo }; + unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo }; + return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); } case Intrinsic::arm_neon_vst4lane: { - unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 }; - unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 }; - unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd }; - return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); + unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo, + ARM::VST4LNd32Pseudo }; + unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo }; + return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); } } break; @@ -2240,18 +2839,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; case Intrinsic::arm_neon_vtbl2: - return SelectVTBL(N, false, 2, ARM::VTBL2); + return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo); case Intrinsic::arm_neon_vtbl3: - return SelectVTBL(N, false, 3, ARM::VTBL3); + return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); case Intrinsic::arm_neon_vtbl4: - return SelectVTBL(N, false, 4, ARM::VTBL4); + return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); case Intrinsic::arm_neon_vtbx2: - return SelectVTBL(N, true, 2, ARM::VTBX2); + return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo); case Intrinsic::arm_neon_vtbx3: - return SelectVTBL(N, true, 3, ARM::VTBX3); + return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); case Intrinsic::arm_neon_vtbx4: - return SelectVTBL(N, true, 4, ARM::VTBX4); + return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo); } break; } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ce4a2c9..1835ec0 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "arm-isel" #include "ARM.h" #include "ARMAddressingModes.h" +#include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" #include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" @@ -28,9 +29,11 @@ #include "llvm/Function.h" #include "llvm/GlobalValue.h" #include "llvm/Instruction.h" +#include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/Type.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -41,6 +44,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -50,6 +54,7 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); +STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); // This option should go away when tail calls fully work. static cl::opt<bool> @@ -57,14 +62,7 @@ EnableARMTailCalls("arm-tail-calls", cl::Hidden, cl::desc("Generate tail calls (TEMPORARY OPTION)."), cl::init(false)); -// This option should go away when Machine LICM is smart enough to hoist a -// reg-to-reg VDUP. 
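
// Note the linkage change below: EnableARMLongCalls loses its 'static' so
// that other files in the target can test the flag. A minimal sketch of the
// idiom (the consumer shown here is an assumption, not taken from this
// patch):
//
//   // Defining file (this one):
//   cl::opt<bool> EnableARMLongCalls("arm-long-calls", cl::Hidden, ...);
//
//   // Any other ARM target file:
//   extern cl::opt<bool> EnableARMLongCalls;
//   if (EnableARMLongCalls) { /* emit load-address + indirect call */ }
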
-static cl::opt<bool> -EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden, - cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."), - cl::init(false)); - -static cl::opt<bool> +cl::opt<bool> EnableARMLongCalls("arm-long-calls", cl::Hidden, cl::desc("Generate calls via indirect call instructions"), cl::init(false)); @@ -74,28 +72,6 @@ ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true)); -static cl::opt<bool> -EnableARMCodePlacement("arm-code-placement", cl::Hidden, - cl::desc("Enable code placement pass for ARM"), - cl::init(false)); - -static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); - void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { @@ -111,8 +87,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::i64 && ElemTy != MVT::f64) setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom); - if (ElemTy == MVT::i8 || ElemTy == MVT::i16) - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); if (ElemTy != MVT::i32) { setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); @@ -122,7 +97,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal); setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); if (VT.isInteger()) { @@ -131,6 +106,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand); setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand); + for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) + setTruncStoreAction(VT.getSimpleVT(), + (MVT::SimpleValueType)InnerVT, Expand); } setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand); @@ -177,6 +156,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); RegInfo = TM.getRegisterInfo(); + Itins = TM.getInstrItineraryData(); if (Subtarget->isTargetDarwin()) { // Uses VFP for Thumb libfuncs if available. 
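The next hunk replaces the blanket AAPCS calling-convention loop with explicit RTABI helper names, pairing each comparison libcall with an inverted condition code. As an illustrative sketch only (not part of the patch): the RTABI compare helpers return nonzero when the tested relation holds, which is why each setLibcallName below carries a matching setCmpLibcallCC. The __aeabi_dcmpeq declaration is assumed here from RTABI chapter 4.1.2, Table 3.

extern "C" int __aeabi_dcmpeq(double, double);

// Ordered-equal (RTLIB::OEQ_F64): the helper's result is tested with
// SETNE, i.e. the comparison holds when the call returns nonzero.
static bool ordered_equal(double a, double b) {
  return __aeabi_dcmpeq(a, b) != 0;
}

// Unordered-or-not-equal (RTLIB::UNE_F64) reuses the same helper with
// the opposite test (SETEQ), so no separate entry point is needed.
static bool unordered_not_equal(double a, double b) {
  return __aeabi_dcmpeq(a, b) == 0;
}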
@@ -260,13 +240,157 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setLibcallName(RTLIB::SRL_I128, 0);
     setLibcallName(RTLIB::SRA_I128, 0);

-  // Libcalls should use the AAPCS base standard ABI, even if hard float
-  // is in effect, as per the ARM RTABI specification, section 4.1.2.
   if (Subtarget->isAAPCS_ABI()) {
-    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
-      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
-                            CallingConv::ARM_AAPCS);
-    }
+    // Double-precision floating-point arithmetic helper functions
+    // RTABI chapter 4.1.2, Table 2
+    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
+    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
+    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
+    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
+    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
+
+    // Double-precision floating-point comparison helper functions
+    // RTABI chapter 4.1.2, Table 3
+    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
+    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
+    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
+    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
+    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
+    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
+    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
+    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+    setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
+    setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+    setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
+    setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
+
+    // Single-precision floating-point arithmetic helper functions
+    // RTABI chapter 4.1.2, Table 4
+    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
+    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
+    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
+    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
+    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
+
+    // Single-precision floating-point comparison helper functions
+    // RTABI chapter 4.1.2, Table 5
+    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
+    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
+    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
+    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
+    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
+    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
+    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
+    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+    setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
+    setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+    setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
+    setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+    setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
+
+    // Floating-point to integer conversions.
+    // RTABI chapter 4.1.2, Table 6
+    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
+    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
+    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
+    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
+    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
+    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
+    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
+    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
+    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
+
+    // Conversions between floating types.
+    // RTABI chapter 4.1.2, Table 7
+    setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
+    setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
+    setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
+
+    // Integer to floating-point conversions.
+    // RTABI chapter 4.1.2, Table 8
+    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
+    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
+    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
+    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
+    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
+    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
+    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
+    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
+    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+
+    // Long long helper functions
+    // RTABI chapter 4.2, Table 9
+    setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
+    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
+    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
+    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
+    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
+    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
+    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
+
+    // Integer division functions
+    // RTABI chapter 4.3.1
+    setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
+    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
+    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
+    setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
+    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
+    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
+    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
+    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
   }

   if (Subtarget->isThumb1Only())
@@ -330,9 +454,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+    // Custom handling for some vector types to avoid expensive expansions
+    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
+    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
+    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
+    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
     setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
     setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);

+    setTargetDAGCombine(ISD::INTRINSIC_VOID);
+    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
     setTargetDAGCombine(ISD::SHL);
     setTargetDAGCombine(ISD::SRL);
@@ -341,6 +472,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setTargetDAGCombine(ISD::ZERO_EXTEND);
     setTargetDAGCombine(ISD::ANY_EXTEND);
     setTargetDAGCombine(ISD::SELECT_CC);
+    setTargetDAGCombine(ISD::BUILD_VECTOR);
+    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+    setTargetDAGCombine(ISD::STORE);
   }

   computeRegisterProperties();
@@ -397,7 +532,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::BSWAP, MVT::i32, Expand);

   // These are expanded into libcalls.
-  if (!Subtarget->hasDivide()) {
+  if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
     // v7M has a hardware divider
     setOperationAction(ISD::SDIV, MVT::i32, Expand);
     setOperationAction(ISD::UDIV, MVT::i32, Expand);
@@ -423,14 +558,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
   setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
-  // FIXME: Shouldn't need this, since no register is used, but the legalizer
-  // doesn't yet know how to not do that for SjLj.
-  setExceptionSelectorRegister(ARM::R0);
+  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+  setExceptionPointerRegister(ARM::R0);
+  setExceptionSelectorRegister(ARM::R1);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
   // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
   // the default expansion.
   if (Subtarget->hasDataBarrier() ||
-      (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
+      (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
     // membarrier needs custom lowering; the rest are legal and handled
     // normally.
     setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
@@ -474,6 +610,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
   setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);

+  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
   if (!Subtarget->hasV6Ops()) {
     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -484,7 +622,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
     // iff target supports vfp2.
-    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
     setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
   }

@@ -493,6 +631,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   if (Subtarget->isTargetDarwin()) {
     setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
     setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+    setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
   }

   setOperationAction(ISD::SETCC, MVT::i32, Expand);
@@ -547,8 +686,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setTargetDAGCombine(ISD::SUB);
   setTargetDAGCombine(ISD::MUL);

-  if (Subtarget->hasV6T2Ops())
+  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
     setTargetDAGCombine(ISD::OR);
+  if (Subtarget->hasNEON())
+    setTargetDAGCombine(ISD::AND);

   setStackPointerRegisterToSaveRestore(ARM::SP);

@@ -557,16 +698,26 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   else
     setSchedulingPreference(Sched::Hybrid);

-  maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type
+  //// temporary - rewrite interface to use type
+  maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;

   // On ARM arguments smaller than 4 bytes are extended, so all arguments
   // are at least 4 bytes aligned.
   setMinStackArgumentAlignment(4);

-  if (EnableARMCodePlacement)
-    benefitFromCodePlacementOpt = true;
+  benefitFromCodePlacementOpt = true;
 }

+// FIXME: It might make sense to define the representative register class as the
+// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
+// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
+// SPR's representative would be DPR_VFP2. This should work well if register
+// pressure tracking were modified such that a register use would increment the
+// pressure of the register class's representative and all of its super
+// classes' representatives transitively. We have not implemented this because
+// of the difficulty prior to coalescing of modeling operand register classes
+// due to the common occurrence of cross class copies and subregister insertions
+// and extractions.
 std::pair<const TargetRegisterClass*, uint8_t>
 ARMTargetLowering::findRepresentativeClass(EVT VT) const{
   const TargetRegisterClass *RRC = 0;
@@ -580,6 +731,12 @@ ARMTargetLowering::findRepresentativeClass(EVT VT) const{
   case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
   case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
     RRC = ARM::DPRRegisterClass;
+    // When NEON is used for SP, only half of the register file is available
+    // because operations that define both SP and DP results will be constrained
+    // to the VFP2 class (D0-D15). We currently model this constraint prior to
+    // coalescing by double-counting the SP regs. See the FIXME above.
+ if (Subtarget->useNEONForSinglePrecisionFP()) + Cost = 2; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: @@ -602,6 +759,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return 0; case ARMISD::Wrapper: return "ARMISD::Wrapper"; + case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; + case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; case ARMISD::CALL: return "ARMISD::CALL"; case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; @@ -612,7 +771,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; - case ARMISD::AND: return "ARMISD::AND"; case ARMISD::CMP: return "ARMISD::CMP"; case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; @@ -633,25 +791,33 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; - case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; - case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; + case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; + case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; + case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP"; case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; - + case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; - case ARMISD::SYNCBARRIER: return "ARMISD::SYNCBARRIER"; + case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; + + case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; case ARMISD::VCEQ: return "ARMISD::VCEQ"; + case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; case ARMISD::VCGE: return "ARMISD::VCGE"; + case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; + case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; case ARMISD::VCGEU: return "ARMISD::VCGEU"; case ARMISD::VCGT: return "ARMISD::VCGT"; + case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; + case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; case ARMISD::VCGTU: return "ARMISD::VCGTU"; case ARMISD::VTST: return "ARMISD::VTST"; @@ -693,6 +859,28 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::FMAX: return "ARMISD::FMAX"; case ARMISD::FMIN: return "ARMISD::FMIN"; case ARMISD::BFI: return "ARMISD::BFI"; + case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; + case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; + case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; + case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; + case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; + case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; + case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; + case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; + case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; + case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; + case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; + case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; + case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; + case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; + case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; + case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; 
+ case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; + case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; + case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; + case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; + case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; + case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; } } @@ -735,6 +923,8 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { for (unsigned i = 0; i != NumVals; ++i) { EVT VT = N->getValueType(i); + if (VT == MVT::Glue || VT == MVT::Other) + continue; if (VT.isFloatingPoint() || VT.isVector()) return Sched::Latency; } @@ -746,25 +936,29 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // is not available. const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); - if (TID.mayLoad()) - return Sched::Latency; - const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData(); - if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2) + if (TID.getNumDefs() == 0) + return Sched::RegPressure; + if (!Itins->isEmpty() && + Itins->getOperandCycle(TID.getSchedClass(), 0) > 2) return Sched::Latency; + return Sched::RegPressure; } +// FIXME: Move to RegInfo unsigned ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + switch (RC->getID()) { default: return 0; case ARM::tGPRRegClassID: - return RegInfo->hasFP(MF) ? 4 : 5; + return TFI->hasFP(MF) ? 4 : 5; case ARM::GPRRegClassID: { - unsigned FP = RegInfo->hasFP(MF) ? 1 : 0; + unsigned FP = TFI->hasFP(MF) ? 1 : 0; return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0); } case ARM::SPRRegClassID: // Currently not used as 'rep' register class. @@ -829,136 +1023,6 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, #include "ARMGenCallingConv.inc" -// APCS f64 is in register pairs, possibly split to stack -static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT, - CCValAssign::LocInfo &LocInfo, - CCState &State, bool CanFail) { - static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; - - // Try to get the first register. - if (unsigned Reg = State.AllocateReg(RegList, 4)) - State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - else { - // For the 2nd half of a v2f64, do not fail. - if (CanFail) - return false; - - // Put the whole thing on the stack. - State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, - State.AllocateStack(8, 4), - LocVT, LocInfo)); - return true; - } - - // Try to get the second register. 
-  if (unsigned Reg = State.AllocateReg(RegList, 4))
-    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  else
-    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
-                                           State.AllocateStack(4, 4),
-                                           LocVT, LocInfo));
-  return true;
-}
-
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                                   CCValAssign::LocInfo &LocInfo,
-                                   ISD::ArgFlagsTy &ArgFlags,
-                                   CCState &State) {
-  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
-    return false;
-  if (LocVT == MVT::v2f64 &&
-      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
-    return false;
-  return true;  // we handled it
-}
-
-// AAPCS f64 is in aligned register pairs
-static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                           CCValAssign::LocInfo &LocInfo,
-                           CCState &State, bool CanFail) {
-  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
-  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
-  static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
-
-  unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
-  if (Reg == 0) {
-    // For the 2nd half of a v2f64, do not just fail.
-    if (CanFail)
-      return false;
-
-    // Put the whole thing on the stack.
-    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
-                                           State.AllocateStack(8, 8),
-                                           LocVT, LocInfo));
-    return true;
-  }
-
-  unsigned i;
-  for (i = 0; i < 2; ++i)
-    if (HiRegList[i] == Reg)
-      break;
-
-  unsigned T = State.AllocateReg(LoRegList[i]);
-  (void)T;
-  assert(T == LoRegList[i] && "Could not allocate register");
-
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
-                                         LocVT, LocInfo));
-  return true;
-}
-
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                                    CCValAssign::LocInfo &LocInfo,
-                                    ISD::ArgFlagsTy &ArgFlags,
-                                    CCState &State) {
-  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
-    return false;
-  if (LocVT == MVT::v2f64 &&
-      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
-    return false;
-  return true;  // we handled it
-}
-
-static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                         CCValAssign::LocInfo &LocInfo, CCState &State) {
-  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
-  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
-
-  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
-  if (Reg == 0)
-    return false; // we didn't handle it
-
-  unsigned i;
-  for (i = 0; i < 2; ++i)
-    if (HiRegList[i] == Reg)
-      break;
-
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
-                                         LocVT, LocInfo));
-  return true;
-}
-
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                                      CCValAssign::LocInfo &LocInfo,
-                                      ISD::ArgFlagsTy &ArgFlags,
-                                      CCState &State) {
-  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
-    return false;
-  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
-    return false;
-  return true;  // we handled it
-}
-
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
-                                       CCValAssign::LocInfo &LocInfo,
-                                       ISD::ArgFlagsTy &ArgFlags,
-                                       CCState &State) {
-  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
-                                   State);
-}
-
 /// CCAssignFnForNode - Selects the correct CCAssignFn for the
 /// given CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, @@ -967,23 +1031,29 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, switch (CC) { default: llvm_unreachable("Unsupported calling convention"); - case CallingConv::C: case CallingConv::Fast: + if (Subtarget->hasVFP2() && !isVarArg) { + if (!Subtarget->isAAPCS_ABI()) + return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); + // For AAPCS ABI targets, just use VFP variant of the calling convention. + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + } + // Fallthrough + case CallingConv::C: { // Use target triple & subtarget features to do actual dispatch. - if (Subtarget->isAAPCS_ABI()) { - if (Subtarget->hasVFP2() && - FloatABIType == FloatABI::Hard && !isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); - else - return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); - } else - return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + if (!Subtarget->isAAPCS_ABI()) + return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + else if (Subtarget->hasVFP2() && + FloatABIType == FloatABI::Hard && !isVarArg) + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + } case CallingConv::ARM_AAPCS_VFP: - return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); case CallingConv::ARM_AAPCS: - return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); case CallingConv::ARM_APCS: - return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); } } @@ -1050,7 +1120,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: - Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val); + Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); break; } @@ -1073,7 +1143,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, /*AlwaysInline=*/false, - NULL, 0, NULL, 0); + MachinePointerInfo(0), MachinePointerInfo(0)); } /// LowerMemOpCallTo - Store the argument to the stack. 
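For orientation, a condensed sketch of the dispatch order the revised CCAssignFnForNode above implements (simplified names and signatures are assumed here; this is not the patch's code): CallingConv::Fast now prefers the VFP-register conventions whenever VFP2 is present and the call is not variadic, and plain C calls on AAPCS targets additionally honor FloatABI::Hard.

enum class ArmCCKind { FastAPCS, APCS, AAPCS, AAPCS_VFP };

ArmCCKind pickCC(bool isFastCC, bool isAAPCS, bool hasVFP2,
                 bool hardFloat, bool isVarArg) {
  if (isFastCC && hasVFP2 && !isVarArg)
    return isAAPCS ? ArmCCKind::AAPCS_VFP   // VFP variant of AAPCS
                   : ArmCCKind::FastAPCS;   // FastCC_ARM_APCS
  // CallingConv::C (or Fast falling through): dispatch on the ABI.
  if (!isAAPCS)
    return ArmCCKind::APCS;                 // CC_ARM_APCS
  if (hasVFP2 && hardFloat && !isVarArg)
    return ArmCCKind::AAPCS_VFP;            // CC_ARM_AAPCS_VFP
  return ArmCCKind::AAPCS;                  // CC_ARM_AAPCS
}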
@@ -1086,11 +1156,11 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); - if (Flags.isByVal()) { + if (Flags.isByVal()) return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); - } + return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset, + MachinePointerInfo::getStack(LocMemOffset), false, false, 0); } @@ -1198,7 +1268,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); break; } @@ -1289,7 +1359,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { const GlobalValue *GV = G->getGlobal(); // Create a constant pool entry for the callee address - unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); @@ -1298,13 +1368,13 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { const char *Sym = S->getSymbol(); // Create a constant pool entry for the callee address - unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), Sym, ARMPCLabelIndex, 0); // Get the address of the callee into a register @@ -1312,7 +1382,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); } } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { @@ -1326,7 +1396,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); // tBX takes a register source operand. 
 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
-      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
       ARMConstantPoolValue *CPV =
         new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
@@ -1334,13 +1404,19 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
       Callee = DAG.getLoad(getPointerTy(), dl,
                            DAG.getEntryNode(), CPAddr,
-                           PseudoSourceValue::getConstantPool(), 0,
+                           MachinePointerInfo::getConstantPool(),
                            false, false, 0);
       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                            getPointerTy(), Callee, PICLabel);
-    } else
-      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+    } else {
+      // On ELF targets for PIC code, direct calls should go through the PLT
+      unsigned OpFlags = 0;
+      if (Subtarget->isTargetELF() &&
+          getTargetMachine().getRelocationModel() == Reloc::PIC_)
+        OpFlags = ARMII::MO_PLT;
+      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+    }
   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
     isDirect = true;
     bool isStub = Subtarget->isTargetDarwin() &&
@@ -1349,20 +1425,26 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     // tBX takes a register source operand.
     const char *Sym = S->getSymbol();
     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
-      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
       ARMConstantPoolValue *CPV =
         new ARMConstantPoolValue(*DAG.getContext(), Sym, ARMPCLabelIndex, 4);
       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
       Callee = DAG.getLoad(getPointerTy(), dl,
                            DAG.getEntryNode(), CPAddr,
-                           PseudoSourceValue::getConstantPool(), 0,
+                           MachinePointerInfo::getConstantPool(),
                            false, false, 0);
       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                            getPointerTy(), Callee, PICLabel);
-    } else
-      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+    } else {
+      unsigned OpFlags = 0;
+      // On ELF targets for PIC code, direct calls should go through the PLT
+      if (Subtarget->isTargetELF() &&
+          getTargetMachine().getRelocationModel() == Reloc::PIC_)
+        OpFlags = ARMII::MO_PLT;
+      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
+    }
   }

   // FIXME: handle tail calls differently.
@@ -1391,7 +1473,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   if (InFlag.getNode())
     Ops.push_back(InFlag);

-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   if (isTailCall)
     return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
@@ -1421,7 +1503,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
   int FI = INT_MAX;
   if (Arg.getOpcode() == ISD::CopyFromReg) {
     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
-    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
+    if (!TargetRegisterInfo::isVirtualRegister(VR))
       return false;
     MachineInstr *Def = MRI->getVRegDef(VR);
     if (!Def)
@@ -1490,32 +1572,15 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   // LR. This means if we need to reload LR, it takes an extra instruction,
   // which outweighs the value of the tail call; but here we don't know yet
   // whether LR is going to be used. Probably the right approach is to
-  // generate the tail call here and turn it back into CALL/RET in
+  // generate the tail call here and turn it back into CALL/RET in
   // emitEpilogue if LR is used.
-  if (Subtarget->isThumb1Only())
-    return false;
-
-  // For the moment, we can only do this to functions defined in this
-  // compilation, or to indirect calls.  A Thumb B to an ARM function,
-  // or vice versa, is not easily fixed up in the linker unlike BL.
-  // (We could do this by loading the address of the callee into a register;
-  // that is an extra instruction over the direct call and burns a register
-  // as well, so is not likely to be a win.)
-
-  // It might be safe to remove this restriction on non-Darwin.

   // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
   // but we need to make sure there are enough registers; the only valid
   // registers are the 4 used for parameters.  We don't currently do this
   // case.
-  if (isa<ExternalSymbolSDNode>(Callee))
-    return false;
-
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
-    const GlobalValue *GV = G->getGlobal();
-    if (GV->isDeclaration() || GV->isWeakForLinker())
-      return false;
-  }
+  if (Subtarget->isThumb1Only())
+    return false;

   // If the calling conventions do not match, then we'd better make sure the
   // results are returned in the same way as what the caller expects.
@@ -1583,7 +1648,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
       if (!VA.isRegLoc())
         return false;
       if (!ArgLocs[++i].isRegLoc())
-        return false;
+        return false;
       if (RegVT == MVT::v2f64) {
         if (!ArgLocs[++i].isRegLoc())
           return false;
@@ -1643,7 +1708,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
     default: llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full: break;
     case CCValAssign::BCvt:
-      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
       break;
     }
@@ -1693,6 +1758,61 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
   return result;
 }

+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+  if (N->getNumValues() != 1)
+    return false;
+  if (!N->hasNUsesOfValue(1, 0))
+    return false;
+
+  unsigned NumCopies = 0;
+  SDNode* Copies[2];
+  SDNode *Use = *N->use_begin();
+  if (Use->getOpcode() == ISD::CopyToReg) {
+    Copies[NumCopies++] = Use;
+  } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+    // f64 returned in a pair of GPRs.
+    for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+         UI != UE; ++UI) {
+      if (UI->getOpcode() != ISD::CopyToReg)
+        return false;
+      Copies[UI.getUse().getResNo()] = *UI;
+      ++NumCopies;
+    }
+  } else if (Use->getOpcode() == ISD::BITCAST) {
+    // f32 returned in a single GPR.
+ if (!Use->hasNUsesOfValue(1, 0)) + return false; + Use = *Use->use_begin(); + if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0)) + return false; + Copies[NumCopies++] = Use; + } else { + return false; + } + + if (NumCopies != 1 && NumCopies != 2) + return false; + + bool HasRet = false; + for (unsigned i = 0; i < NumCopies; ++i) { + SDNode *Copy = Copies[i]; + for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); + UI != UE; ++UI) { + if (UI->getOpcode() == ISD::CopyToReg) { + SDNode *Use = *UI; + if (Use == Copies[0] || Use == Copies[1]) + continue; + return false; + } + if (UI->getOpcode() != ARMISD::RET_FLAG) + return false; + HasRet = true; + } + } + + return HasRet; +} + // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is // one of the above mentioned nodes. It has to be wrapped because otherwise @@ -1732,7 +1852,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); } else { unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; - ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex, ARMCP::CPBlockAddress, PCAdj); @@ -1740,7 +1860,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); if (RelocM == Reloc::Static) return Result; @@ -1757,14 +1877,14 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, - ARMCP::CPValue, PCAdj, "tlsgd", true); + ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); SDValue Chain = Argument.getValue(1); @@ -1802,16 +1922,16 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, if (GV->isDeclaration()) { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); // Initial exec model. unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, - ARMCP::CPValue, PCAdj, "gottpoff", true); + ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); Chain = Offset.getValue(1); @@ -1819,15 +1939,15 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); } else { // local exec model - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); } @@ -1859,51 +1979,72 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, if (RelocM == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT"); + new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); SDValue Chain = Result.getValue(1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0, - false, false, 0); + MachinePointerInfo::getGOT(), false, false, 0); return Result; + } + + // If we have T2 ops, we can materialize the address directly via movt/movw + // pair. This is always cheaper. + if (Subtarget->useMovt()) { + ++NumMovwMovt; + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes. + return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, + DAG.getTargetGlobalAddress(GV, dl, PtrVT)); } else { - // If we have T2 ops, we can materialize the address directly via movt/movw - // pair. This is always cheaper. 
- if (Subtarget->useMovt()) { - return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT)); - } else { - SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, - false, false, 0); - } + SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, 0); } } SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned ARMPCLabelIndex = 0; EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + if (Subtarget->useMovt()) { + ++NumMovwMovt; + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes. + if (RelocM == Reloc::Static) + return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, + DAG.getTargetGlobalAddress(GV, dl, PtrVT)); + + unsigned Wrapper = (RelocM == Reloc::PIC_) + ? ARMISD::WrapperPIC : ARMISD::WrapperDYN; + SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, + DAG.getTargetGlobalAddress(GV, dl, PtrVT)); + if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) + Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(), false, false, 0); + return Result; + } + + unsigned ARMPCLabelIndex = 0; SDValue CPAddr; - if (RelocM == Reloc::Static) + if (RelocM == Reloc::Static) { CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); - else { - ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + } else { + ARMPCLabelIndex = AFI->createPICLabelUId(); unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); @@ -1912,7 +2053,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); SDValue Chain = Result.getValue(1); @@ -1922,8 +2063,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, } if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) - Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0, + Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), false, false, 0); return Result; @@ -1935,7 +2075,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; @@ -1945,13 +2085,21 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } SDValue +ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) + const { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, + Op.getOperand(0), Op.getOperand(1)); +} + +SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue Val = DAG.getConstant(0, MVT::i32); @@ -1980,7 +2128,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, case Intrinsic::eh_sjlj_lsda: { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); @@ -1994,7 +2142,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 0); if (RelocM == Reloc::PIC_) { @@ -2009,21 +2157,55 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { DebugLoc dl = Op.getDebugLoc(); - SDValue Op5 = Op.getOperand(5); - unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); - // Some subtargets which have dmb and dsb instructions can handle barriers - // directly. Some ARMv6 cpus can support them with the help of mcr - // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should - // never get here. - unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER; - if (Subtarget->hasDataBarrier()) - return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0)); - else { - assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() && + if (!Subtarget->hasDataBarrier()) { + // Some ARMv6 cpus can support data barriers with an mcr instruction. + // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get + // here. + assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); - return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0), + return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(0, MVT::i32)); } + + SDValue Op5 = Op.getOperand(5); + bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0; + unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0); + + ARM_MB::MemBOpt DMBOpt; + if (isDeviceBarrier) + DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY; + else + DMBOpt = isOnlyStoreBarrier ? 
ARM_MB::ISHST : ARM_MB::ISH; + return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(DMBOpt, MVT::i32)); +} + +static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + // ARM pre v5TE and Thumb1 does not have preload instructions. + if (!(Subtarget->isThumb2() || + (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) + // Just preserve the chain. + return Op.getOperand(0); + + DebugLoc dl = Op.getDebugLoc(); + unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; + if (!isRead && + (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) + // ARMv7 with MP extension has PLDW. + return Op.getOperand(0); + + if (Subtarget->isThumb()) + // Invert the bits. + isRead = ~isRead & 1; + unsigned isData = Subtarget->isThumb() ? 0 : 1; + + // Currently there is no intrinsic that matches pli. + return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), + Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), + DAG.getConstant(isData, MVT::i32)); } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { @@ -2036,8 +2218,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, - false, false, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), + MachinePointerInfo(SV), false, false, 0); } SDValue @@ -2054,7 +2236,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, RC = ARM::GPRRegisterClass; // Transform the arguments stored in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); SDValue ArgValue2; @@ -2065,10 +2247,10 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, - PseudoSourceValue::getFixedStack(FI), 0, + MachinePointerInfo::getFixedStack(FI), false, false, 0); } else { - Reg = MF.addLiveIn(NextVA.getLocReg(), RC); + Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } @@ -2119,7 +2301,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0, + MachinePointerInfo::getFixedStack(FI), false, false, 0); } else { ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], @@ -2149,7 +2331,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); // Transform the arguments in physical registers into virtual ones. 
-    unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+    unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
     ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
   }

@@ -2160,7 +2342,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
     default: llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full: break;
     case CCValAssign::BCvt:
-      ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+      ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
       break;
     case CCValAssign::SExt:
       ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
@@ -2188,7 +2370,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
       // Create load nodes to retrieve arguments from the stack.
       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
       InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
-                                   PseudoSourceValue::getFixedStack(FI), 0,
+                                   MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0));
     }
   }
@@ -2202,7 +2384,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
     unsigned NumGPRs = CCInfo.getFirstUnallocated
       (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));

-    unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+    unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
     unsigned VARegSize = (4 - NumGPRs) * 4;
     unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
     unsigned ArgOffset = CCInfo.getNextStackOffset();
@@ -2214,7 +2396,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
       AFI->setVarArgsFrameIndex(
         MFI->CreateFixedObject(VARegSaveSize,
                                ArgOffset + VARegSaveSize - VARegSize,
-                               true));
+                               false));
       SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
                                       getPointerTy());
@@ -2226,12 +2408,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
        else
          RC = ARM::GPRRegisterClass;

-        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
+        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
        SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN,
-           PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()),
-           0, false, false, 0);
+           MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+           false, false, 0);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                          DAG.getConstant(4, getPointerTy()));
@@ -2320,7 +2502,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
     break;
   }
   ARMcc = DAG.getConstant(CondCode, MVT::i32);
-  return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
+  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
 }

 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
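For context on the hunk below, a hedged illustration (assumed source-level example; exact registers depend on the calling convention) of what getVFPCmp ultimately produces. A VFP compare is two glued nodes, which is why the chained value type changes from MVT::Flag to its renamed form MVT::Glue here: ARMISD::CMPFP (or CMPFPw0 when comparing against a floating-point zero) sets the FPSCR flags, and ARMISD::FMSTAT copies them to CPSR for the conditional branch or select that follows.

bool is_negative(double x) {
  // Lowers roughly to:
  //   vcmp.f64  d0, #0            (ARMISD::CMPFPw0, RHS is the constant 0.0)
  //   vmrs      APSR_nzcv, fpscr  (ARMISD::FMSTAT)
  //   ...conditional use of the N flag...
  return x < 0.0;
}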
@@ -2329,10 +2511,10 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, DebugLoc dl) const { SDValue Cmp; if (!isFloatingPointZero(RHS)) - Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); + Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); else - Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS); - return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); + Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); + return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); } SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { @@ -2444,8 +2626,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) return DAG.getLoad(MVT::i32, Op.getDebugLoc(), - Ld->getChain(), Ld->getBasePtr(), - Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->getAlignment()); @@ -2464,7 +2645,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, SDValue Ptr = Ld->getBasePtr(); RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), Ld->getChain(), Ptr, - Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->getAlignment()); @@ -2474,7 +2655,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, PtrType, Ptr, DAG.getConstant(4, PtrType)); RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), Ld->getChain(), NewPtr, - Ld->getSrcValue(), Ld->getSrcValueOffset() + 4, + Ld->getPointerInfo().getWithOffset(4), Ld->isVolatile(), Ld->isNonTemporal(), NewAlign); return; @@ -2524,7 +2705,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { expandf64Toi32(RHS, DAG, RHS1, RHS2); ARMCC::CondCodes CondCode = IntCCToARMCC(CC); ARMcc = DAG.getConstant(CondCode, MVT::i32); - SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); } @@ -2564,7 +2745,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); if (CondCode2 != ARMCC::AL) { @@ -2599,14 +2780,14 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { } if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0, + MachinePointerInfo::getJumpTable(), false, false, 0); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } else { Addr = DAG.getLoad(PTy, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0, false, false, 0); + MachinePointerInfo::getJumpTable(), false, false, 0); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } @@ -2627,7 +2808,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { break; } Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + 
return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); } static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -2646,7 +2827,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { break; } - Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); + Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); return DAG.getNode(Opc, dl, VT, Op); } @@ -2657,12 +2838,46 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); - SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); - SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32); - SDValue FP0 = DAG.getConstantFP(0.0, SrcVT); - SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl); + bool F2IisFast = Subtarget->isCortexA9() || + Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR; + + // Bitcast operand 1 to i32. + if (SrcVT == MVT::f64) + Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), + &Tmp1, 1).getValue(1); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); + + // If float to int conversion isn't going to be super expensive, then simply + // or in the signbit. + if (F2IisFast) { + SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); + SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); + Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); + if (VT == MVT::f32) { + Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, + DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); + } + + // f64: Or the high part with signbit and then combine two parts. + Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), + &Tmp0, 1); + SDValue Lo = Tmp0.getValue(0); + SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); + Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); + } + + // Remove the signbit of operand 0. + Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0); + + // If operand 1 signbit is one, then negate operand 0. + SDValue ARMcc; + SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32), + ISD::SETLT, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp); + return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp); } SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ @@ -2678,11 +2893,11 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ SDValue Offset = DAG.getConstant(4, MVT::i32); return DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), - NULL, 0, false, false, 0); + MachinePointerInfo(), false, false, 0); } // Return LR, which contains the return address. Mark it an implicit live-in. - unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); + unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } @@ -2697,17 +2912,18 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { ? 
ARM::R7 : ARM::R11; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, + MachinePointerInfo(), false, false, 0); return FrameAddr; } -/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to +/// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 /// operand type is illegal (e.g., v2f32 for a target that doesn't support /// vectors), since the legalizer won't know what to do with that. -static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { +static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); DebugLoc dl = N->getDebugLoc(); SDValue Op = N->getOperand(0); @@ -2717,7 +2933,7 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { EVT SrcVT = Op.getValueType(); EVT DstVT = N->getValueType(0); assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && - "ExpandBIT_CONVERT called for non-i64 type"); + "ExpandBITCAST called for non-i64 type"); // Turn i64->f64 into VMOVDRR. if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { @@ -2725,7 +2941,7 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(1, MVT::i32)); - return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT, + return DAG.getNode(ISD::BITCAST, dl, DstVT, DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); } @@ -2752,7 +2968,7 @@ static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); + return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } /// LowerShiftRightParts - Lower SRA_PARTS, which returns two @@ -2825,7 +3041,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } -SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, +SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const { // The rounding mode is in bits 23:22 of the FPSCR. // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 @@ -2835,11 +3051,11 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, DAG.getConstant(Intrinsic::arm_get_fpscr, MVT::i32)); - SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, + SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, DAG.getConstant(1U << 22, MVT::i32)); SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, DAG.getConstant(22, MVT::i32)); - return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, + return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, DAG.getConstant(3, MVT::i32)); } @@ -2860,33 +3076,40 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); + if (!VT.isVector()) + return SDValue(); + // Lower vector shifts on NEON to use VSHL. 
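// A minimal scalar model of the fast path in LowerFCOPYSIGN above (a sketch,
// not the patch's code): when moving floats through integer registers is
// cheap, copysign is pure bit surgery - keep the magnitude bits of x
// (Mask2 = 0x7fffffff) and OR in the sign bit of y (Mask1 = 0x80000000).
#include <cstdint>
#include <cstring>

static float copysignBitwise(float X, float Y) {
  uint32_t Xi, Yi;
  std::memcpy(&Xi, &X, sizeof(Xi));            // ISD::BITCAST f32 -> i32
  std::memcpy(&Yi, &Y, sizeof(Yi));
  uint32_t R = (Xi & 0x7fffffffu) | (Yi & 0x80000000u);
  float Out;
  std::memcpy(&Out, &R, sizeof(Out));          // ISD::BITCAST i32 -> f32
  return Out;
}

// Likewise for LowerFLT_ROUNDS_ above: the FPSCR rounding-mode field (bits
// 23:22) maps to FLT_ROUNDS as 0->1, 1->2, 2->3, 3->0, which one add and
// one mask implement:
static unsigned fltRounds(uint32_t FPSCR) {
  return ((FPSCR + (1u << 22)) >> 22) & 3;     // computes (RM + 1) & 3
}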
- if (VT.isVector()) { - assert(ST->hasNEON() && "unexpected vector shift"); - - // Left shifts translate directly to the vshiftu intrinsic. - if (N->getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), - N->getOperand(0), N->getOperand(1)); - - assert((N->getOpcode() == ISD::SRA || - N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); - - // NEON uses the same intrinsics for both left and right shifts. For - // right shifts, the shift amounts are negative, so negate the vector of - // shift amounts. - EVT ShiftVT = N->getOperand(1).getValueType(); - SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, - getZeroVector(ShiftVT, DAG, dl), - N->getOperand(1)); - Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? - Intrinsic::arm_neon_vshifts : - Intrinsic::arm_neon_vshiftu); + assert(ST->hasNEON() && "unexpected vector shift"); + + // Left shifts translate directly to the vshiftu intrinsic. + if (N->getOpcode() == ISD::SHL) return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(vshiftInt, MVT::i32), - N->getOperand(0), NegatedCount); - } + DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), + N->getOperand(0), N->getOperand(1)); + + assert((N->getOpcode() == ISD::SRA || + N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); + + // NEON uses the same intrinsics for both left and right shifts. For + // right shifts, the shift amounts are negative, so negate the vector of + // shift amounts. + EVT ShiftVT = N->getOperand(1).getValueType(); + SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, + getZeroVector(ShiftVT, DAG, dl), + N->getOperand(1)); + Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? + Intrinsic::arm_neon_vshifts : + Intrinsic::arm_neon_vshiftu); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(vshiftInt, MVT::i32), + N->getOperand(0), NegatedCount); +} + +static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); // We can get here for a node like i32 = ISD::SHL i32, i64 if (VT != MVT::i64) @@ -2912,7 +3135,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; - Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); // The low part is an ARMISD::RRX operand, which shifts the carry in. Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); @@ -2998,13 +3221,13 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { AndOp = Op1; // Ignore bitconvert. 
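// A scalar sketch of the i64 shift-by-one expansion in Expand64BitShift
// above, shown for the SRL case (SRA is analogous): SRL_FLAG shifts the high
// word right by one and captures the bit that falls out as a carry, and RRX
// rotates that carry into the top of the low word.
#include <cstdint>

static uint64_t lsr64By1(uint32_t Lo, uint32_t Hi) {
  uint32_t Carry = Hi & 1;                     // bit shifted out of Hi
  uint32_t NewHi = Hi >> 1;                    // ARMISD::SRL_FLAG
  uint32_t NewLo = (Lo >> 1) | (Carry << 31);  // ARMISD::RRX shifts carry in
  return ((uint64_t)NewHi << 32) | NewLo;
}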
- if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT) + if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) AndOp = AndOp.getOperand(0); if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { Opc = ARMISD::VTST; - Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0)); - Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1)); + Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); + Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); Invert = !Invert; } } @@ -3013,7 +3236,38 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { if (Swap) std::swap(Op0, Op1); - SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); + // If one of the operands is a constant vector zero, attempt to fold the + // comparison to a specialized compare-against-zero form. + SDValue SingleOp; + if (ISD::isBuildVectorAllZeros(Op1.getNode())) + SingleOp = Op0; + else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { + if (Opc == ARMISD::VCGE) + Opc = ARMISD::VCLEZ; + else if (Opc == ARMISD::VCGT) + Opc = ARMISD::VCLTZ; + SingleOp = Op1; + } + + SDValue Result; + if (SingleOp.getNode()) { + switch (Opc) { + case ARMISD::VCEQ: + Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; + case ARMISD::VCGE: + Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; + case ARMISD::VCLEZ: + Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; + case ARMISD::VCGT: + Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; + case ARMISD::VCLTZ: + Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; + default: + Result = DAG.getNode(Opc, dl, VT, Op0, Op1); + } + } else { + Result = DAG.getNode(Opc, dl, VT, Op0, Op1); + } if (Invert) Result = DAG.getNOT(dl, Result, VT); @@ -3026,7 +3280,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { /// operand (e.g., VMOV). If so, return the encoded value. static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, - EVT &VT, bool is128Bits, bool isVMOV) { + EVT &VT, bool is128Bits, NEONModImmType type) { unsigned OpCmode, Imm; // SplatBitSize is set to the smallest size that splats the vector, so a @@ -3039,7 +3293,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, switch (SplatBitSize) { case 8: - if (!isVMOV) + if (type != VMOVModImm) return SDValue(); // Any 1-byte value is OK. Op=0, Cmode=1110. assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); @@ -3096,6 +3350,9 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, break; } + // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC + if (type == OtherModImm) return SDValue(); + if ((SplatBits & ~0xffff) == 0 && ((SplatBits | SplatUndef) & 0xff) == 0xff) { // Value = 0x0000nnff: Op=x, Cmode=1100. @@ -3122,7 +3379,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, return SDValue(); case 64: { - if (!isVMOV) + if (type != VMOVModImm) return SDValue(); // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. uint64_t BitMask = 0xff; @@ -3376,8 +3633,8 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, // If this is a case we can't handle, return null and let the default // expansion code take care of it. 
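// A sketch of the compare-against-zero folding in LowerVSETCC above
// (illustrative names, not LLVM API): with an all-zeros vector on the right,
// VCEQ/VCGE/VCGT become their single-operand ...Z forms; with the zero on
// the left, the ordered compares flip sense first (0 >= x is x <= 0).
enum NeonCmp { VCEQ, VCGE, VCGT, VCEQZ, VCGEZ, VCGTZ, VCLEZ, VCLTZ };

static NeonCmp foldCmpAgainstZero(NeonCmp Opc, bool ZeroOnLeft) {
  if (ZeroOnLeft) {
    if (Opc == VCGE) return VCLEZ;
    if (Opc == VCGT) return VCLTZ;
  }
  switch (Opc) {
  case VCEQ: return VCEQZ;
  case VCGE: return VCGEZ;
  case VCGT: return VCGTZ;
  default:   return Opc;   // no zero form; keep the two-operand compare
  }
}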
-static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *ST) { +SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) const { BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); @@ -3391,10 +3648,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, EVT VmovVT; SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), SplatBitSize, - DAG, VmovVT, VT.is128BitVector(), true); + DAG, VmovVT, VT.is128BitVector(), + VMOVModImm); if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); + return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } // Try an immediate VMVN. @@ -3402,10 +3660,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, ((1LL << SplatBitSize) - 1)); Val = isNEONModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, - DAG, VmovVT, VT.is128BitVector(), false); + DAG, VmovVT, VT.is128BitVector(), + VMVNModImm); if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); + return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } } } @@ -3439,26 +3698,25 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - if (EnableARMVDUPsplat) { - // Use VDUP for non-constant splats. For f32 constant splats, reduce to - // i32 and try again. - if (usesOnlyOneValue && EltSize <= 32) { - if (!isConstant) - return DAG.getNode(ARMISD::VDUP, dl, VT, Value); - if (VT.getVectorElementType().isFloatingPoint()) { - SmallVector<SDValue, 8> Ops; - for (unsigned i = 0; i < NumElts; ++i) - Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, - Op.getOperand(i))); - SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0], - NumElts); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, - LowerBUILD_VECTOR(Val, DAG, ST)); - } - SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); + // Use VDUP for non-constant splats. For f32 constant splats, reduce to + // i32 and try again. + if (usesOnlyOneValue && EltSize <= 32) { + if (!isConstant) + return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + if (VT.getVectorElementType().isFloatingPoint()) { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < NumElts; ++i) + Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, + Op.getOperand(i))); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); + Val = LowerBUILD_VECTOR(Val, DAG, ST); if (Val.getNode()) - return DAG.getNode(ARMISD::VDUP, dl, VT, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Val); } + SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); + if (Val.getNode()) + return DAG.getNode(ARMISD::VDUP, dl, VT, Val); } // If all elements are constants and the case above didn't get hit, fall back @@ -3467,10 +3725,11 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (isConstant) return SDValue(); - if (!EnableARMVDUPsplat) { - // Use VDUP for non-constant splats. - if (usesOnlyOneValue && EltSize <= 32) - return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 
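// A decision sketch for the splat paths in LowerBUILD_VECTOR above (a
// summary, not the patch's code): try a VMOV immediate, then a VMVN of the
// complemented immediate, then VDUP - directly for non-constant splats, or
// after materializing a single-instruction constant. f32 constant splats are
// bitcast to i32 and retried before reaching the fallback.
enum SplatKind { VMOVImm, VMVNImm, VDUPFromReg, VDUPFromConst, Fallback };

static SplatKind chooseSplat(bool IsConstant, bool FitsVMOV, bool FitsVMVN,
                             bool SingleInstrConst, unsigned EltBits) {
  if (IsConstant && FitsVMOV) return VMOVImm;
  if (IsConstant && FitsVMVN) return VMVNImm;
  if (EltBits <= 32) {
    if (!IsConstant)      return VDUPFromReg;
    if (SingleInstrConst) return VDUPFromConst;
  }
  return Fallback;   // constant pool, shuffle reconstruction, etc.
}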
+ if (NumElts >= 4) { + SDValue shuffle = ReconstructShuffle(Op, DAG); + if (shuffle != SDValue()) + return shuffle; } // Vectors with 32- or 64-bit elements can be built by directly assigning @@ -3483,14 +3742,144 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumElts; ++i) - Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i))); + Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Val); } return SDValue(); } +// Gather data to see if the operation can be modelled as a +// shuffle in combination with VEXTs. +SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + + SmallVector<SDValue, 2> SourceVecs; + SmallVector<unsigned, 2> MinElts; + SmallVector<unsigned, 2> MaxElts; + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { + // A shuffle can only come from building a vector from various + // elements of other vectors. + return SDValue(); + } + + // Record this extraction against the appropriate vector if possible... + SDValue SourceVec = V.getOperand(0); + unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); + bool FoundSource = false; + for (unsigned j = 0; j < SourceVecs.size(); ++j) { + if (SourceVecs[j] == SourceVec) { + if (MinElts[j] > EltNo) + MinElts[j] = EltNo; + if (MaxElts[j] < EltNo) + MaxElts[j] = EltNo; + FoundSource = true; + break; + } + } + + // Or record a new source if not... + if (!FoundSource) { + SourceVecs.push_back(SourceVec); + MinElts.push_back(EltNo); + MaxElts.push_back(EltNo); + } + } + + // Currently only do something sane when at most two source vectors + // involved. + if (SourceVecs.size() > 2) + return SDValue(); + + SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; + int VEXTOffsets[2] = {0, 0}; + + // This loop extracts the usage patterns of the source vectors + // and prepares appropriate SDValues for a shuffle if possible. + for (unsigned i = 0; i < SourceVecs.size(); ++i) { + if (SourceVecs[i].getValueType() == VT) { + // No VEXT necessary + ShuffleSrcs[i] = SourceVecs[i]; + VEXTOffsets[i] = 0; + continue; + } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { + // It probably isn't worth padding out a smaller vector just to + // break it down again in a shuffle. + return SDValue(); + } + + // Since only 64-bit and 128-bit vectors are legal on ARM and + // we've eliminated the other cases... 
+ assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && + "unexpected vector sizes in ReconstructShuffle"); + + if (MaxElts[i] - MinElts[i] >= NumElts) { + // Span too large for a VEXT to cope + return SDValue(); + } + + if (MinElts[i] >= NumElts) { + // The extraction can just take the second half + VEXTOffsets[i] = NumElts; + ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, + SourceVecs[i], + DAG.getIntPtrConstant(NumElts)); + } else if (MaxElts[i] < NumElts) { + // The extraction can just take the first half + VEXTOffsets[i] = 0; + ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, + SourceVecs[i], + DAG.getIntPtrConstant(0)); + } else { + // An actual VEXT is needed + VEXTOffsets[i] = MinElts[i]; + SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, + SourceVecs[i], + DAG.getIntPtrConstant(0)); + SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, + SourceVecs[i], + DAG.getIntPtrConstant(NumElts)); + ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, + DAG.getConstant(VEXTOffsets[i], MVT::i32)); + } + } + + SmallVector<int, 8> Mask; + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Entry = Op.getOperand(i); + if (Entry.getOpcode() == ISD::UNDEF) { + Mask.push_back(-1); + continue; + } + + SDValue ExtractVec = Entry.getOperand(0); + int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i) + .getOperand(1))->getSExtValue(); + if (ExtractVec == SourceVecs[0]) { + Mask.push_back(ExtractElt - VEXTOffsets[0]); + } else { + Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); + } + } + + // Final check before we try to produce nonsense... + if (isShuffleMaskLegal(Mask, VT)) + return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], + &Mask[0]); + + return SDValue(); +} + /// isShuffleMaskLegal - Targets can use this to indicate that they only /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values @@ -3706,8 +4095,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // registers are defined to use, and since i64 is not legal. EVT EltVT = EVT::getFloatingPointVT(EltSize); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); - V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1); - V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2); + V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumElts; ++i) { if (ShuffleMask[i] < 0) @@ -3719,21 +4108,26 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { MVT::i32))); } SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Val); } return SDValue(); } static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); - SDValue Vec = Op.getOperand(0); + // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 
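// A worked sketch of the mask arithmetic in ReconstructShuffle above. If a
// v4i16 result pulls elements 2,3,4,5 from one v8i16 source, then MinElts=2
// and MaxElts=5, the span (3) fits within NumElts (4), VEXTOffsets[0]
// becomes 2, and each mask entry is rebased by that offset:
static int shuffleMaskEntry(unsigned ExtractElt, bool FromFirstSource,
                            unsigned VEXTOffset0, unsigned VEXTOffset1,
                            unsigned NumElts) {
  // Elements from the second source occupy the upper half of the shuffle's
  // index space, hence the extra NumElts.
  return FromFirstSource ? (int)(ExtractElt - VEXTOffset0)
                         : (int)(ExtractElt + NumElts - VEXTOffset1);
}
// For the example above the mask comes out as {0,1,2,3} on the VEXT result.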
SDValue Lane = Op.getOperand(1); - assert(VT == MVT::i32 && - Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && - "unexpected type for custom-lowering vector extract"); - return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); + if (!isa<ConstantSDNode>(Lane)) + return SDValue(); + + SDValue Vec = Op.getOperand(0); + if (Op.getValueType() == MVT::i32 && + Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); + } + + return Op; } static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { @@ -3747,25 +4141,123 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDValue Op1 = Op.getOperand(1); if (Op0.getOpcode() != ISD::UNDEF) Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0), + DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), DAG.getIntPtrConstant(0)); if (Op1.getOpcode() != ISD::UNDEF) Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1), + DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), DAG.getIntPtrConstant(1)); - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); +} + +/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each +/// element has been zero/sign-extended, depending on the isSigned parameter, +/// from an integer type half its size. +static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, + bool isSigned) { + // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. + EVT VT = N->getValueType(0); + if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { + SDNode *BVN = N->getOperand(0).getNode(); + if (BVN->getValueType(0) != MVT::v4i32 || + BVN->getOpcode() != ISD::BUILD_VECTOR) + return false; + unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; + unsigned HiElt = 1 - LoElt; + ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); + ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); + ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); + ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); + if (!Lo0 || !Hi0 || !Lo1 || !Hi1) + return false; + if (isSigned) { + if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && + Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) + return true; + } else { + if (Hi0->isNullValue() && Hi1->isNullValue()) + return true; + } + return false; + } + + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDNode *Elt = N->getOperand(i).getNode(); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + unsigned HalfSize = EltSize / 2; + if (isSigned) { + int64_t SExtVal = C->getSExtValue(); + if ((SExtVal >> HalfSize) != (SExtVal >> EltSize)) + return false; + } else { + if ((C->getZExtValue() >> HalfSize) != 0) + return false; + } + continue; + } + return false; + } + + return true; +} + +/// isSignExtended - Check if a node is a vector value that is sign-extended +/// or a constant BUILD_VECTOR with sign-extended elements. 
+static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { + if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) + return true; + if (isExtendedBUILD_VECTOR(N, DAG, true)) + return true; + return false; +} + +/// isZeroExtended - Check if a node is a vector value that is zero-extended +/// or a constant BUILD_VECTOR with zero-extended elements. +static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { + if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) + return true; + if (isExtendedBUILD_VECTOR(N, DAG, false)) + return true; + return false; } -/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or -/// an extending load, return the unextended value. +/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending +/// load, or BUILD_VECTOR with extended elements, return the unextended value. static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) return N->getOperand(0); - LoadSDNode *LD = cast<LoadSDNode>(N); - return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), - LD->getBasePtr(), LD->getSrcValue(), - LD->getSrcValueOffset(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) + return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), + LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will + // have been legalized as a BITCAST from v4i32. + if (N->getOpcode() == ISD::BITCAST) { + SDNode *BVN = N->getOperand(0).getNode(); + assert(BVN->getOpcode() == ISD::BUILD_VECTOR && + BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); + unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32, + BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); + } + // Construct a new BUILD_VECTOR with elements truncated to half the size. + assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); + EVT VT = N->getValueType(0); + unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; + unsigned NumElts = VT.getVectorNumElements(); + MVT TruncVT = MVT::getIntegerVT(EltSize); + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != NumElts; ++i) { + ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); + const APInt &CInt = C->getAPIntValue(); + Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT)); + } + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); } static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { @@ -3776,19 +4268,16 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { SDNode *N0 = Op.getOperand(0).getNode(); SDNode *N1 = Op.getOperand(1).getNode(); unsigned NewOpc = 0; - if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) && - (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) { + if (isSignExtended(N0, DAG) && isSignExtended(N1, DAG)) NewOpc = ARMISD::VMULLs; - } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) && - (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) { + else if (isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG)) NewOpc = ARMISD::VMULLu; - } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) { + else if (VT == MVT::v2i64) // Fall through to expand this. It is not legal. 
return SDValue(); - } else { + else // Other vector multiplications are legal. return Op; - } // Legalize to a VMULL instruction. DebugLoc DL = Op.getDebugLoc(); @@ -3801,6 +4290,181 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(NewOpc, DL, VT, Op0, Op1); } +static SDValue +LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { + // Convert to float + // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); + // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); + X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); + Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); + X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); + Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); + // Get reciprocal estimate. + // float4 recip = vrecpeq_f32(yf); + Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); + // Because char has a smaller range than uchar, we can actually get away + // without any newton steps. This requires that we use a weird bias + // of 0xb000, however (again, this has been exhaustively tested). + // float4 result = as_float4(as_int4(xf*recip) + 0xb000); + X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); + X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); + Y = DAG.getConstant(0xb000, MVT::i32); + Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y); + X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y); + X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X); + // Convert back to short. + X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X); + X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X); + return X; +} + +static SDValue +LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { + SDValue N2; + // Convert to float. + // float4 yf = vcvt_f32_s32(vmovl_s16(y)); + // float4 xf = vcvt_f32_s32(vmovl_s16(x)); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); + N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); + N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); + + // Use reciprocal estimate and one refinement step. + // float4 recip = vrecpeq_f32(yf); + // recip *= vrecpsq_f32(yf, recip); + N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); + N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), + N1, N2); + N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); + // Because short has a smaller range than ushort, we can actually get away + // with only a single newton step. This requires that we use a weird bias + // of 89, however (again, this has been exhaustively tested). + // float4 result = as_float4(as_int4(xf*recip) + 89); + N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); + N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); + N1 = DAG.getConstant(89, MVT::i32); + N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); + N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); + N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); + // Convert back to integer and return. 
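// A scalar sketch of the reciprocal-estimate division in LowerSDIV_v4i16
// above (illustrative: an exact 1/y stands in for VRECPE+VRECPS, whose
// hardware semantics differ). Two roundings can leave x*recip a few ulps
// below an exact quotient, so a small bias (+89 here, found by exhaustive
// testing per the comments above) is added in the integer domain before
// truncating. Assumes Y != 0.
#include <cstdint>
#include <cstring>

static int16_t sdivViaRecip(int16_t X, int16_t Y) {
  float Xf = (float)X, Yf = (float)Y;
  float Recip = 1.0f / Yf;            // stand-in for vrecpe + vrecps steps
  float Prod = Xf * Recip;
  uint32_t Bits;
  std::memcpy(&Bits, &Prod, sizeof(Bits));
  Bits += 89;                         // nudge up by 89 ulps
  std::memcpy(&Prod, &Bits, sizeof(Prod));
  return (int16_t)(int32_t)Prod;      // vcvt_s32_f32 + vmovn_s32
}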
+ // return vmovn_s32(vcvt_s32_f32(result)); + N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); + N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); + return N0; +} + +static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + assert((VT == MVT::v4i16 || VT == MVT::v8i8) && + "unexpected type for custom-lowering ISD::SDIV"); + + DebugLoc dl = Op.getDebugLoc(); + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2, N3; + + if (VT == MVT::v8i8) { + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); + + N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, + DAG.getIntPtrConstant(4)); + N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, + DAG.getIntPtrConstant(4)); + N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, + DAG.getIntPtrConstant(0)); + N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, + DAG.getIntPtrConstant(0)); + + N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 + N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 + + N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); + N0 = LowerCONCAT_VECTORS(N0, DAG); + + N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); + return N0; + } + return LowerSDIV_v4i16(N0, N1, dl, DAG); +} + +static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + assert((VT == MVT::v4i16 || VT == MVT::v8i8) && + "unexpected type for custom-lowering ISD::UDIV"); + + DebugLoc dl = Op.getDebugLoc(); + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2, N3; + + if (VT == MVT::v8i8) { + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); + N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); + + N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, + DAG.getIntPtrConstant(4)); + N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, + DAG.getIntPtrConstant(4)); + N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, + DAG.getIntPtrConstant(0)); + N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, + DAG.getIntPtrConstant(0)); + + N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 + N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 + + N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); + N0 = LowerCONCAT_VECTORS(N0, DAG); + + N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, + DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32), + N0); + return N0; + } + + // v4i16 sdiv ... Convert to float. + // float4 yf = vcvt_f32_s32(vmovl_u16(y)); + // float4 xf = vcvt_f32_s32(vmovl_u16(x)); + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0); + N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1); + N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); + N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); + + // Use reciprocal estimate and two refinement steps. 
+ // float4 recip = vrecpeq_f32(yf); + // recip *= vrecpsq_f32(yf, recip); + // recip *= vrecpsq_f32(yf, recip); + N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); + N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), + N1, N2); + N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); + N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), + N1, N2); + N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); + // Simply multiplying by the reciprocal estimate can leave us a few ulps + // too low, so we add 2 ulps (exhaustive testing shows that this is enough, + // and that it will never cause us to return an answer too large). + // float4 result = as_float4(as_int4(xf*recip) + 89); + N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); + N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); + N1 = DAG.getConstant(2, MVT::i32); + N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); + N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); + N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); + // Convert back to integer and return. + // return vmovn_u32(vcvt_s32_f32(result)); + N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); + N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); + return N0; +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); @@ -3816,6 +4480,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); + case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: @@ -3826,9 +4491,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); - case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); + case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); @@ -3843,6 +4509,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); + case ISD::SDIV: return LowerSDIV(Op, DAG); + case ISD::UDIV: return LowerUDIV(Op, DAG); } return SDValue(); } @@ -3857,12 +4525,12 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, default: llvm_unreachable("Don't know how to custom expand this!"); break; - case ISD::BIT_CONVERT: - Res = ExpandBIT_CONVERT(N, DAG); + case ISD::BITCAST: + Res = ExpandBITCAST(N, DAG); break; case ISD::SRL: case ISD::SRA: - Res = LowerShift(N, DAG, Subtarget); + Res = Expand64BitShift(N, DAG, Subtarget); break; } if (Res.getNode()) @@ -3892,7 +4560,7 @@ 
ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); case 1: ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB; + strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; break; case 2: ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; @@ -4183,6 +4851,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::BCCi64: case ARM::BCCZi64: { + // If there is an unconditional branch to the other successor, remove it. + BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); + // Compare both parts that make up the double comparison separately for // equality. bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; @@ -4341,10 +5012,6 @@ static SDValue PerformMULCombine(SDNode *N, if (Subtarget->isThumb1Only()) return SDValue(); - if (DAG.getMachineFunction(). - getFunction()->hasFnAttr(Attribute::OptimizeForSize)) - return SDValue(); - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); @@ -4389,10 +5056,67 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); } +static SDValue PerformANDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // Attempt to use immediate-form VBIC + BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN && + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + if (SplatBitSize <= 64) { + EVT VbicVT; + SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, + DAG, VbicVT, VT.is128BitVector(), + OtherModImm); + if (Val.getNode()) { + SDValue Input = + DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); + SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); + } + } + } + + return SDValue(); +} + /// PerformORCombine - Target-specific dag combine xforms for ISD::OR static SDValue PerformORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { + // Attempt to use immediate-form VORR + BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN && Subtarget->hasNEON() && + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + if (SplatBitSize <= 64) { + EVT VorrVT; + SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, + DAG, VorrVT, VT.is128BitVector(), + OtherModImm); + if (Val.getNode()) { + SDValue Input = + DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); + SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); + } + } + } + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when // reasonable. 
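// A sketch of the new immediate-form combines above (illustrative, not LLVM
// API): an AND with a splatted constant C becomes VBIC #imm when ~C fits the
// NEON modified-immediate encoding (VBIC clears the bits set in its
// immediate), while an OR becomes VORR #imm when C itself fits. The cmode
// 1100/1101 patterns are excluded via OtherModImm, since VORR/VBIC do not
// support them.
enum BitwiseImmForm { UseVBIC, UseVORR, NoFold };

static BitwiseImmForm foldBitwiseImm(bool IsAnd, uint64_t SplatC,
                                     bool (*FitsOtherModImm)(uint64_t)) {
  if (IsAnd)
    return FitsOtherModImm(~SplatC) ? UseVBIC : NoFold; // encodes ~SplatBits
  return FitsOtherModImm(SplatC) ? UseVORR : NoFold;    // encodes SplatBits
}
// e.g. a v4i32 AND with a 0xffffff00 splat folds to VBIC.i32 #0x000000ff.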
@@ -4400,7 +5124,6 @@ static SDValue PerformORCombine(SDNode *N, if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); - SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); DebugLoc DL = N->getDebugLoc(); // 1) or (and A, mask), val => ARMbfi A, val, mask @@ -4415,40 +5138,46 @@ static SDValue PerformORCombine(SDNode *N, if (N0.getOpcode() != ISD::AND) return SDValue(); - EVT VT = N->getValueType(0); if (VT != MVT::i32) return SDValue(); + SDValue N00 = N0.getOperand(0); // The value and the mask need to be constants so we can verify this is // actually a bitfield set. If the mask is 0xffff, we can do better // via a movt instruction, so don't use BFI in that case. - ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (!C) + SDValue MaskOp = N0.getOperand(1); + ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp); + if (!MaskC) return SDValue(); - unsigned Mask = C->getZExtValue(); + unsigned Mask = MaskC->getZExtValue(); if (Mask == 0xffff) return SDValue(); SDValue Res; // Case (1): or (and A, mask), val => ARMbfi A, val, mask - if ((C = dyn_cast<ConstantSDNode>(N1))) { - unsigned Val = C->getZExtValue(); - if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val) + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N1C) { + unsigned Val = N1C->getZExtValue(); + if ((Val & ~Mask) != Val) return SDValue(); - Val >>= CountTrailingZeros_32(~Mask); - Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), - DAG.getConstant(Val, MVT::i32), - DAG.getConstant(Mask, MVT::i32)); + if (ARM::isBitFieldInvertedMask(Mask)) { + Val >>= CountTrailingZeros_32(~Mask); - // Do not add new nodes to DAG combiner worklist. - DCI.CombineTo(N, Res, false); + Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, + DAG.getConstant(Val, MVT::i32), + DAG.getConstant(Mask, MVT::i32)); + + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, Res, false); + return SDValue(); + } } else if (N1.getOpcode() == ISD::AND) { // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask - C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); - if (!C) + ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + if (!N11C) return SDValue(); - unsigned Mask2 = C->getZExtValue(); + unsigned Mask2 = N11C->getZExtValue(); if (ARM::isBitFieldInvertedMask(Mask) && ARM::isBitFieldInvertedMask(~Mask2) && @@ -4462,10 +5191,11 @@ static SDValue PerformORCombine(SDNode *N, unsigned lsb = CountTrailingZeros_32(Mask2); Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), DAG.getConstant(lsb, MVT::i32)); - Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res, + Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, DAG.getConstant(Mask, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); + return SDValue(); } else if (ARM::isBitFieldInvertedMask(~Mask) && ARM::isBitFieldInvertedMask(Mask2) && (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) { @@ -4476,40 +5206,472 @@ static SDValue PerformORCombine(SDNode *N, return SDValue(); // 2b unsigned lsb = CountTrailingZeros_32(Mask); - Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + Res = DAG.getNode(ISD::SRL, DL, VT, N00, DAG.getConstant(lsb, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, DAG.getConstant(Mask2, MVT::i32)); // Do not add new nodes to DAG combiner worklist. 
DCI.CombineTo(N, Res, false); + return SDValue(); } } + if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && + N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) && + ARM::isBitFieldInvertedMask(~Mask)) { + // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask + // where lsb(mask) == #shamt and masked bits of B are known zero. + SDValue ShAmt = N00.getOperand(1); + unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); + unsigned LSB = CountTrailingZeros_32(Mask); + if (ShAmtC != LSB) + return SDValue(); + + Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), + DAG.getConstant(~Mask, MVT::i32)); + + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, Res, false); + } + + return SDValue(); +} + +/// PerformBFICombine - (bfi A, (and B, C1), C2) -> (bfi A, B, C2) iff +/// C1 & C2 == C1. +static SDValue PerformBFICombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + SDValue N1 = N->getOperand(1); + if (N1.getOpcode() == ISD::AND) { + ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + if (!N11C) + return SDValue(); + unsigned Mask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + unsigned Mask2 = N11C->getZExtValue(); + if ((Mask & Mask2) == Mask2) + return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0), + N->getOperand(0), N1.getOperand(0), + N->getOperand(2)); + } return SDValue(); } /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. static SDValue PerformVMOVRRDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - // fmrrd(fmdrr x, y) -> x,y + TargetLowering::DAGCombinerInfo &DCI) { + // vmovrrd(vmovdrr x, y) -> x,y SDValue InDouble = N->getOperand(0); if (InDouble.getOpcode() == ARMISD::VMOVDRR) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); return SDValue(); } +/// PerformVMOVDRRCombine - Target-specific dag combine xforms for +/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. +static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { + // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + if (Op0.getOpcode() == ISD::BITCAST) + Op0 = Op0.getOperand(0); + if (Op1.getOpcode() == ISD::BITCAST) + Op1 = Op1.getOperand(0); + if (Op0.getOpcode() == ARMISD::VMOVRRD && + Op0.getNode() == Op1.getNode() && + Op0.getResNo() == 0 && Op1.getResNo() == 1) + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + N->getValueType(0), Op0.getOperand(0)); + return SDValue(); +} + +/// PerformSTORECombine - Target-specific dag combine xforms for +/// ISD::STORE. +static SDValue PerformSTORECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // Bitcast an i64 store extracted from a vector to f64. + // Otherwise, the i64 value will be legalized to a pair of i32 values. 
+ StoreSDNode *St = cast<StoreSDNode>(N); + SDValue StVal = St->getValue(); + if (!ISD::isNormalStore(St) || St->isVolatile() || + StVal.getValueType() != MVT::i64 || + StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + DebugLoc dl = StVal.getDebugLoc(); + SDValue IntVec = StVal.getOperand(0); + EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, + IntVec.getValueType().getVectorNumElements()); + SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); + SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, + Vec, StVal.getOperand(1)); + dl = N->getDebugLoc(); + SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); + // Make the DAGCombiner fold the bitcasts. + DCI.AddToWorklist(Vec.getNode()); + DCI.AddToWorklist(ExtElt.getNode()); + DCI.AddToWorklist(V.getNode()); + return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment(), + St->getTBAAInfo()); +} + +/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node +/// are normal, non-volatile loads. If so, it is profitable to bitcast an +/// i64 vector to have f64 elements, since the value can then be loaded +/// directly into a VFP register. +static bool hasNormalLoadOperand(SDNode *N) { + unsigned NumElts = N->getValueType(0).getVectorNumElements(); + for (unsigned i = 0; i < NumElts; ++i) { + SDNode *Elt = N->getOperand(i).getNode(); + if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile()) + return true; + } + return false; +} + +/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for +/// ISD::BUILD_VECTOR. +static SDValue PerformBUILD_VECTORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI){ + // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): + // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value + // into a pair of GPRs, which is fine when the value is used as a scalar, + // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. + SelectionDAG &DAG = DCI.DAG; + if (N->getNumOperands() == 2) { + SDValue RV = PerformVMOVDRRCombine(N, DAG); + if (RV.getNode()) + return RV; + } + + // Load i64 elements as f64 values so that type legalization does not split + // them up into i32 values. + EVT VT = N->getValueType(0); + if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) + return SDValue(); + DebugLoc dl = N->getDebugLoc(); + SmallVector<SDValue, 8> Ops; + unsigned NumElts = VT.getVectorNumElements(); + for (unsigned i = 0; i < NumElts; ++i) { + SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i)); + Ops.push_back(V); + // Make the DAGCombiner fold the bitcast. + DCI.AddToWorklist(V.getNode()); + } + EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); + return DAG.getNode(ISD::BITCAST, dl, VT, BV); +} + +/// PerformInsertEltCombine - Target-specific dag combine xforms for +/// ISD::INSERT_VECTOR_ELT. +static SDValue PerformInsertEltCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // Bitcast an i64 load inserted into a vector to f64. + // Otherwise, the i64 value will be legalized to a pair of i32 values. 
+ EVT VT = N->getValueType(0); + SDNode *Elt = N->getOperand(1).getNode(); + if (VT.getVectorElementType() != MVT::i64 || + !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile()) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + DebugLoc dl = N->getDebugLoc(); + EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, + VT.getVectorNumElements()); + SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); + SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1)); + // Make the DAGCombiner fold the bitcasts. + DCI.AddToWorklist(Vec.getNode()); + DCI.AddToWorklist(V.getNode()); + SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT, + Vec, V, N->getOperand(2)); + return DAG.getNode(ISD::BITCAST, dl, VT, InsElt); +} + +/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for +/// ISD::VECTOR_SHUFFLE. +static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { + // The LLVM shufflevector instruction does not require the shuffle mask + // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does + // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the + // operands do not match the mask length, they are extended by concatenating + // them with undef vectors. That is probably the right thing for other + // targets, but for NEON it is better to concatenate two double-register + // size vector operands into a single quad-register size vector. Do that + // transformation here: + // shuffle(concat(v1, undef), concat(v2, undef)) -> + // shuffle(concat(v1, v2), undef) + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + if (Op0.getOpcode() != ISD::CONCAT_VECTORS || + Op1.getOpcode() != ISD::CONCAT_VECTORS || + Op0.getNumOperands() != 2 || + Op1.getNumOperands() != 2) + return SDValue(); + SDValue Concat0Op1 = Op0.getOperand(1); + SDValue Concat1Op1 = Op1.getOperand(1); + if (Concat0Op1.getOpcode() != ISD::UNDEF || + Concat1Op1.getOpcode() != ISD::UNDEF) + return SDValue(); + // Skip the transformation if any of the types are illegal. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = N->getValueType(0); + if (!TLI.isTypeLegal(VT) || + !TLI.isTypeLegal(Concat0Op1.getValueType()) || + !TLI.isTypeLegal(Concat1Op1.getValueType())) + return SDValue(); + + SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + Op0.getOperand(0), Op1.getOperand(0)); + // Translate the shuffle mask. + SmallVector<int, 16> NewMask; + unsigned NumElts = VT.getVectorNumElements(); + unsigned HalfElts = NumElts/2; + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + for (unsigned n = 0; n < NumElts; ++n) { + int MaskElt = SVN->getMaskElt(n); + int NewElt = -1; + if (MaskElt < (int)HalfElts) + NewElt = MaskElt; + else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts)) + NewElt = HalfElts + MaskElt - NumElts; + NewMask.push_back(NewElt); + } + return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat, + DAG.getUNDEF(VT), NewMask.data()); +} + +/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and +/// NEON load/store intrinsics to merge base address updates. +static SDValue CombineBaseUpdate(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || + N->getOpcode() == ISD::INTRINSIC_W_CHAIN); + unsigned AddrOpIdx = (isIntrinsic ? 
2 : 1); + SDValue Addr = N->getOperand(AddrOpIdx); + + // Search for a use of the address operand that is an increment. + for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), + UE = Addr.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() != ISD::ADD || + UI.getUse().getResNo() != Addr.getResNo()) + continue; + + // Check that the add is independent of the load/store. Otherwise, folding + // it would create a cycle. + if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) + continue; + + // Find the new opcode for the updating load/store. + bool isLoad = true; + bool isLaneOp = false; + unsigned NewOpc = 0; + unsigned NumVecs = 0; + if (isIntrinsic) { + unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + switch (IntNo) { + default: assert(0 && "unexpected intrinsic for Neon base update"); + case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; + NumVecs = 1; break; + case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; + NumVecs = 2; break; + case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; + NumVecs = 3; break; + case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; + NumVecs = 4; break; + case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; + NumVecs = 2; isLaneOp = true; break; + case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; + NumVecs = 3; isLaneOp = true; break; + case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; + NumVecs = 4; isLaneOp = true; break; + case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; + NumVecs = 1; isLoad = false; break; + case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; + NumVecs = 2; isLoad = false; break; + case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; + NumVecs = 3; isLoad = false; break; + case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; + NumVecs = 4; isLoad = false; break; + case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; + NumVecs = 2; isLoad = false; isLaneOp = true; break; + case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; + NumVecs = 3; isLoad = false; isLaneOp = true; break; + case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; + NumVecs = 4; isLoad = false; isLaneOp = true; break; + } + } else { + isLaneOp = true; + switch (N->getOpcode()) { + default: assert(0 && "unexpected opcode for Neon base update"); + case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; + case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; + case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; + } + } + + // Find the size of memory referenced by the load/store. + EVT VecTy; + if (isLoad) + VecTy = N->getValueType(0); + else + VecTy = N->getOperand(AddrOpIdx+1).getValueType(); + unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; + if (isLaneOp) + NumBytes /= VecTy.getVectorNumElements(); + + // If the increment is a constant, it must match the memory ref size. + SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); + if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { + uint64_t IncVal = CInc->getZExtValue(); + if (IncVal != NumBytes) + continue; + } else if (NumBytes >= 3 * 16) { + // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two + // separate instructions that make it harder to use a non-constant update. + continue; + } + + // Create the new updating load/store node. + EVT Tys[6]; + unsigned NumResultVecs = (isLoad ? 
NumVecs : 0); + unsigned n; + for (n = 0; n < NumResultVecs; ++n) + Tys[n] = VecTy; + Tys[n++] = MVT::i32; + Tys[n] = MVT::Other; + SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); + SmallVector<SDValue, 8> Ops; + Ops.push_back(N->getOperand(0)); // incoming chain + Ops.push_back(N->getOperand(AddrOpIdx)); + Ops.push_back(Inc); + for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { + Ops.push_back(N->getOperand(i)); + } + MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); + SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys, + Ops.data(), Ops.size(), + MemInt->getMemoryVT(), + MemInt->getMemOperand()); + + // Update the uses. + std::vector<SDValue> NewResults; + for (unsigned i = 0; i < NumResultVecs; ++i) { + NewResults.push_back(SDValue(UpdN.getNode(), i)); + } + NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain + DCI.CombineTo(N, NewResults); + DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); + + break; + } + return SDValue(); +} + +/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a +/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic +/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and +/// return true. +static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + // vldN-dup instructions only support 64-bit vectors for N > 1. + if (!VT.is64BitVector()) + return false; + + // Check if the VDUPLANE operand is a vldN-dup intrinsic. + SDNode *VLD = N->getOperand(0).getNode(); + if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) + return false; + unsigned NumVecs = 0; + unsigned NewOpc = 0; + unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); + if (IntNo == Intrinsic::arm_neon_vld2lane) { + NumVecs = 2; + NewOpc = ARMISD::VLD2DUP; + } else if (IntNo == Intrinsic::arm_neon_vld3lane) { + NumVecs = 3; + NewOpc = ARMISD::VLD3DUP; + } else if (IntNo == Intrinsic::arm_neon_vld4lane) { + NumVecs = 4; + NewOpc = ARMISD::VLD4DUP; + } else { + return false; + } + + // First check that all the vldN-lane uses are VDUPLANEs and that the lane + // numbers match the load. + unsigned VLDLaneNo = + cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue(); + for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); + UI != UE; ++UI) { + // Ignore uses of the chain result. + if (UI.getUse().getResNo() == NumVecs) + continue; + SDNode *User = *UI; + if (User->getOpcode() != ARMISD::VDUPLANE || + VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) + return false; + } + + // Create the vldN-dup node. + EVT Tys[5]; + unsigned n; + for (n = 0; n < NumVecs; ++n) + Tys[n] = VT; + Tys[n] = MVT::Other; + SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); + SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; + MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); + SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys, + Ops, 2, VLDMemInt->getMemoryVT(), + VLDMemInt->getMemOperand()); + + // Update the uses. + for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); + UI != UE; ++UI) { + unsigned ResNo = UI.getUse().getResNo(); + // Ignore uses of the chain result. + if (ResNo == NumVecs) + continue; + SDNode *User = *UI; + DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); + } + + // Now the vldN-lane intrinsic is dead except for its chain result. + // Update uses of the chain. 
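+  // CombineTo needs a replacement for every result of VLD, so pass the new
+  // node's NumVecs vector results (now dead, as noted above) along with its
+  // chain result; any remaining chain users get rewired to the new chain.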
+ std::vector<SDValue> VLDDupResults; + for (unsigned n = 0; n < NumVecs; ++n) + VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); + VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); + DCI.CombineTo(VLD, VLDDupResults); + + return true; +} + /// PerformVDUPLANECombine - Target-specific dag combine xforms for /// ARMISD::VDUPLANE. static SDValue PerformVDUPLANECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is - // redundant. SDValue Op = N->getOperand(0); - EVT VT = N->getValueType(0); - // Ignore bit_converts. - while (Op.getOpcode() == ISD::BIT_CONVERT) + // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses + // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation. + if (CombineVLDDUP(N, DCI)) + return SDValue(N, 0); + + // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is + // redundant. Ignore bit_converts for now; element sizes are checked below. + while (Op.getOpcode() == ISD::BITCAST) Op = Op.getOperand(0); if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) return SDValue(); @@ -4521,11 +5683,11 @@ static SDValue PerformVDUPLANECombine(SDNode *N, unsigned EltBits; if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) EltSize = 8; + EVT VT = N->getValueType(0); if (EltSize > VT.getVectorElementType().getSizeInBits()) return SDValue(); - SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op); - return DCI.CombineTo(N, Res, false); + return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); } /// getVShiftImm - Check if this is a valid build_vector for the immediate @@ -4533,7 +5695,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N, /// build_vector must have the same constant integer value. static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { // Ignore bit_converts. - while (Op.getOpcode() == ISD::BIT_CONVERT) + while (Op.getOpcode() == ISD::BITCAST) Op = Op.getOperand(0); BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); APInt SplatBits, SplatUndef; @@ -4747,7 +5909,8 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); // Nothing to be done for scalar shifts. - if (! 
VT.isVector()) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!VT.isVector() || !TLI.isTypeLegal(VT)) return SDValue(); assert(ST->hasNEON() && "unexpected vector shift"); @@ -4793,7 +5956,8 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, if (VT == MVT::i32 && (EltVT == MVT::i8 || EltVT == MVT::i16) && - TLI.isTypeLegal(Vec.getValueType())) { + TLI.isTypeLegal(Vec.getValueType()) && + isa<ConstantSDNode>(Lane)) { unsigned Opc = 0; switch (N->getOpcode()) { @@ -4906,7 +6070,14 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); + case ISD::AND: return PerformANDCombine(N, DCI); + case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); + case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); + case ISD::STORE: return PerformSTORECombine(N, DCI); + case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); + case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); + case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: @@ -4916,20 +6087,42 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); + case ARMISD::VLD2DUP: + case ARMISD::VLD3DUP: + case ARMISD::VLD4DUP: + return CombineBaseUpdate(N, DCI); + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: + switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { + case Intrinsic::arm_neon_vld1: + case Intrinsic::arm_neon_vld2: + case Intrinsic::arm_neon_vld3: + case Intrinsic::arm_neon_vld4: + case Intrinsic::arm_neon_vld2lane: + case Intrinsic::arm_neon_vld3lane: + case Intrinsic::arm_neon_vld4lane: + case Intrinsic::arm_neon_vst1: + case Intrinsic::arm_neon_vst2: + case Intrinsic::arm_neon_vst3: + case Intrinsic::arm_neon_vst4: + case Intrinsic::arm_neon_vst2lane: + case Intrinsic::arm_neon_vst3lane: + case Intrinsic::arm_neon_vst4lane: + return CombineBaseUpdate(N, DCI); + default: break; + } + break; } return SDValue(); } -bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { - if (!Subtarget->hasV6Ops()) - // Pre-v6 does not support unaligned mem access. - return false; +bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, + EVT VT) const { + return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); +} - // v6+ may or may not support unaligned mem access depending on the system - // configuration. - // FIXME: This is pretty conservative. Should we provide cmdline option to - // control the behaviour? 
- if (!Subtarget->isTargetDarwin()) +bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { + if (!Subtarget->allowsUnalignedMem()) return false; switch (VT.getSimpleVT().SimpleTy) { @@ -5143,7 +6336,7 @@ bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { if (!Subtarget->isThumb()) return ARM_AM::getSOImmVal(Imm) != -1; if (Subtarget->isThumb2()) - return ARM_AM::getT2SOImmVal(Imm) != -1; + return ARM_AM::getT2SOImmVal(Imm) != -1; return Imm >= 0 && Imm <= 255; } @@ -5348,6 +6541,37 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, // ARM Inline Assembly Support //===----------------------------------------------------------------------===// +bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { + // Looking for "rev" which is V6+. + if (!Subtarget->hasV6Ops()) + return false; + + InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); + std::string AsmStr = IA->getAsmString(); + SmallVector<StringRef, 4> AsmPieces; + SplitString(AsmStr, AsmPieces, ";\n"); + + switch (AsmPieces.size()) { + default: return false; + case 1: + AsmStr = AsmPieces[0]; + AsmPieces.clear(); + SplitString(AsmStr, AsmPieces, " \t,"); + + // rev $0, $1 + if (AsmPieces.size() == 3 && + AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && + IA->getConstraintString().compare(0, 4, "=l,l") == 0) { + const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + if (Ty && Ty->getBitWidth() == 32) + return IntrinsicLowering::LowerToByteSwap(CI); + } + break; + } + + return false; +} + /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. ARMTargetLowering::ConstraintType @@ -5362,6 +6586,40 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight +ARMTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + const Type *type = CallOperandVal->getType(); + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + case 'l': + if (type->isIntegerTy()) { + if (Subtarget->isThumb()) + weight = CW_SpecificReg; + else + weight = CW_Register; + } + break; + case 'w': + if (type->isFloatingPointTy()) + weight = CW_Register; + break; + } + return weight; +} + std::pair<unsigned, const TargetRegisterClass*> ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { @@ -5664,3 +6922,63 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return ARM::getVFPf64Imm(Imm) != -1; return false; } + +/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as +/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment +/// specified in the intrinsic calls. 
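+/// For example, a call along the lines of (signature abbreviated)
+///   call void @llvm.arm.neon.vst2.v8i8(i8* %p, <8 x i8> %A, <8 x i8> %B, i32 8)
+/// is recorded as a 16-byte write at %p, with the 8-byte alignment taken from
+/// the trailing argument.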
+bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + unsigned Intrinsic) const { + switch (Intrinsic) { + case Intrinsic::arm_neon_vld1: + case Intrinsic::arm_neon_vld2: + case Intrinsic::arm_neon_vld3: + case Intrinsic::arm_neon_vld4: + case Intrinsic::arm_neon_vld2lane: + case Intrinsic::arm_neon_vld3lane: + case Intrinsic::arm_neon_vld4lane: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + // Conservatively set memVT to the entire set of vectors loaded. + uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8; + Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); + Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); + Info.vol = false; // volatile loads with NEON intrinsics not supported + Info.readMem = true; + Info.writeMem = false; + return true; + } + case Intrinsic::arm_neon_vst1: + case Intrinsic::arm_neon_vst2: + case Intrinsic::arm_neon_vst3: + case Intrinsic::arm_neon_vst4: + case Intrinsic::arm_neon_vst2lane: + case Intrinsic::arm_neon_vst3lane: + case Intrinsic::arm_neon_vst4lane: { + Info.opc = ISD::INTRINSIC_VOID; + // Conservatively set memVT to the entire set of vectors stored. + unsigned NumElts = 0; + for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { + const Type *ArgTy = I.getArgOperand(ArgI)->getType(); + if (!ArgTy->isVectorTy()) + break; + NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8; + } + Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); + Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); + Info.vol = false; // volatile stores with NEON intrinsics not supported + Info.readMem = false; + Info.writeMem = true; + return true; + } + default: + break; + } + + return false; +} diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index ba9ea7f..dc400c4 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -34,6 +34,10 @@ namespace llvm { Wrapper, // Wrapper - A wrapper node for TargetConstantPool, // TargetExternalSymbol, and TargetGlobalAddress. + WrapperDYN, // WrapperDYN - A wrapper node for TargetGlobalAddress in + // DYN mode. + WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in + // PIC mode. WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable CALL, // Function call. @@ -47,8 +51,6 @@ namespace llvm { PIC_ADD, // Add with a PC operand and a PIC label. - AND, // ARM "and" instruction that sets the 's' flag in CPSR. - CMP, // ARM compare instructions. CMPZ, // ARM compare that sets only Z flag. CMPFP, // ARM VFP compare instruction, sets FPSCR. @@ -73,8 +75,9 @@ namespace llvm { VMOVRRD, // double to two gprs. VMOVDRR, // Two gprs to double. - EH_SJLJ_SETJMP, // SjLj exception handling setjmp. - EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. + EH_SJLJ_SETJMP, // SjLj exception handling setjmp. + EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. + EH_SJLJ_DISPATCHSETUP, // SjLj exception handling dispatch setup. TC_RETURN, // Tail call return pseudo. @@ -82,13 +85,20 @@ namespace llvm { DYN_ALLOC, // Dynamic allocation on the stack. 
-      MEMBARRIER,   // Memory barrier
-      SYNCBARRIER,  // Memory sync barrier
+      MEMBARRIER,   // Memory barrier (DMB)
+      MEMBARRIER_MCR, // Memory barrier (MCR)
+
+      PRELOAD,      // Preload

       VCEQ,         // Vector compare equal.
+      VCEQZ,        // Vector compare equal to zero.
       VCGE,         // Vector compare greater than or equal.
+      VCGEZ,        // Vector compare greater than or equal to zero.
+      VCLEZ,        // Vector compare less than or equal to zero.
       VCGEU,        // Vector compare unsigned greater than or equal.
       VCGT,         // Vector compare greater than.
+      VCGTZ,        // Vector compare greater than zero.
+      VCLTZ,        // Vector compare less than zero.
       VCGTU,        // Vector compare unsigned greater than.
       VTST,         // Vector test bits.
@@ -161,7 +171,38 @@ namespace llvm {
       FMIN,

       // Bit-field insert
-      BFI
+      BFI,
+
+      // Vector OR with immediate
+      VORRIMM,
+      // Vector AND with NOT of immediate
+      VBICIMM,
+
+      // Vector load N-element structure to all lanes:
+      VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+      VLD3DUP,
+      VLD4DUP,
+
+      // NEON loads with post-increment base updates:
+      VLD1_UPD,
+      VLD2_UPD,
+      VLD3_UPD,
+      VLD4_UPD,
+      VLD2LN_UPD,
+      VLD3LN_UPD,
+      VLD4LN_UPD,
+      VLD2DUP_UPD,
+      VLD3DUP_UPD,
+      VLD4DUP_UPD,
+
+      // NEON stores with post-increment base updates:
+      VST1_UPD,
+      VST2_UPD,
+      VST3_UPD,
+      VST4_UPD,
+      VST2LN_UPD,
+      VST3LN_UPD,
+      VST4LN_UPD
     };
   }
@@ -193,14 +234,16 @@ namespace llvm {
     virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                                     SelectionDAG &DAG) const;

-    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
     virtual const char *getTargetNodeName(unsigned Opcode) const;

     virtual MachineBasicBlock *
       EmitInstrWithCustomInserter(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const;

+    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+    bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
+
     /// allowsUnalignedMemoryAccesses - Returns true if the target allows
     /// unaligned memory accesses of the specified type.
     /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
@@ -241,7 +284,15 @@
                                                 unsigned Depth) const;

+    virtual bool ExpandInlineAsm(CallInst *CI) const;
+
     ConstraintType getConstraintType(const std::string &Constraint) const;
+
+    /// Examine constraint string and operand type and determine a weight value.
+    /// The operand object must already have been set up with the operand type.
+    ConstraintWeight getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
+
     std::pair<unsigned, const TargetRegisterClass*>
       getRegForInlineAsmConstraint(const std::string &Constraint,
                                    EVT VT) const;
@@ -290,6 +341,9 @@
     /// materialize the FP immediate as a load from a constant pool.
     virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;

+    virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                    const CallInst &I,
+                                    unsigned Intrinsic) const;
   protected:
     std::pair<const TargetRegisterClass*, uint8_t>
     findRepresentativeClass(EVT VT) const;
@@ -301,6 +355,8 @@
     const TargetRegisterInfo *RegInfo;

+    const InstrItineraryData *Itins;
+
    /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
/// unsigned ARMPCLabelIndex; @@ -329,6 +385,7 @@ namespace llvm { ISD::ArgFlagsTy Flags) const; SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; @@ -350,6 +407,10 @@ namespace llvm { SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) const; + + SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, @@ -393,6 +454,8 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; + virtual bool isUsedByReturnOnly(SDNode *N) const; + SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, @@ -410,6 +473,13 @@ namespace llvm { }; + enum NEONModImmType { + VMOVModImm, + VMVNModImm, + OtherModImm + }; + + namespace ARM { FastISel *createFastISel(FunctionLoweringInfo &funcInfo); } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 113cfff..765cba4 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1,4 +1,4 @@ -//===- ARMInstrFormats.td - ARM Instruction Formats --*- tablegen -*---------=// +//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -71,7 +71,7 @@ def NVTBLFrm : Format<41>; // Misc flags. -// the instruction has a Rn register operand. +// The instruction has an Rn register operand. // UnaryDP - Indicates this is a unary data processing instruction, i.e. // it doesn't have a Rn operand. class UnaryDP { bit isUnaryDataProc = 1; } @@ -84,9 +84,10 @@ class Xform16Bit { bit canXformTo16Bit = 1; } // ARM Instruction flags. These need to match ARMBaseInstrInfo.h. // +// FIXME: Once the JIT is MC-ized, these can go away. // Addressing mode. -class AddrMode<bits<4> val> { - bits<4> Value = val; +class AddrMode<bits<5> val> { + bits<5> Value = val; } def AddrModeNone : AddrMode<0>; def AddrMode1 : AddrMode<1>; @@ -104,6 +105,7 @@ def AddrModeT2_i8 : AddrMode<12>; def AddrModeT2_so : AddrMode<13>; def AddrModeT2_pc : AddrMode<14>; def AddrModeT2_i8s4 : AddrMode<15>; +def AddrMode_i12 : AddrMode<16>; // Instruction size. class SizeFlagVal<bits<3> val> { @@ -134,7 +136,6 @@ def NeonDomain : Domain<2>; // Instructions in Neon domain only def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains //===----------------------------------------------------------------------===// - // ARM special operands. 
// @@ -143,6 +144,39 @@ def CondCodeOperand : AsmOperandClass { let SuperClasses = []; } +def CCOutOperand : AsmOperandClass { + let Name = "CCOut"; + let SuperClasses = []; +} + +def MemBarrierOptOperand : AsmOperandClass { + let Name = "MemBarrierOpt"; + let SuperClasses = []; + let ParserMethod = "tryParseMemBarrierOptOperand"; +} + +def ProcIFlagsOperand : AsmOperandClass { + let Name = "ProcIFlags"; + let SuperClasses = []; + let ParserMethod = "tryParseProcIFlagsOperand"; +} + +def MSRMaskOperand : AsmOperandClass { + let Name = "MSRMask"; + let SuperClasses = []; + let ParserMethod = "tryParseMSRMaskOperand"; +} + +// ARM imod and iflag operands, used only by the CPS instruction. +def imod_op : Operand<i32> { + let PrintMethod = "printCPSIMod"; +} + +def iflags_op : Operand<i32> { + let PrintMethod = "printCPSIFlag"; + let ParserMatchClass = ProcIFlagsOperand; +} + // ARM Predicate operand. Default to 14 = always (AL). Second part is CC // register whose default is 0 (no register). def pred : PredicateOperand<OtherVT, (ops i32imm, CCR), @@ -153,16 +187,23 @@ def pred : PredicateOperand<OtherVT, (ops i32imm, CCR), // Conditional code result for instructions whose 's' bit is set, e.g. subs. def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> { + let EncoderMethod = "getCCOutOpValue"; let PrintMethod = "printSBitModifierOperand"; + let ParserMatchClass = CCOutOperand; } // Same as cc_out except it defaults to setting CPSR. def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> { + let EncoderMethod = "getCCOutOpValue"; let PrintMethod = "printSBitModifierOperand"; + let ParserMatchClass = CCOutOperand; } // ARM special operands for disassembly only. // +def setend_op : Operand<i32> { + let PrintMethod = "printSetendOperand"; +} def cps_opt : Operand<i32> { let PrintMethod = "printCPSOptionOperand"; @@ -170,6 +211,7 @@ def cps_opt : Operand<i32> { def msr_mask : Operand<i32> { let PrintMethod = "printMSRMaskOperand"; + let ParserMatchClass = MSRMaskOperand; } // A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0. @@ -179,7 +221,6 @@ def neg_zero : Operand<i32> { } //===----------------------------------------------------------------------===// - // ARM Instruction templates. // @@ -198,14 +239,17 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im, bit isUnaryDataProc = 0; bit canXformTo16Bit = 0; + // If this is a pseudo instruction, mark it isCodeGenOnly. + let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo"); + // The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h. 
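+  // AddrMode now needs five bits (AddrMode_i12 has value 16), so every
+  // field below shifts up one bit relative to the old layout.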
- let TSFlags{3-0} = AM.Value; - let TSFlags{6-4} = SZ.Value; - let TSFlags{8-7} = IndexModeBits; - let TSFlags{14-9} = Form; - let TSFlags{15} = isUnaryDataProc; - let TSFlags{16} = canXformTo16Bit; - let TSFlags{18-17} = D.Value; + let TSFlags{4-0} = AM.Value; + let TSFlags{7-5} = SZ.Value; + let TSFlags{9-8} = IndexModeBits; + let TSFlags{15-10} = Form; + let TSFlags{16} = isUnaryDataProc; + let TSFlags{17} = canXformTo16Bit; + let TSFlags{19-18} = D.Value; let Constraints = cstr; let Itinerary = itin; @@ -225,25 +269,51 @@ class InstThumb<AddrMode am, SizeFlagVal sz, IndexMode im, Format f, Domain d, string cstr, InstrItinClass itin> : InstTemplate<am, sz, im, f, d, cstr, itin>; -class PseudoInst<dag oops, dag iops, InstrItinClass itin, - string asm, list<dag> pattern> +class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern> + // FIXME: This really should derive from InstTemplate instead, as pseudos + // don't need encoding information. TableGen doesn't like that + // currently. Need to figure out why and fix it. : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain, "", itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; let Pattern = pattern; } +// PseudoInst that's ARM-mode only. +class ARMPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin, + list<dag> pattern> + : PseudoInst<oops, iops, itin, pattern> { + let SZ = sz; + list<Predicate> Predicates = [IsARM]; +} + +// PseudoInst that's Thumb-mode only. +class tPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin, + list<dag> pattern> + : PseudoInst<oops, iops, itin, pattern> { + let SZ = sz; + list<Predicate> Predicates = [IsThumb]; +} + +// PseudoInst that's Thumb2-mode only. +class t2PseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin, + list<dag> pattern> + : PseudoInst<oops, iops, itin, pattern> { + let SZ = sz; + list<Predicate> Predicates = [IsThumb2]; +} // Almost all ARM instructions are predicable. 
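+// The 4-bit condition from the predicate operand $p (default 14 = AL, see
+// above) is encoded into Inst{31-28} by the classes below.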
class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { + bits<4> p; + let Inst{31-28} = p; let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let AsmString = !strconcat(opc, "${p}", asm); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; } @@ -270,9 +340,14 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { + bits<4> p; // Predicate operand + bits<1> s; // condition-code set flag ('1' if the insn should set the flags) + let Inst{31-28} = p; + let Inst{20} = s; + let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); - let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); + let AsmString = !strconcat(opc, "${s}${p}", asm); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; } @@ -319,10 +394,6 @@ class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin, asm, "", pattern> { let Inst{27-24} = opcod; } -class ABXIx2<dag oops, dag iops, InstrItinClass itin, - string asm, list<dag> pattern> - : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, Pseudo, itin, - asm, "", pattern>; // BR_JT instructions class JTI<dag oops, dag iops, InstrItinClass itin, @@ -335,19 +406,42 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin, opc, asm, "", pattern> { + bits<4> Rt; + bits<4> Rn; let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = Rt; let Inst{11-0} = 0b111110011111; } class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin, opc, asm, "", pattern> { + bits<4> Rd; + bits<4> Rt; + bits<4> Rn; let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; let Inst{20} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; let Inst{11-4} = 0b11111001; + let Inst{3-0} = Rt; +} +class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern> + : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, [$Rn]", pattern> { + bits<4> Rt; + bits<4> Rt2; + bits<4> Rn; + let Inst{27-23} = 0b00010; + let Inst{22} = b; + let Inst{21-20} = 0b00; + let Inst{19-16} = Rn; + let Inst{15-12} = Rt; + let Inst{11-4} = 0b00001001; + let Inst{3-0} = Rt2; } // addrmode1 instructions @@ -372,387 +466,125 @@ class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, let Inst{24-21} = opcod; let Inst{27-26} = 0b00; } -class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode1, Size8Bytes, IndexModeNone, f, itin, - opc, asm, "", pattern>; - - -// addrmode2 loads and stores -class AI2<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, - opc, asm, "", pattern> { - let Inst{27-26} = 0b01; -} // loads -class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, - opc, asm, "", 
pattern> { - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = 0; // B bit - let Inst{24} = 1; // P bit - let Inst{27-26} = 0b01; -} -class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin, - string asm, list<dag> pattern> - : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, - asm, "", pattern> { - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = 0; // B bit - let Inst{24} = 1; // P bit - let Inst{27-26} = 0b01; -} -class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, - opc, asm, "", pattern> { - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = 1; // B bit - let Inst{24} = 1; // P bit - let Inst{27-26} = 0b01; -} -class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin, - string asm, list<dag> pattern> - : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, - asm, "", pattern> { - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = 1; // B bit - let Inst{24} = 1; // P bit - let Inst{27-26} = 0b01; -} - -// stores -class AI2stw<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, - opc, asm, "", pattern> { - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = 0; // B bit - let Inst{24} = 1; // P bit - let Inst{27-26} = 0b01; -} -class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin, - string asm, list<dag> pattern> - : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, - asm, "", pattern> { - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = 0; // B bit - let Inst{24} = 1; // P bit - let Inst{27-26} = 0b01; -} -class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, - opc, asm, "", pattern> { - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = 1; // B bit - let Inst{24} = 1; // P bit - let Inst{27-26} = 0b01; -} -class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin, - string asm, list<dag> pattern> - : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin, - asm, "", pattern> { - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = 1; // B bit - let Inst{24} = 1; // P bit - let Inst{27-26} = 0b01; -} -// Pre-indexed loads -class AI2ldwpr<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin, - opc, asm, cstr, pattern> { - let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit - let Inst{22} = 0; // B bit - let Inst{24} = 1; // P bit - let Inst{27-26} = 0b01; -} -class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin, - opc, asm, cstr, pattern> { - let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit - let Inst{22} = 1; // B bit - let Inst{24} = 1; // P bit - let Inst{27-26} = 0b01; -} - -// Pre-indexed stores -class AI2stwpr<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin, - opc, asm, cstr, pattern> { - let Inst{20} = 0; // L 
bit - let Inst{21} = 1; // W bit - let Inst{22} = 0; // B bit - let Inst{24} = 1; // P bit - let Inst{27-26} = 0b01; -} -class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, itin, +// LDR/LDRB/STR/STRB/... +class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am, + Format f, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : I<oops, iops, am, Size4Bytes, IndexModeNone, f, itin, opc, asm, + "", pattern> { + let Inst{27-25} = op; + let Inst{24} = 1; // 24 == P + // 23 == U + let Inst{22} = isByte; + let Inst{21} = 0; // 21 == W + let Inst{20} = isLd; +} +// Indexed load/stores +class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops, + IndexMode im, Format f, InstrItinClass itin, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, im, f, itin, opc, asm, cstr, pattern> { - let Inst{20} = 0; // L bit - let Inst{21} = 1; // W bit - let Inst{22} = 1; // B bit - let Inst{24} = 1; // P bit - let Inst{27-26} = 0b01; -} - -// Post-indexed loads -class AI2ldwpo<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin, - opc, asm, cstr,pattern> { - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = 0; // B bit - let Inst{24} = 0; // P bit - let Inst{27-26} = 0b01; -} -class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin, - opc, asm, cstr,pattern> { - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = 1; // B bit - let Inst{24} = 0; // P bit - let Inst{27-26} = 0b01; -} - -// Post-indexed stores -class AI2stwpo<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin, - opc, asm, cstr,pattern> { - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = 0; // B bit - let Inst{24} = 0; // P bit - let Inst{27-26} = 0b01; -} -class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, itin, - opc, asm, cstr,pattern> { - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{22} = 1; // B bit - let Inst{24} = 0; // P bit + bits<4> Rt; let Inst{27-26} = 0b01; + let Inst{24} = isPre; // P bit + let Inst{22} = isByte; // B bit + let Inst{21} = isPre; // W bit + let Inst{20} = isLd; // L bit + let Inst{15-12} = Rt; +} +class AI2stridx<bit isByte, bit isPre, dag oops, dag iops, + IndexMode im, Format f, InstrItinClass itin, string opc, + string asm, string cstr, list<dag> pattern> + : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, + pattern> { + // AM2 store w/ two operands: (GPR, am2offset) + // {13} 1 == Rm, 0 == imm12 + // {12} isAdd + // {11-0} imm12/Rm + bits<14> offset; + bits<4> Rn; + let Inst{25} = offset{13}; + let Inst{23} = offset{12}; + let Inst{19-16} = Rn; + let Inst{11-0} = offset{11-0}; } // addrmode3 instructions -class AI3<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, - opc, asm, 
"", pattern>; -class AXI3<dag oops, dag iops, Format f, InstrItinClass itin, - string asm, list<dag> pattern> - : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, - asm, "", pattern>; - -// loads -class AI3ldh<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, - opc, asm, "", pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 0; // S bit - let Inst{7} = 1; - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 1; // P bit - let Inst{27-25} = 0b000; -} -class AXI3ldh<dag oops, dag iops, Format f, InstrItinClass itin, - string asm, list<dag> pattern> - : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, - asm, "", pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 0; // S bit - let Inst{7} = 1; - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 1; // P bit -} -class AI3ldsh<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> +class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f, + InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, opc, asm, "", pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 1; // P bit + bits<14> addr; + bits<4> Rt; let Inst{27-25} = 0b000; -} -class AXI3ldsh<dag oops, dag iops, Format f, InstrItinClass itin, - string asm, list<dag> pattern> - : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, - asm, "", pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 1; // P bit -} -class AI3ldsb<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, - opc, asm, "", pattern> { - let Inst{4} = 1; - let Inst{5} = 0; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 1; // P bit + let Inst{24} = 1; // P bit + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{21} = 0; // W bit + let Inst{20} = op20; // L bit + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Rt; // Rt + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{7-4} = op; + let Inst{3-0} = addr{3-0}; // imm3_0/Rm +} + +class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops, + IndexMode im, Format f, InstrItinClass itin, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin, + opc, asm, cstr, pattern> { + bits<4> Rt; let Inst{27-25} = 0b000; -} -class AXI3ldsb<dag oops, dag iops, Format f, InstrItinClass itin, - string asm, list<dag> pattern> - : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, - asm, "", pattern> { - let Inst{4} = 1; - let Inst{5} = 0; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 1; // P bit -} -class AI3ldd<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, - opc, asm, "", 
pattern> { - let Inst{4} = 1; - let Inst{5} = 0; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 1; // P bit + let Inst{24} = isPre; // P bit + let Inst{21} = isPre; // W bit + let Inst{20} = op20; // L bit + let Inst{15-12} = Rt; // Rt + let Inst{7-4} = op; +} +class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops, + IndexMode im, Format f, InstrItinClass itin, string opc, + string asm, string cstr, list<dag> pattern> + : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, + pattern> { + // AM3 store w/ two operands: (GPR, am3offset) + bits<14> offset; + bits<4> Rt; + bits<4> Rn; let Inst{27-25} = 0b000; + let Inst{23} = offset{8}; + let Inst{22} = offset{9}; + let Inst{19-16} = Rn; + let Inst{15-12} = Rt; // Rt + let Inst{11-8} = offset{7-4}; // imm7_4/zero + let Inst{7-4} = op; + let Inst{3-0} = offset{3-0}; // imm3_0/Rm } // stores -class AI3sth<dag oops, dag iops, Format f, InstrItinClass itin, +class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, opc, asm, "", pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 0; // S bit - let Inst{7} = 1; - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 1; // P bit - let Inst{27-25} = 0b000; -} -class AXI3sth<dag oops, dag iops, Format f, InstrItinClass itin, - string asm, list<dag> pattern> - : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, - asm, "", pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 0; // S bit - let Inst{7} = 1; - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 1; // P bit -} -class AI3std<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin, - opc, asm, "", pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 1; // P bit + bits<14> addr; + bits<4> Rt; let Inst{27-25} = 0b000; + let Inst{24} = 1; // P bit + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{21} = 0; // W bit + let Inst{20} = 0; // L bit + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Rt; // Rt + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{7-4} = op; + let Inst{3-0} = addr{3-0}; // imm3_0/Rm } -// Pre-indexed loads -class AI3ldhpr<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin, - opc, asm, cstr, pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 0; // S bit - let Inst{7} = 1; - let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit - let Inst{24} = 1; // P bit - let Inst{27-25} = 0b000; -} -class AI3ldshpr<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin, - opc, asm, cstr, pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit - let Inst{24} = 1; // P bit - let Inst{27-25} = 0b000; -} -class AI3ldsbpr<dag oops, dag iops, Format f, InstrItinClass 
itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin, - opc, asm, cstr, pattern> { - let Inst{4} = 1; - let Inst{5} = 0; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit - let Inst{24} = 1; // P bit - let Inst{27-25} = 0b000; -} -class AI3lddpr<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin, - opc, asm, cstr, pattern> { - let Inst{4} = 1; - let Inst{5} = 0; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 0; // L bit - let Inst{21} = 1; // W bit - let Inst{24} = 1; // P bit - let Inst{27-25} = 0b000; -} - - // Pre-indexed stores class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -781,60 +613,6 @@ class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{27-25} = 0b000; } -// Post-indexed loads -class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin, - opc, asm, cstr,pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 0; // S bit - let Inst{7} = 1; - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 0; // P bit - let Inst{27-25} = 0b000; -} -class AI3ldshpo<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin, - opc, asm, cstr,pattern> { - let Inst{4} = 1; - let Inst{5} = 1; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 0; // P bit - let Inst{27-25} = 0b000; -} -class AI3ldsbpo<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin, - opc, asm, cstr,pattern> { - let Inst{4} = 1; - let Inst{5} = 0; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 1; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 0; // P bit - let Inst{27-25} = 0b000; -} -class AI3lddpo<dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, string cstr, list<dag> pattern> - : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin, - opc, asm, cstr, pattern> { - let Inst{4} = 1; - let Inst{5} = 0; // H bit - let Inst{6} = 1; // S bit - let Inst{7} = 1; - let Inst{20} = 0; // L bit - let Inst{21} = 0; // W bit - let Inst{24} = 0; // P bit - let Inst{27-25} = 0b000; -} - // Post-indexed stores class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> @@ -864,21 +642,17 @@ class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin, } // addrmode4 instructions -class AXI4ld<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, - string asm, string cstr, list<dag> pattern> - : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin, - asm, cstr, pattern> { - let Inst{20} = 1; // L bit - let Inst{22} = 0; // S bit +class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, + string asm, string cstr, list<dag> pattern> + : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin, asm, cstr, pattern> { + bits<4> p; + bits<16> 
regs;
+  bits<4> Rn;
+  let Inst{31-28} = p;
   let Inst{27-25} = 0b100;
-}
-class AXI4st<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
-             string asm, string cstr, list<dag> pattern>
-  : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin,
-       asm, cstr, pattern> {
-  let Inst{20} = 0; // L bit
   let Inst{22} = 0; // S bit
-  let Inst{27-25} = 0b100;
+  let Inst{19-16} = Rn;
+  let Inst{15-0} = regs;
 }

 // Unsigned multiply, multiply-accumulate instructions.
@@ -899,24 +673,65 @@ class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
 }

 // Most significant word multiply
-class AMul2I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
-             string opc, string asm, list<dag> pattern>
+class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+             InstrItinClass itin, string opc, string asm, list<dag> pattern>
   : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
       opc, asm, "", pattern> {
-  let Inst{7-4} = 0b1001;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  let Inst{7-4} = opc7_4;
   let Inst{20} = 1;
   let Inst{27-21} = opcod;
+  let Inst{19-16} = Rd;
+  let Inst{11-8} = Rm;
+  let Inst{3-0} = Rn;
+}
+// MSW multiply w/ Ra operand
+class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+              InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : AMul2I<opcod, opc7_4, oops, iops, itin, opc, asm, pattern> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
 }

 // SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
-class AMulxyI<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
-              string opc, string asm, list<dag> pattern>
+class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+              InstrItinClass itin, string opc, string asm, list<dag> pattern>
   : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
       opc, asm, "", pattern> {
+  bits<4> Rn;
+  bits<4> Rm;
   let Inst{4} = 0;
   let Inst{7} = 1;
   let Inst{20} = 0;
   let Inst{27-21} = opcod;
+  let Inst{6-5} = bit6_5;
+  let Inst{11-8} = Rm;
+  let Inst{3-0} = Rn;
+}
+class AMulxyI<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+              InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  let Inst{19-16} = Rd;
+}
+
+// AMulxyI with Ra operand
+class AMulxyIa<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+              InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : AMulxyI<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
+}
+// SMLAL*
+class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+              InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+  bits<4> RdLo;
+  bits<4> RdHi;
+  let Inst{19-16} = RdHi;
+  let Inst{15-12} = RdLo;
 }

 // Extend instructions.
@@ -924,16 +739,47 @@ class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
   : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, itin,
      opc, asm, "", pattern> {
+  // All AExtI instructions have Rd and Rm register operands.
+  bits<4> Rd;
+  bits<4> Rm;
+  let Inst{15-12} = Rd;
+  let Inst{3-0} = Rm;
   let Inst{7-4} = 0b0111;
+  let Inst{9-8} = 0b00;
   let Inst{27-20} = opcod;
 }

 // Misc Arithmetic instructions.
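+// For the CLZ/REV-family instructions encoded below, only Rd and Rm are
+// real operands; the unused register fields Inst{19-16} and Inst{11-8} are
+// tied to 0b1111, matching the should-be-one bits in the ARM ARM encodings.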
-class AMiscA1I<bits<8> opcod, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> +class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin, opc, asm, "", pattern> { + bits<4> Rd; + bits<4> Rm; let Inst{27-20} = opcod; + let Inst{19-16} = 0b1111; + let Inst{15-12} = Rd; + let Inst{11-8} = 0b1111; + let Inst{7-4} = opc7_4; + let Inst{3-0} = Rm; +} + +// PKH instructions +class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin, + opc, asm, "", pattern> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + bits<8> sh; + let Inst{27-20} = opcod; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-7} = sh{7-3}; + let Inst{6} = tb; + let Inst{5-4} = 0b01; + let Inst{3-0} = Rm; } //===----------------------------------------------------------------------===// @@ -950,12 +796,9 @@ class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { } //===----------------------------------------------------------------------===// -// // Thumb Instruction Format Definitions. // -// TI - Thumb instruction. - class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { @@ -966,6 +809,7 @@ class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<Predicate> Predicates = [IsThumb]; } +// TI - Thumb instruction. class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>; @@ -986,6 +830,13 @@ class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3, let Inst{12} = opcod3; } +// Move to/from coprocessor instructions +class T1Cop<dag oops, dag iops, string asm, list<dag> pattern> + : ThumbI<oops, iops, AddrModeNone, Size4Bytes, NoItinerary, asm, "", pattern>, + Encoding, Requires<[IsThumb, HasV6]> { + let Inst{31-28} = 0b1110; +} + // BR_JT instructions class TJTI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> @@ -999,7 +850,7 @@ class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, let InOperandList = iops; let AsmString = asm; let Pattern = pattern; - list<Predicate> Predicates = [IsThumb1Only]; + list<Predicate> Predicates = [IsThumb, IsThumb1Only]; } class T1I<dag oops, dag iops, InstrItinClass itin, @@ -1008,9 +859,6 @@ class T1I<dag oops, dag iops, InstrItinClass itin, class T1Ix2<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> : Thumb1I<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>; -class T1JTI<dag oops, dag iops, InstrItinClass itin, - string asm, list<dag> pattern> - : Thumb1I<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>; // Two-address instructions class T1It<dag oops, dag iops, InstrItinClass itin, @@ -1025,9 +873,9 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = !con(oops, (outs s_cc_out:$s)); let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); + let AsmString = !strconcat(opc, "${s}${p}", asm); let Pattern = pattern; - list<Predicate> Predicates = [IsThumb1Only]; + 
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
 }

 class T1sI<dag oops, dag iops, InstrItinClass itin,
@@ -1038,7 +886,7 @@ class T1sI<dag oops, dag iops, InstrItinClass itin,
 class T1sIt<dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
   : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
-             "$lhs = $dst", pattern>;
+             "$Rn = $Rdn", pattern>;

 // Thumb1 instruction that can be predicated.
 class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
@@ -1047,9 +895,9 @@ class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+  let AsmString = !strconcat(opc, "${p}", asm);
   let Pattern = pattern;
-  list<Predicate> Predicates = [IsThumb1Only];
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
 }

 class T1pI<dag oops, dag iops, InstrItinClass itin,
@@ -1060,17 +908,8 @@ class T1pI<dag oops, dag iops, InstrItinClass itin,
 class T1pIt<dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
   : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
-             "$lhs = $dst", pattern>;
+             "$Rn = $Rdn", pattern>;

-class T1pI1<dag oops, dag iops, InstrItinClass itin,
-            string opc, string asm, list<dag> pattern>
-  : Thumb1pI<oops, iops, AddrModeT1_1, Size2Bytes, itin, opc, asm, "", pattern>;
-class T1pI2<dag oops, dag iops, InstrItinClass itin,
-            string opc, string asm, list<dag> pattern>
-  : Thumb1pI<oops, iops, AddrModeT1_2, Size2Bytes, itin, opc, asm, "", pattern>;
-class T1pI4<dag oops, dag iops, InstrItinClass itin,
-            string opc, string asm, list<dag> pattern>
-  : Thumb1pI<oops, iops, AddrModeT1_4, Size2Bytes, itin, opc, asm, "", pattern>;
 class T1pIs<dag oops, dag iops, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
   : Thumb1pI<oops, iops, AddrModeT1_s, Size2Bytes, itin, opc, asm, "", pattern>;
@@ -1099,7 +938,7 @@ class T1DataProcessing<bits<4> opcode> : Encoding16 {
 // A6.2.3 Special data instructions and branch and exchange encoding.
 class T1Special<bits<4> opcode> : Encoding16 {
   let Inst{15-10} = 0b010001;
-  let Inst{9-6}   = opcode;
+  let Inst{9-6} = opcode;
 }

 // A6.2.4 Load/store single data item encoding.
 class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
@@ -1107,12 +946,37 @@ class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
   let Inst{15-12} = opA;
   let Inst{11-9} = opB;
 }
-class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>;
-class T1LdSt4Imm<bits<3> opB> : T1LoadStore<0b0110, opB>; // Immediate, 4 bytes
-class T1LdSt1Imm<bits<3> opB> : T1LoadStore<0b0111, opB>; // Immediate, 1 byte
-class T1LdSt2Imm<bits<3> opB> : T1LoadStore<0b1000, opB>; // Immediate, 2 bytes
 class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+// Helper classes to encode Thumb1 loads and stores. For immediates, the
+// following bits are used for "opA" (see A6.2.4):
+//
+//   0b0110 => Immediate, 4 bytes
+//   0b1000 => Immediate, 2 bytes
+//   0b0111 => Immediate, 1 byte
+class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
+                     InstrItinClass itin, string opc, string asm,
+                     list<dag> pattern>
+  : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+    T1LoadStore<0b0101, opcode> {
+  bits<3> Rt;
+  bits<8> addr;
+  let Inst{8-6} = addr{5-3};    // Rm
+  let Inst{5-3} = addr{2-0};    // Rn
+  let Inst{2-0} = Rt;
+}
+class T1pILdStEncodeImm<bits<4> opA, bit opB, dag oops, dag iops, AddrMode am,
+                        InstrItinClass itin, string opc, string asm,
+                        list<dag> pattern>
+  : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+    T1LoadStore<opA, {opB,?,?}> {
+  bits<3> Rt;
+  bits<8> addr;
+  let Inst{10-6} = addr{7-3};   // imm5
+  let Inst{5-3} = addr{2-0};    // Rn
+  let Inst{2-0} = Rt;
+}
+
 // A6.2.5 Miscellaneous 16-bit instructions encoding.
 class T1Misc<bits<7> opcode> : Encoding16 {
   let Inst{15-12} = 0b1011;
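(Editor's note: a minimal C++ sketch, not the patch's encoder, assuming the bit layout spelled out by T1pILdStEncode above. The opB value chosen in main() is hypothetical; opB actually selects the particular load/store within the group.)

    #include <cstdint>
    #include <cstdio>

    // Sketch of the Thumb1 register-offset load/store layout above: opA in
    // bits 15-12, opB in 11-9, Rm in 8-6, Rn in 5-3, Rt in 2-0 (all register
    // numbers are r0-r7).
    static uint16_t encodeT1LdStReg(uint16_t opB, uint16_t Rm, uint16_t Rn,
                                    uint16_t Rt) {
      uint16_t Inst = 0;
      Inst |= 0b0101 << 12;  // opA for the register-offset forms
      Inst |= opB << 9;      // picks ldr/str/ldrb/... within the group
      Inst |= Rm << 6;       // Inst{8-6} = addr{5-3} (index register)
      Inst |= Rn << 3;       // Inst{5-3} = addr{2-0} (base register)
      Inst |= Rt;            // Inst{2-0} = Rt
      return Inst;
    }

    int main() {
      // Hypothetical load-word-shaped opB, with r0 as Rt, r1 base, r2 index.
      printf("0x%04x\n", encodeT1LdStReg(0b100, 2, 1, 0));
    }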
@@ -1126,7 +990,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+  let AsmString = !strconcat(opc, "${p}", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb2];
 }
@@ -1134,16 +998,19 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
 // Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
 // input operand since by default it's a zero register. It will become an
 // implicit def once it's "flipped".
-//
+//
 // FIXME: This uses unified syntax so {s} comes before {p}. We should make it
 // more consistent.
 class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
                InstrItinClass itin,
                string opc, string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+  bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+  let Inst{20} = s;
+
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
-  let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
+  let AsmString = !strconcat(opc, "${s}${p}", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb2];
 }
@@ -1168,7 +1035,7 @@ class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   let InOperandList = iops;
   let AsmString = asm;
   let Pattern = pattern;
-  list<Predicate> Predicates = [IsThumb1Only];
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
 }

 class T2I<dag oops, dag iops, InstrItinClass itin,
@@ -1186,17 +1053,23 @@ class T2Iso<dag oops, dag iops, InstrItinClass itin,
 class T2Ipc <dag oops, dag iops, InstrItinClass itin,
              string opc, string asm, list<dag> pattern>
   : Thumb2I<oops, iops, AddrModeT2_pc, Size4Bytes, itin, opc, asm, "", pattern>;
-class T2Ii8s4<bit P, bit W, bit load, dag oops, dag iops, InstrItinClass itin,
+class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
               string opc, string asm, list<dag> pattern>
   : Thumb2I<oops, iops, AddrModeT2_i8s4, Size4Bytes, itin, opc, asm, "",
             pattern> {
-  let Inst{31-27} = 0b11101;
-  let Inst{26-25} = 0b00;
+  bits<4> Rt;
+  bits<4> Rt2;
+  bits<13> addr;
+  let Inst{31-25} = 0b1110100;
   let Inst{24} = P;
-  let Inst{23} = ?; // The U bit.
+  let Inst{23} = addr{8};
   let Inst{22} = 1;
   let Inst{21} = W;
-  let Inst{20} = load;
+  let Inst{20} = isLoad;
+  let Inst{19-16} = addr{12-9};
+  let Inst{15-12} = Rt{3-0};
+  let Inst{11-8} = Rt2{3-0};
+  let Inst{7-0} = addr{7-0};
 }

 class T2sI<dag oops, dag iops, InstrItinClass itin,
@@ -1210,9 +1083,11 @@ class T2JTI<dag oops, dag iops, InstrItinClass itin,
             string asm, list<dag> pattern>
   : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;

-class T2Ix2<dag oops, dag iops, InstrItinClass itin,
-            string opc, string asm, list<dag> pattern>
-  : Thumb2I<oops, iops, AddrModeNone, Size8Bytes, itin, opc, asm, "", pattern>;
+// Move to/from coprocessor instructions
+class T2Cop<dag oops, dag iops, string asm, list<dag> pattern>
+  : T2XI<oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2, HasV6]> {
+  let Inst{31-28} = 0b1111;
+}

 // Two-address instructions
 class T2XIt<dag oops, dag iops, InstrItinClass itin,
@@ -1227,7 +1102,7 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
   : InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+  let AsmString = !strconcat(opc, "${p}", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb2];
   let Inst{31-27} = 0b11111;
@@ -1240,29 +1115,25 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
   // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
   let Inst{10} = pre; // The P bit.
   let Inst{8} = 1;    // The W bit.
-}
-
-// Helper class for disassembly only
-// A6.3.16 & A6.3.17
-// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions.
-class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops,
-              InstrItinClass itin, string opc, string asm, list<dag> pattern>
-  : T2I<oops, iops, itin, opc, asm, pattern> {
-  let Inst{31-27} = 0b11111;
-  let Inst{26-24} = 0b011;
-  let Inst{23} = long;
-  let Inst{22-20} = op22_20;
-  let Inst{7-4} = op7_4;
+  bits<9> addr;
+  let Inst{7-0} = addr{7-0};
+  let Inst{9} = addr{8}; // Sign bit
+
+  bits<4> Rt;
+  bits<4> Rn;
+  let Inst{15-12} = Rt{3-0};
+  let Inst{19-16} = Rn{3-0};
 }

 // Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
 class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
-  list<Predicate> Predicates = [IsThumb1Only, HasV5T];
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only, HasV5T];
 }

 // T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode.
 class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
-  list<Predicate> Predicates = [IsThumb1Only];
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
 }

 // T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
@@ -1281,10 +1152,13 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, IndexMode im,
            Format f, InstrItinClass itin,
            string opc, string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+  bits<4> p;
+  let Inst{31-28} = p;
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+  let AsmString = !strconcat(opc, "${p}", asm);
   let Pattern = pattern;
+  let PostEncoderMethod = "VFPThumb2PostEncoder";
   list<Predicate> Predicates = [HasVFP2];
 }

@@ -1293,17 +1167,22 @@ class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
             IndexMode im, Format f, InstrItinClass itin,
             string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+  bits<4> p;
+  let Inst{31-28} = p;
   let OutOperandList = oops;
   let InOperandList = iops;
   let AsmString = asm;
   let Pattern = pattern;
+  let PostEncoderMethod = "VFPThumb2PostEncoder";
   list<Predicate> Predicates = [HasVFP2];
 }

 class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
             string opc, string asm, list<dag> pattern>
   : VFPI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
-         opc, asm, "", pattern>;
+         opc, asm, "", pattern> {
+  let PostEncoderMethod = "VFPThumb2PostEncoder";
+}

 // ARM VFP addrmode5 loads and stores
 class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
@@ -1311,12 +1190,24 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
            string opc, string asm, list<dag> pattern>
   : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
          VFPLdStFrm, itin, opc, asm, "", pattern> {
+  // Instruction operands.
+  bits<5> Dd;
+  bits<13> addr;
+
+  // Encode instruction operands.
+  let Inst{23} = addr{8};       // U (add = (U == '1'))
+  let Inst{22} = Dd{4};
+  let Inst{19-16} = addr{12-9}; // Rn
+  let Inst{15-12} = Dd{3-0};
+  let Inst{7-0} = addr{7-0};    // imm8
+
   // TODO: Mark the instructions with the appropriate subtarget info.
   let Inst{27-24} = opcod1;
   let Inst{21-20} = opcod2;
-  let Inst{11-8} = 0b1011;
+  let Inst{11-9} = 0b101;
+  let Inst{8} = 1;              // Double precision

-  // 64-bit loads & stores operate on both NEON and VFP pipelines.
+  // Loads & stores operate on both NEON and VFP pipelines.
   let D = VFPNeonDomain;
 }

@@ -1325,10 +1216,36 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
            string opc, string asm, list<dag> pattern>
   : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
          VFPLdStFrm, itin, opc, asm, "", pattern> {
+  // Instruction operands.
+  bits<5> Sd;
+  bits<13> addr;
+
+  // Encode instruction operands.
+  let Inst{23} = addr{8};       // U (add = (U == '1'))
+  let Inst{22} = Sd{0};
+  let Inst{19-16} = addr{12-9}; // Rn
+  let Inst{15-12} = Sd{4-1};
+  let Inst{7-0} = addr{7-0};    // imm8
+
   // TODO: Mark the instructions with the appropriate subtarget info.
   let Inst{27-24} = opcod1;
   let Inst{21-20} = opcod2;
-  let Inst{11-8} = 0b1010;
+  let Inst{11-9} = 0b101;
+  let Inst{8} = 0;              // Single precision
+
+  // Loads & stores operate on both NEON and VFP pipelines.
+  let D = VFPNeonDomain;
+}
+
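(Editor's note: the Dd/Sd splits in ADI5/ASI5 above are easy to get backwards, so here is an illustrative C++ sketch of just that detail; it is not part of the imported sources. A double register number D0-D31 contributes its top bit to Inst{22} and its low four bits to Inst{15-12}, while a single register S0-S31 does the opposite.)

    #include <cstdint>
    #include <cstdio>

    // Destination-register fields of the VFP load/store classes above.
    static uint32_t vfpDestFieldsDouble(uint32_t Dd) {  // Dd = 0..31
      return (((Dd >> 4) & 1) << 22)   // Inst{22}    = Dd{4} (the "D" bit)
           | ((Dd & 0xF) << 12);       // Inst{15-12} = Dd{3-0}
    }

    static uint32_t vfpDestFieldsSingle(uint32_t Sd) {  // Sd = 0..31
      return ((Sd & 1) << 22)          // Inst{22}    = Sd{0}
           | (((Sd >> 1) & 0xF) << 12); // Inst{15-12} = Sd{4-1}
    }

    int main() {
      printf("D5 -> 0x%08x, S5 -> 0x%08x\n",
             vfpDestFieldsDouble(5), vfpDestFieldsSingle(5));
    }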
+// VFP Load / store multiple pseudo instructions.
+class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
+                     list<dag> pattern>
+  : InstARM<AddrMode4, Size4Bytes, IndexModeNone, Pseudo, VFPNeonDomain,
+            cstr, itin> {
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ins pred:$p));
+  let Pattern = pattern;
+  list<Predicate> Predicates = [HasVFP2];
 }

 // Load / store multiple
@@ -1336,21 +1253,40 @@ class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
             string asm, string cstr, list<dag> pattern>
   : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
           VFPLdStMulFrm, itin, asm, cstr, pattern> {
+  // Instruction operands.
+  bits<4> Rn;
+  bits<13> regs;
+
+  // Encode instruction operands.
+  let Inst{19-16} = Rn;
+  let Inst{22} = regs{12};
+  let Inst{15-12} = regs{11-8};
+  let Inst{7-0} = regs{7-0};
+
   // TODO: Mark the instructions with the appropriate subtarget info.
   let Inst{27-25} = 0b110;
-  let Inst{11-8} = 0b1011;
-
-  // 64-bit loads & stores operate on both NEON and VFP pipelines.
-  let D = VFPNeonDomain;
+  let Inst{11-9} = 0b101;
+  let Inst{8} = 1;          // Double precision
 }

 class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
             string asm, string cstr, list<dag> pattern>
   : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
           VFPLdStMulFrm, itin, asm, cstr, pattern> {
+  // Instruction operands.
+  bits<4> Rn;
+  bits<13> regs;
+
+  // Encode instruction operands.
+  let Inst{19-16} = Rn;
+  let Inst{22} = regs{8};
+  let Inst{15-12} = regs{12-9};
+  let Inst{7-0} = regs{7-0};
+
   // TODO: Mark the instructions with the appropriate subtarget info.
   let Inst{27-25} = 0b110;
-  let Inst{11-8} = 0b1010;
+  let Inst{11-9} = 0b101;
+  let Inst{8} = 0;          // Single precision
 }

 // Double precision, unary
@@ -1358,10 +1294,21 @@ class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
            bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
            string asm, list<dag> pattern>
   : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+  // Instruction operands.
+  bits<5> Dd;
+  bits<5> Dm;
+
+  // Encode instruction operands.
+  let Inst{3-0} = Dm{3-0};
+  let Inst{5} = Dm{4};
+  let Inst{15-12} = Dd{3-0};
+  let Inst{22} = Dd{4};
+
   let Inst{27-23} = opcod1;
   let Inst{21-20} = opcod2;
   let Inst{19-16} = opcod3;
-  let Inst{11-8} = 0b1011;
+  let Inst{11-9} = 0b101;
+  let Inst{8} = 1;          // Double precision
   let Inst{7-6} = opcod4;
   let Inst{4} = opcod5;
 }
@@ -1371,24 +1318,25 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
            dag iops, InstrItinClass itin, string opc, string asm,
            list<dag> pattern>
   : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
-  let Inst{27-23} = opcod1;
-  let Inst{21-20} = opcod2;
-  let Inst{11-8} = 0b1011;
-  let Inst{6} = op6;
-  let Inst{4} = op4;
-}
+  // Instruction operands.
+  bits<5> Dd;
+  bits<5> Dn;
+  bits<5> Dm;
+
+  // Encode instruction operands.
+  let Inst{3-0} = Dm{3-0};
+  let Inst{5} = Dm{4};
+  let Inst{19-16} = Dn{3-0};
+  let Inst{7} = Dn{4};
+  let Inst{15-12} = Dd{3-0};
+  let Inst{22} = Dd{4};
-
-// Double precision, binary, VML[AS] (for additional predicate)
-class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
-                dag iops, InstrItinClass itin, string opc, string asm,
-                list<dag> pattern>
-  : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+
   let Inst{27-23} = opcod1;
   let Inst{21-20} = opcod2;
-  let Inst{11-8} = 0b1011;
+  let Inst{11-9} = 0b101;
+  let Inst{8} = 1;          // Double precision
   let Inst{6} = op6;
   let Inst{4} = op4;
-  list<Predicate> Predicates = [HasVFP2, UseVMLx];
 }

 // Single precision, unary
@@ -1396,16 +1344,27 @@ class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
            bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
            string asm, list<dag> pattern>
   : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> {
+  // Instruction operands.
+  bits<5> Sd;
+  bits<5> Sm;
+
+  // Encode instruction operands.
+  let Inst{3-0} = Sm{4-1};
+  let Inst{5} = Sm{0};
+  let Inst{15-12} = Sd{4-1};
+  let Inst{22} = Sd{0};
+
   let Inst{27-23} = opcod1;
   let Inst{21-20} = opcod2;
   let Inst{19-16} = opcod3;
-  let Inst{11-8} = 0b1010;
+  let Inst{11-9} = 0b101;
+  let Inst{8} = 0;          // Single precision
   let Inst{7-6} = opcod4;
   let Inst{4} = opcod5;
 }

-// Single precision unary, if no NEON
-// Same as ASuI except not available if NEON is enabled
+// Single precision unary, if no NEON. Same as ASuI except not available if
+// NEON is enabled.
 class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
             bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
             string asm, list<dag> pattern>
@@ -1418,20 +1377,47 @@ class ASuIn<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
 class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
            dag iops, InstrItinClass itin, string opc, string asm,
            list<dag> pattern>
   : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+  // Instruction operands.
+  bits<5> Sd;
+  bits<5> Sn;
+  bits<5> Sm;
+
+  // Encode instruction operands.
+  let Inst{3-0} = Sm{4-1};
+  let Inst{5} = Sm{0};
+  let Inst{19-16} = Sn{4-1};
+  let Inst{7} = Sn{0};
+  let Inst{15-12} = Sd{4-1};
+  let Inst{22} = Sd{0};
+
   let Inst{27-23} = opcod1;
   let Inst{21-20} = opcod2;
-  let Inst{11-8} = 0b1010;
+  let Inst{11-9} = 0b101;
+  let Inst{8} = 0;          // Single precision
   let Inst{6} = op6;
   let Inst{4} = op4;
 }

-// Single precision binary, if no NEON
-// Same as ASbI except not available if NEON is enabled
+// Single precision binary, if no NEON. Same as ASbI except not available if
+// NEON is enabled.
 class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
             dag iops, InstrItinClass itin, string opc, string asm,
             list<dag> pattern>
   : ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
   list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
+
+  // Instruction operands.
+  bits<5> Sd;
+  bits<5> Sn;
+  bits<5> Sm;
+
+  // Encode instruction operands.
+  let Inst{3-0} = Sm{4-1};
+  let Inst{5} = Sm{0};
+  let Inst{19-16} = Sn{4-1};
+  let Inst{7} = Sn{0};
+  let Inst{15-12} = Sd{4-1};
+  let Inst{22} = Sd{0};
 }

 // VFP conversion instructions
@@ -1502,9 +1488,7 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
   : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(
-                    !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
-                    !strconcat("\t", asm));
+  let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [HasNEON];
 }
@@ -1516,7 +1500,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
   : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm));
+  let AsmString = !strconcat(opc, "${p}", "\t", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [HasNEON];
 }
@@ -1531,6 +1515,25 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
   let Inst{21-20} = op21_20;
   let Inst{11-8} = op11_8;
   let Inst{7-4} = op7_4;
+
+  let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder";
+
+  bits<5> Vd;
+  bits<6> Rn;
+  bits<4> Rm;
+
+  let Inst{22} = Vd{4};
+  let Inst{15-12} = Vd{3-0};
+  let Inst{19-16} = Rn{3-0};
+  let Inst{3-0} = Rm{3-0};
+}
+
+class NLdStLn<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
+              dag oops, dag iops, InstrItinClass itin,
+              string opc, string dt, string asm, string cstr,
+              list<dag> pattern>
+  : NLdSt<op23, op21_20, op11_8, op7_4, oops, iops, itin, opc,
+          dt, asm, cstr, pattern> {
+  bits<3> lane;
 }

 class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
@@ -1541,11 +1544,22 @@ class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
   list<Predicate> Predicates = [HasNEON];
 }

+class PseudoNeonI<dag oops, dag iops, InstrItinClass itin, string cstr,
+                  list<dag> pattern>
+  : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+            itin> {
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ins pred:$p));
+  let Pattern = pattern;
+  list<Predicate> Predicates = [HasNEON];
+}
+
 class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
              string opc, string dt, string asm, string cstr,
              list<dag> pattern>
   : NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm,
           cstr, pattern> {
   let Inst{31-25} = 0b1111001;
+  let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
 }

 class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -1553,6 +1567,7 @@ class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
   : NeonXI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, asm,
            cstr, pattern> {
   let Inst{31-25} = 0b1111001;
+  let PostEncoderMethod = "NEONThumb2DataIPostEncoder";
 }

 // NEON "one register and a modified immediate" format.
@@ -1569,6 +1584,16 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
   let Inst{6} = op6;
   let Inst{5} = op5;
   let Inst{4} = op4;
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<13> SIMM;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22} = Vd{4};
+  let Inst{24} = SIMM{7};
+  let Inst{18-16} = SIMM{6-4};
+  let Inst{3-0} = SIMM{3-0};
 }
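(Editor's note: an illustrative C++ sketch of the N1ModImm immediate scatter above, not part of the imported sources. It assumes only the low 8 bits of SIMM are the a:bcdefgh immediate payload; the remaining SIMM bits select cmode/op and are ignored here.)

    #include <cstdint>
    #include <cstdio>

    // N1ModImm scatter above: bit 7 of the 8-bit immediate goes to Inst{24},
    // bits 6-4 to Inst{18-16}, and bits 3-0 to Inst{3-0}.
    static uint32_t scatterNeonModImm(uint32_t imm8) {
      return (((imm8 >> 7) & 0x1) << 24)   // Inst{24}    = SIMM{7}
           | (((imm8 >> 4) & 0x7) << 16)   // Inst{18-16} = SIMM{6-4}
           | (imm8 & 0xF);                 // Inst{3-0}   = SIMM{3-0}
    }

    int main() { printf("0x%08x\n", scatterNeonModImm(0xAB)); }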

 // NEON 2 vector register format.
@@ -1584,6 +1609,15 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
   let Inst{11-7} = op11_7;
   let Inst{6} = op6;
   let Inst{4} = op4;
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<5> Vm;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22} = Vd{4};
+  let Inst{3-0} = Vm{3-0};
+  let Inst{5} = Vm{4};
 }

 // Same as N2V except it doesn't have a datatype suffix.
@@ -1599,6 +1633,15 @@ class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
   let Inst{11-7} = op11_7;
   let Inst{6} = op6;
   let Inst{4} = op4;
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<5> Vm;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22} = Vd{4};
+  let Inst{3-0} = Vm{3-0};
+  let Inst{5} = Vm{4};
 }

 // NEON 2 vector register with immediate.
@@ -1612,6 +1655,17 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
   let Inst{7} = op7;
   let Inst{6} = op6;
   let Inst{4} = op4;
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<5> Vm;
+  bits<6> SIMM;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22} = Vd{4};
+  let Inst{3-0} = Vm{3-0};
+  let Inst{5} = Vm{4};
+  let Inst{21-16} = SIMM{5-0};
 }

 // NEON 3 vector register format.
@@ -1625,6 +1679,18 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
   let Inst{11-8} = op11_8;
   let Inst{6} = op6;
   let Inst{4} = op4;
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<5> Vn;
+  bits<5> Vm;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22} = Vd{4};
+  let Inst{19-16} = Vn{3-0};
+  let Inst{7} = Vn{4};
+  let Inst{3-0} = Vm{3-0};
+  let Inst{5} = Vm{4};
 }

 // Same as N3V except it doesn't have a data type suffix.
@@ -1639,13 +1705,25 @@ class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
   let Inst{11-8} = op11_8;
   let Inst{6} = op6;
   let Inst{4} = op4;
+
+  // Instruction operands.
+  bits<5> Vd;
+  bits<5> Vn;
+  bits<5> Vm;
+
+  let Inst{15-12} = Vd{3-0};
+  let Inst{22} = Vd{4};
+  let Inst{19-16} = Vn{3-0};
+  let Inst{7} = Vn{4};
+  let Inst{3-0} = Vm{3-0};
+  let Inst{5} = Vm{4};
 }

 // NEON VMOVs between scalar and core registers.
 class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
                dag oops, dag iops, Format f, InstrItinClass itin,
                string opc, string dt, string asm, list<dag> pattern>
-  : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain,
+  : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, NeonDomain,
             "", itin> {
   let Inst{27-20} = opcod1;
   let Inst{11-8} = opcod2;
@@ -1654,11 +1732,21 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,

   let OutOperandList = oops;
   let InOperandList = !con(iops, (ins pred:$p));
-  let AsmString = !strconcat(
-                    !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
-                    !strconcat("\t", asm));
+  let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
   let Pattern = pattern;
   list<Predicate> Predicates = [HasNEON];
+
+  let PostEncoderMethod = "NEONThumb2DupPostEncoder";
+
+  bits<5> V;
+  bits<4> R;
+  bits<4> p;
+  bits<4> lane;
+
+  let Inst{31-28} = p{3-0};
+  let Inst{7} = V{4};
+  let Inst{19-16} = V{3-0};
+  let Inst{15-12} = R{3-0};
 }

 class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
                 dag oops, dag iops, InstrItinClass itin,
@@ -1687,6 +1775,15 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
   let Inst{11-7} = 0b11000;
   let Inst{6} = op6;
   let Inst{4} = 0;
+
+  bits<5> Vd;
+  bits<5> Vm;
+  bits<4> lane;
+
+  let Inst{22} = Vd{4};
+  let Inst{15-12} = Vd{3-0};
+  let Inst{5} = Vm{4};
+  let Inst{3-0} = Vm{3-0};
 }

 // NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index ba228ff..6f48d96 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -33,13 +33,13 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
   default: break;
   case ARM::LDR_PRE:
   case ARM::LDR_POST:
-    return ARM::LDR;
+    return ARM::LDRi12;
   case ARM::LDRH_PRE:
   case ARM::LDRH_POST:
     return ARM::LDRH;
   case ARM::LDRB_PRE:
   case ARM::LDRB_POST:
-    return ARM::LDRB;
+    return ARM::LDRBi12;
   case ARM::LDRSH_PRE:
   case ARM::LDRSH_POST:
     return ARM::LDRSH;
@@ -48,39 +48,14 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
     return ARM::LDRSB;
   case ARM::STR_PRE:
   case ARM::STR_POST:
-    return ARM::STR;
+    return ARM::STRi12;
   case ARM::STRH_PRE:
   case ARM::STRH_POST:
     return ARM::STRH;
   case ARM::STRB_PRE:
   case ARM::STRB_POST:
-    return ARM::STRB;
+    return ARM::STRBi12;
   }

   return 0;
 }
-
-void ARMInstrInfo::
-reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-              unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig,
-              const TargetRegisterInfo &TRI) const {
-  DebugLoc dl = Orig->getDebugLoc();
-  unsigned Opcode = Orig->getOpcode();
-  switch (Opcode) {
-  default:
-    break;
-  case ARM::MOVi2pieces: {
-    RI.emitLoadConstPool(MBB, I, dl,
-                         DestReg, SubIdx,
-                         Orig->getOperand(1).getImm(),
-                         (ARMCC::CondCodes)Orig->getOperand(2).getImm(),
-                         Orig->getOperand(3).getReg());
-    MachineInstr *NewMI = prior(I);
-    NewMI->getOperand(0).setSubReg(SubIdx);
-    return;
-  }
-  }
-
-  return ARMBaseInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, TRI);
-}
-
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 4563ffe..f2c7bdc 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -32,11 +32,6 @@ public:
   // if there is not such an opcode.
   unsigned getUnindexedOpcode(unsigned Opc) const;

-  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-                     unsigned DestReg, unsigned SubIdx,
-                     const MachineInstr *Orig,
-                     const TargetRegisterInfo &TRI) const;
-
   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index e66f9b9..c827ce3d 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -58,10 +58,9 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
                                                  SDTCisInt<2>]>;
 def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;

-def SDT_ARMMEMBARRIER : SDTypeProfile<0, 0, []>;
-def SDT_ARMSYNCBARRIER : SDTypeProfile<0, 0, []>;
-def SDT_ARMMEMBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_ARMSYNCBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;

 def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;

@@ -70,33 +69,35 @@ def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,

 // Node definitions.
 def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>;
+def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
 def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;

 def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
-                              [SDNPHasChain, SDNPOutFlag]>;
+                              [SDNPHasChain, SDNPOutGlue]>;
 def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
-                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

 def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
-                     [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                     [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                       SDNPVariadic]>;
 def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
-                          [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                          [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                            SDNPVariadic]>;
 def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
-                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
+                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;

 def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
-                        [SDNPHasChain, SDNPOptInFlag]>;
+                        [SDNPHasChain, SDNPOptInGlue]>;

 def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
-                     [SDNPInFlag]>;
+                     [SDNPInGlue]>;
 def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov,
-                     [SDNPInFlag]>;
+                     [SDNPInGlue]>;

 def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
-                       [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+                       [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;

 def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
               [SDNPHasChain]>;
@@ -106,40 +107,38 @@ def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
 def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
                 [SDNPHasChain]>;

-def ARMand : SDNode<"ARMISD::AND", SDT_ARMAnd,
-                    [SDNPOutFlag]>;
-
 def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
-                    [SDNPOutFlag]>;
+                    [SDNPOutGlue]>;

 def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
-                     [SDNPOutFlag, SDNPCommutative]>;
+                     [SDNPOutGlue, SDNPCommutative]>;

 def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;

-def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
-def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
-def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>;
+def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;

 def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;

 def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
                                SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
 def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
-                               SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+                              SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP",
+                              SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>;
+
 def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
                            [SDNPHasChain]>;
-def ARMSyncBarrier : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER,
-                            [SDNPHasChain]>;
-def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERMCR,
-                              [SDNPHasChain]>;
-def ARMSyncBarrierMCR : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERMCR,
+def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
                               [SDNPHasChain]>;
+def ARMPreload : SDNode<"ARMISD::PRELOAD", SDTPrefetch,
+                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;

 def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;

-def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
-                      [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
+                      [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

 def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;

@@ -147,34 +146,40 @@ def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
 //===----------------------------------------------------------------------===//
 // ARM Instruction Predicate Definitions.
 //
-def HasV4T : Predicate<"Subtarget->hasV4TOps()">;
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">, AssemblerPredicate;
 def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
 def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
-def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
-def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
-def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">, AssemblerPredicate;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">, AssemblerPredicate;
+def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, AssemblerPredicate;
 def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
-def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate;
 def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
-def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
-def HasNEON : Predicate<"Subtarget->hasNEON()">;
-def HasDivide : Predicate<"Subtarget->hasDivide()">;
-def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">;
-def HasDB : Predicate<"Subtarget->hasDataBarrier()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate;
+def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate;
+def HasDivide : Predicate<"Subtarget->hasDivide()">, AssemblerPredicate;
+def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
+                       AssemblerPredicate;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
+            AssemblerPredicate;
+def HasMP : Predicate<"Subtarget->hasMPExtension()">,
+            AssemblerPredicate;
 def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
 def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb : Predicate<"Subtarget->isThumb()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">, AssemblerPredicate;
 def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
-def IsARM : Predicate<"!Subtarget->isThumb()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">, AssemblerPredicate;
+def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate;
 def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
 def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;

 // FIXME: Eventually this will be just "hasV6T2Ops".
 def UseMovt : Predicate<"Subtarget->useMovt()">;
 def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
-def UseVMLx : Predicate<"Subtarget->useVMLx()">;
+def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;

 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
@@ -199,12 +204,6 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
 }]>;

-// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24.
-def rot_imm : PatLeaf<(i32 imm), [{
-  int32_t v = (int32_t)N->getZExtValue();
-  return v == 8 || v == 16 || v == 24;
-}]>;
-
 /// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
 def imm1_15 : PatLeaf<(i32 imm), [{
   return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16;
@@ -217,12 +216,12 @@ def imm16_31 : PatLeaf<(i32 imm), [{

 def so_imm_neg : PatLeaf<(imm), [{
-  return ARM_AM::getSOImmVal(-(int)N->getZExtValue()) != -1;
+  return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
 }], so_imm_neg_XFORM>;

 def so_imm_not : PatLeaf<(imm), [{
-  return ARM_AM::getSOImmVal(~(int)N->getZExtValue()) != -1;
+  return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
 }], so_imm_not_XFORM>;

 // sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
@@ -230,15 +229,6 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{
   return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
 }]>;

-/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
-/// e.g., 0xf000ffff
-def bf_inv_mask_imm : Operand<i32>,
-                      PatLeaf<(imm), [{
-  return ARM::isBitFieldInvertedMask(N->getZExtValue());
-}] > {
-  let PrintMethod = "printBitfieldInvMaskImmOperand";
-}
-
 /// Split a 32-bit immediate into two 16 bit parts.
 def hi16 : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
@@ -273,28 +263,103 @@ def sube_live_carry : PatFrag<(ops node:$LHS, node:$RHS),
                               (sube node:$LHS, node:$RHS),
                               [{return N->hasAnyUseOfValue(1);}]>;

+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+  return N->hasOneUse();
+}]>;
+
+// An 'xor' node with a single use.
+def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{
+  return N->hasOneUse();
+}]>;
+
+// An 'fmul' node with a single use.
+def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{
+  return N->hasOneUse();
+}]>;
+
+// An 'fadd' node which checks for single non-hazardous use.
+def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
+  return hasNoVMLxHazardUse(N);
+}]>;
+
+// An 'fsub' node which checks for single non-hazardous use.
+def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
+  return hasNoVMLxHazardUse(N);
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Operand Definitions.
 //

 // Branch target.
-def brtarget : Operand<OtherVT>;
+// FIXME: rename brtarget to t2_brtarget
+def brtarget : Operand<OtherVT> {
+  let EncoderMethod = "getBranchTargetOpValue";
+}
+
+// FIXME: get rid of this one?
+def uncondbrtarget : Operand<OtherVT> {
+  let EncoderMethod = "getUnconditionalBranchTargetOpValue";
+}
+
+// Branch target for ARM. Handles conditional/unconditional
+def br_target : Operand<OtherVT> {
+  let EncoderMethod = "getARMBranchTargetOpValue";
+}
+
+// Call target.
+// FIXME: rename bltarget to t2_bl_target?
+def bltarget : Operand<i32> {
+  // Encoded the same as branch targets.
+  let EncoderMethod = "getBranchTargetOpValue";
+}
+
+// Call target for ARM. Handles conditional/unconditional
+// FIXME: rename bl_target to t2_bltarget?
+def bl_target : Operand<i32> {
+  // Encoded the same as branch targets.
+  let EncoderMethod = "getARMBranchTargetOpValue";
+}
+
 // A list of registers separated by comma. Used by load/store multiple.
+def RegListAsmOperand : AsmOperandClass {
+  let Name = "RegList";
+  let SuperClasses = [];
+}
+
+def DPRRegListAsmOperand : AsmOperandClass {
+  let Name = "DPRRegList";
+  let SuperClasses = [];
+}
+
+def SPRRegListAsmOperand : AsmOperandClass {
+  let Name = "SPRRegList";
+  let SuperClasses = [];
+}
+
 def reglist : Operand<i32> {
+  let EncoderMethod = "getRegisterListOpValue";
+  let ParserMatchClass = RegListAsmOperand;
   let PrintMethod = "printRegisterList";
 }

-// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
-def cpinst_operand : Operand<i32> {
-  let PrintMethod = "printCPInstOperand";
+def dpr_reglist : Operand<i32> {
+  let EncoderMethod = "getRegisterListOpValue";
+  let ParserMatchClass = DPRRegListAsmOperand;
+  let PrintMethod = "printRegisterList";
 }

-def jtblock_operand : Operand<i32> {
-  let PrintMethod = "printJTBlockOperand";
+def spr_reglist : Operand<i32> {
+  let EncoderMethod = "getRegisterListOpValue";
+  let ParserMatchClass = SPRRegListAsmOperand;
+  let PrintMethod = "printRegisterList";
 }

-def jt2block_operand : Operand<i32> {
-  let PrintMethod = "printJT2BlockOperand";
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+  let PrintMethod = "printCPInstOperand";
 }

 // Local PC labels.
@@ -302,6 +367,22 @@ def pclabel : Operand<i32> {
   let PrintMethod = "printPCLabel";
 }

+// ADR instruction labels.
+def adrlabel : Operand<i32> {
+  let EncoderMethod = "getAdrLabelOpValue";
+}
+
+def neon_vcvt_imm32 : Operand<i32> {
+  let EncoderMethod = "getNEONVcvtImm32OpValue";
+}
+
+// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
+def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
+  int32_t v = (int32_t)N->getZExtValue();
+  return v == 8 || v == 16 || v == 24;
+}]> {
+  let EncoderMethod = "getRotImmOpValue";
+}
+
 // shift_imm: An integer that encodes a shift amount and the type of shift
 // (currently either asr or lsl) using the same encoding used for the
 // immediates in so_reg operands.
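(Editor's note: rot_imm above accepts only 8, 16, or 24, and the extend-instruction classes later in this patch carry it as a 2-bit field. The C++ below is an illustrative mapping only, rotate divided by 8, which is what a hook like the getRotImmOpValue named above would need to produce; it is not the imported implementation.)

    #include <cassert>
    #include <cstdint>

    // Collapse an accepted rotate amount (0/8/16/24) to the 2-bit rot field
    // used by the bits<2> rot operands in this patch's extend classes.
    static uint32_t encodeRotImm(uint32_t rotate) {
      assert(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
      return rotate / 8;  // 0b00, 0b01, 0b10, 0b11
    }

    int main() { return encodeRotImm(16) == 2 ? 0 : 1; }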
@@ -313,73 +394,120 @@ def shift_imm : Operand<i32> {
 def so_reg : Operand<i32>,    // reg reg imm
              ComplexPattern<i32, 3, "SelectShifterOperandReg",
                             [shl,srl,sra,rotr]> {
+  let EncoderMethod = "getSORegOpValue";
+  let PrintMethod = "printSORegOperand";
+  let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
+def shift_so_reg : Operand<i32>,    // reg reg imm
+                   ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
+                                  [shl,srl,sra,rotr]> {
+  let EncoderMethod = "getSORegOpValue";
   let PrintMethod = "printSORegOperand";
   let MIOperandInfo = (ops GPR, GPR, i32imm);
 }

 // so_imm - Match a 32-bit shifter_operand immediate operand, which is an
-// 8-bit immediate rotated by an arbitrary number of bits.  so_imm values are
-// represented in the imm field in the same 12-bit form that they are encoded
-// into so_imm instructions: the 8-bit immediate is the least significant bits
-// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
+// 8-bit immediate rotated by an arbitrary number of bits.
 def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> {
+  let EncoderMethod = "getSOImmOpValue";
   let PrintMethod = "printSOImmOperand";
 }

 // Break so_imm's up into two pieces.  This handles immediates with up to 16
 // bits set in them.  This uses so_imm2part to match and so_imm2part_[12] to
 // get the first/second pieces.
-def so_imm2part : Operand<i32>,
-                  PatLeaf<(imm), [{
+def so_imm2part : PatLeaf<(imm), [{
   return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
-  }]> {
-  let PrintMethod = "printSOImm2PartOperand";
-}
+}]>;

-def so_imm2part_1 : SDNodeXForm<imm, [{
-  unsigned V = ARM_AM::getSOImmTwoPartFirst((unsigned)N->getZExtValue());
-  return CurDAG->getTargetConstant(V, MVT::i32);
+/// arm_i32imm - True for +V6T2, or true only if so_imm2part is true.
+///
+def arm_i32imm : PatLeaf<(imm), [{
+  if (Subtarget->hasV6T2Ops())
+    return true;
+  return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
 }]>;

-def so_imm2part_2 : SDNodeXForm<imm, [{
-  unsigned V = ARM_AM::getSOImmTwoPartSecond((unsigned)N->getZExtValue());
-  return CurDAG->getTargetConstant(V, MVT::i32);
+/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
+def imm0_31 : Operand<i32>, PatLeaf<(imm), [{
+  return (int32_t)N->getZExtValue() < 32;
 }]>;

-def so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{
-  return ARM_AM::isSOImmTwoPartVal(-(int)N->getZExtValue());
-  }]> {
-  let PrintMethod = "printSOImm2PartOperand";
+/// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'.
+def imm0_31_m1 : Operand<i32>, PatLeaf<(imm), [{
+  return (int32_t)N->getZExtValue() < 32;
+}]> {
+  let EncoderMethod = "getImmMinusOneOpValue";
 }

-def so_neg_imm2part_1 : SDNodeXForm<imm, [{
-  unsigned V = ARM_AM::getSOImmTwoPartFirst(-(int)N->getZExtValue());
-  return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
+// i32imm_hilo16 - For movt/movw - sets the MC Encoder method.
+// The imm is split into imm{15-12}, imm{11-0}
+//
+def i32imm_hilo16 : Operand<i32> {
+  let EncoderMethod = "getHiLo16ImmOpValue";
+}

-def so_neg_imm2part_2 : SDNodeXForm<imm, [{
-  unsigned V = ARM_AM::getSOImmTwoPartSecond(-(int)N->getZExtValue());
-  return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
+/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
+/// e.g., 0xf000ffff
+def bf_inv_mask_imm : Operand<i32>,
+                      PatLeaf<(imm), [{
+  return ARM::isBitFieldInvertedMask(N->getZExtValue());
+}] > {
+  let EncoderMethod = "getBitfieldInvertedMaskOpValue";
+  let PrintMethod = "printBitfieldInvMaskImmOperand";
+}

-/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
-def imm0_31 : Operand<i32>, PatLeaf<(imm), [{
-  return (int32_t)N->getZExtValue() < 32;
+/// lsb_pos_imm - position of the lsb bit, used by BFI4p and t2BFI4p
+def lsb_pos_imm : Operand<i32>, PatLeaf<(imm), [{
+  return isInt<5>(N->getSExtValue());
 }]>;

+/// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p
+def width_imm : Operand<i32>, PatLeaf<(imm), [{
+  return N->getSExtValue() > 0 && N->getSExtValue() <= 32;
+}] > {
+  let EncoderMethod = "getMsbOpValue";
+}
+
 // Define ARM specific addressing modes.
-// addrmode2 := reg +/- reg shop imm
+
+// addrmode_imm12 := reg +/- imm12
+//
+def addrmode_imm12 : Operand<i32>,
+                     ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
+  // 12-bit immediate operand. Note that instructions using this encode
+  // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
+  // immediate values are as normal.
+
+  let EncoderMethod = "getAddrModeImm12OpValue";
+  let PrintMethod = "printAddrModeImm12Operand";
+  let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+// ldst_so_reg := reg +/- reg shop imm
+//
+def ldst_so_reg : Operand<i32>,
+                  ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
+  let EncoderMethod = "getLdStSORegOpValue";
+  // FIXME: Simplify the printer
+  let PrintMethod = "printAddrMode2Operand";
+  let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
 // addrmode2 := reg +/- imm12
+//           := reg +/- reg shop imm
 //
 def addrmode2 : Operand<i32>,
                 ComplexPattern<i32, 3, "SelectAddrMode2", []> {
+  let EncoderMethod = "getAddrMode2OpValue";
   let PrintMethod = "printAddrMode2Operand";
   let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
 }

 def am2offset : Operand<i32>,
-                ComplexPattern<i32, 2, "SelectAddrMode2Offset", []> {
+                ComplexPattern<i32, 2, "SelectAddrMode2Offset",
+                               [], [SDNPWantRoot]> {
+  let EncoderMethod = "getAddrMode2OffsetOpValue";
   let PrintMethod = "printAddrMode2OffsetOperand";
   let MIOperandInfo = (ops GPR, i32imm);
 }
@@ -389,22 +517,29 @@ def am2offset : Operand<i32>,
 //
 def addrmode3 : Operand<i32>,
                 ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+  let EncoderMethod = "getAddrMode3OpValue";
   let PrintMethod = "printAddrMode3Operand";
   let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
 }

 def am3offset : Operand<i32>,
-                ComplexPattern<i32, 2, "SelectAddrMode3Offset", []> {
+                ComplexPattern<i32, 2, "SelectAddrMode3Offset",
+                               [], [SDNPWantRoot]> {
+  let EncoderMethod = "getAddrMode3OffsetOpValue";
   let PrintMethod = "printAddrMode3OffsetOperand";
   let MIOperandInfo = (ops GPR, i32imm);
 }

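(Editor's note: the #-0 convention called out in the addrmode_imm12 comment above deserves a concrete illustration. The add/subtract sense travels in a separate U bit, so an offset of -0 must be distinguishable from +0. The C++ below is a hedged sketch of that convention, not the imported getAddrModeImm12OpValue implementation.)

    #include <cassert>
    #include <cstdint>

    // The addrmode_imm12 operand carries a signed offset, with INT32_MIN
    // standing in for "#-0". Split it into a 12-bit magnitude plus an
    // add/sub (U) flag.
    struct Imm12 {
      uint32_t imm12;  // magnitude, 0..4095
      bool add;        // true => U bit set (base + offset)
    };

    static Imm12 encodeImm12(int32_t offset) {
      if (offset == INT32_MIN)  // the magic "#-0" value
        return {0, false};
      bool add = offset >= 0;
      uint32_t mag = static_cast<uint32_t>(add ? offset : -offset);
      assert(mag < 4096 && "offset out of range for imm12");
      return {mag, add};
    }

    int main() {
      Imm12 a = encodeImm12(-4);         // sub form, magnitude 4
      Imm12 b = encodeImm12(INT32_MIN);  // "#-0": sub form, magnitude 0
      return (!a.add && a.imm12 == 4 && !b.add && b.imm12 == 0) ? 0 : 1;
    }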
-// addrmode4 := reg, <mode|W>
+// ldstm_mode := {ia, ib, da, db}
 //
-def addrmode4 : Operand<i32>,
-                ComplexPattern<i32, 2, "SelectAddrMode4", []> {
-  let PrintMethod = "printAddrMode4Operand";
-  let MIOperandInfo = (ops GPR:$addr, i32imm);
+def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
+  let EncoderMethod = "getLdStmModeOpValue";
+  let PrintMethod = "printLdStmModeOperand";
+}
+
+def MemMode5AsmOperand : AsmOperandClass {
+  let Name = "MemMode5";
+  let SuperClasses = [];
 }

 // addrmode5 := reg +/- imm8*4
@@ -413,19 +548,32 @@ def addrmode5 : Operand<i32>,
                 ComplexPattern<i32, 2, "SelectAddrMode5", []> {
   let PrintMethod = "printAddrMode5Operand";
   let MIOperandInfo = (ops GPR:$base, i32imm);
+  let ParserMatchClass = MemMode5AsmOperand;
+  let EncoderMethod = "getAddrMode5OpValue";
 }

-// addrmode6 := reg with optional writeback
+// addrmode6 := reg with optional alignment
 //
 def addrmode6 : Operand<i32>,
-                ComplexPattern<i32, 2, "SelectAddrMode6", []> {
+                ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
   let PrintMethod = "printAddrMode6Operand";
   let MIOperandInfo = (ops GPR:$addr, i32imm);
+  let EncoderMethod = "getAddrMode6AddressOpValue";
 }

 def am6offset : Operand<i32> {
   let PrintMethod = "printAddrMode6OffsetOperand";
   let MIOperandInfo = (ops GPR);
+  let EncoderMethod = "getAddrMode6OffsetOpValue";
+}
+
+// Special version of addrmode6 to handle alignment encoding for VLD-dup
+// instructions, specifically VLD4-dup.
+def addrmode6dup : Operand<i32>,
+                   ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
+  let PrintMethod = "printAddrMode6Operand";
+  let MIOperandInfo = (ops GPR:$addr, i32imm);
+  let EncoderMethod = "getAddrMode6DupAddressOpValue";
 }

 // addrmodepc := pc + reg
@@ -440,6 +588,28 @@ def nohash_imm : Operand<i32> {
   let PrintMethod = "printNoHashImmediate";
 }

+def CoprocNumAsmOperand : AsmOperandClass {
+  let Name = "CoprocNum";
+  let SuperClasses = [];
+  let ParserMethod = "tryParseCoprocNumOperand";
+}
+
+def CoprocRegAsmOperand : AsmOperandClass {
+  let Name = "CoprocReg";
+  let SuperClasses = [];
+  let ParserMethod = "tryParseCoprocRegOperand";
+}
+
+def p_imm : Operand<i32> {
+  let PrintMethod = "printPImmediate";
+  let ParserMatchClass = CoprocNumAsmOperand;
+}
+
+def c_imm : Operand<i32> {
+  let PrintMethod = "printCImmediate";
+  let ParserMatchClass = CoprocRegAsmOperand;
+}
+
 //===----------------------------------------------------------------------===//

 include "ARMInstrFormats.td"
@@ -450,55 +620,93 @@ include "ARMInstrFormats.td"
 /// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
 /// binop that produces a value.
-multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
-                        bit Commutable = 0> {
+multiclass AsI1_bin_irs<bits<4> opcod, string opc,
+                        InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+                        PatFrag opnode, bit Commutable = 0> {
   // The register-immediate version is re-materializable. This is useful
   // in particular for taking the address of a local.
   let isReMaterializable = 1 in {
-  def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
-                IIC_iALUi, opc, "\t$dst, $a, $b",
-                [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
+  def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+                iii, opc, "\t$Rd, $Rn, $imm",
+                [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> imm;
     let Inst{25} = 1;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-0} = imm;
   }
   }
-  def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
-                IIC_iALUr, opc, "\t$dst, $a, $b",
-                [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
-    let Inst{11-4} = 0b00000000;
+  def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+                iir, opc, "\t$Rd, $Rn, $Rm",
+                [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<4> Rm;
     let Inst{25} = 0;
     let isCommutable = Commutable;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-4} = 0b00000000;
+    let Inst{3-0} = Rm;
   }
-  def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
-                IIC_iALUsr, opc, "\t$dst, $a, $b",
-                [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
+  def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+                iis, opc, "\t$Rd, $Rn, $shift",
+                [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> shift;
     let Inst{25} = 0;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-0} = shift;
   }
 }

 /// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
 /// instruction modifies the CPSR register.
-let Defs = [CPSR] in {
-multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
-                         bit Commutable = 0> {
-  def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
-               IIC_iALUi, opc, "\t$dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
-    let Inst{20} = 1;
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+multiclass AI1_bin_s_irs<bits<4> opcod, string opc,
+                         InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+                         PatFrag opnode, bit Commutable = 0> {
+  def ri : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+               iii, opc, "\t$Rd, $Rn, $imm",
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> imm;
     let Inst{25} = 1;
+    let Inst{20} = 1;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-0} = imm;
   }
-  def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
-               IIC_iALUr, opc, "\t$dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
+  def rr : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+               iir, opc, "\t$Rd, $Rn, $Rm",
+               [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<4> Rm;
     let isCommutable = Commutable;
-    let Inst{11-4} = 0b00000000;
-    let Inst{20} = 1;
     let Inst{25} = 0;
-  }
-  def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
-               IIC_iALUsr, opc, "\t$dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
     let Inst{20} = 1;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-4} = 0b00000000;
+    let Inst{3-0} = Rm;
+  }
+  def rs : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+               iis, opc, "\t$Rd, $Rn, $shift",
+               [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> shift;
     let Inst{25} = 0;
+    let Inst{20} = 1;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-0} = shift;
   }
 }
 }
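(Editor's note: to make the ri/rr/rs encodings above concrete, here is an illustrative, non-authoritative C++ packing of an ARM data-processing word as the AsI1_bin_irs definitions lay it out. The I bit, Inst{25}, selects the so_imm form versus the register/shifter form, and bits 11-0 hold the 12-bit operand2 either way.)

    #include <cstdint>
    #include <cstdio>

    // Data-processing layout used by AsI1_bin_irs above:
    // cond | 0 0 I | opcod | S | Rn | Rd | 12-bit operand2.
    static uint32_t encodeDataProc(uint32_t cond, uint32_t opcod, bool I,
                                   bool S, uint32_t Rn, uint32_t Rd,
                                   uint32_t operand2) {
      uint32_t Inst = 0;
      Inst |= cond << 28;           // Inst{31-28}: predicate/condition
      Inst |= (I ? 1u : 0u) << 25;  // Inst{25}: 1 = so_imm, 0 = reg/shift
      Inst |= opcod << 21;          // Inst{24-21}: ALU opcode
      Inst |= (S ? 1u : 0u) << 20;  // Inst{20}: S (set flags)
      Inst |= Rn << 16;             // Inst{19-16} = Rn
      Inst |= Rd << 12;             // Inst{15-12} = Rd
      Inst |= operand2 & 0xFFF;     // Inst{11-0}: imm12 or shifter operand
      return Inst;
    }

    int main() {
      // An "add r0, r1, #4"-shaped word: opcod 0b0100, immediate form.
      printf("0x%08x\n", encodeDataProc(0xE, 0b0100, true, false, 1, 0, 4));
    }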
@@ -507,146 +715,233 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
 /// patterns. Similar to AsI1_bin_irs except the instruction does not produce
 /// an explicit result, only implicitly sets CPSR.
 let isCompare = 1, Defs = [CPSR] in {
-multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
-                       bit Commutable = 0> {
-  def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi,
-               opc, "\t$a, $b",
-               [(opnode GPR:$a, so_imm:$b)]> {
-    let Inst{20} = 1;
+multiclass AI1_cmp_irs<bits<4> opcod, string opc,
+                       InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+                       PatFrag opnode, bit Commutable = 0> {
+  def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
+               opc, "\t$Rn, $imm",
+               [(opnode GPR:$Rn, so_imm:$imm)]> {
+    bits<4> Rn;
+    bits<12> imm;
     let Inst{25} = 1;
-  }
-  def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr,
-               opc, "\t$a, $b",
-               [(opnode GPR:$a, GPR:$b)]> {
-    let Inst{11-4} = 0b00000000;
     let Inst{20} = 1;
-    let Inst{25} = 0;
-    let isCommutable = Commutable;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = 0b0000;
+    let Inst{11-0} = imm;
   }
-  def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr,
-               opc, "\t$a, $b",
-               [(opnode GPR:$a, so_reg:$b)]> {
+  def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
+               opc, "\t$Rn, $Rm",
+               [(opnode GPR:$Rn, GPR:$Rm)]> {
+    bits<4> Rn;
+    bits<4> Rm;
+    let isCommutable = Commutable;
+    let Inst{25} = 0;
     let Inst{20} = 1;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = 0b0000;
+    let Inst{11-4} = 0b00000000;
+    let Inst{3-0} = Rm;
+  }
+  def rs : AI1<opcod, (outs), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, iis,
+               opc, "\t$Rn, $shift",
+               [(opnode GPR:$Rn, so_reg:$shift)]> {
+    bits<4> Rn;
+    bits<12> shift;
     let Inst{25} = 0;
+    let Inst{20} = 1;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = 0b0000;
+    let Inst{11-0} = shift;
   }
 }
 }

-/// AI_unary_rrot - A unary operation with two forms: one whose operand is a
+/// AI_ext_rrot - A unary operation with two forms: one whose operand is a
 /// register and one whose operand is a register rotated by 8/16/24.
 /// FIXME: Remove the 'r' variant. Its rot_imm is zero.
-multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> {
-  def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src),
-                IIC_iUNAr, opc, "\t$dst, $src",
-                [(set GPR:$dst, (opnode GPR:$src))]>,
+multiclass AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+  def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+                IIC_iEXTr, opc, "\t$Rd, $Rm",
+                [(set GPR:$Rd, (opnode GPR:$Rm))]>,
         Requires<[IsARM, HasV6]> {
-    let Inst{11-10} = 0b00;
+    bits<4> Rd;
+    bits<4> Rm;
     let Inst{19-16} = 0b1111;
+    let Inst{15-12} = Rd;
+    let Inst{11-10} = 0b00;
+    let Inst{3-0} = Rm;
   }
-  def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot),
-                    IIC_iUNAsi, opc, "\t$dst, $src, ror $rot",
-                    [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+  def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+                    IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
+                    [(set GPR:$Rd, (opnode (rotr GPR:$Rm, rot_imm:$rot)))]>,
         Requires<[IsARM, HasV6]> {
+    bits<4> Rd;
+    bits<4> Rm;
+    bits<2> rot;
     let Inst{19-16} = 0b1111;
+    let Inst{15-12} = Rd;
+    let Inst{11-10} = rot;
+    let Inst{3-0} = Rm;
   }
 }

-multiclass AI_unary_rrot_np<bits<8> opcod, string opc> {
-  def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src),
-                IIC_iUNAr, opc, "\t$dst, $src",
+multiclass AI_ext_rrot_np<bits<8> opcod, string opc> {
+  def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+                IIC_iEXTr, opc, "\t$Rd, $Rm",
                 [/* For disassembly only; pattern left blank */]>,
         Requires<[IsARM, HasV6]> {
-    let Inst{11-10} = 0b00;
     let Inst{19-16} = 0b1111;
+    let Inst{11-10} = 0b00;
   }
-  def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot),
-                    IIC_iUNAsi, opc, "\t$dst, $src, ror $rot",
+  def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+                    IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
                 [/* For disassembly only; pattern left blank */]>,
         Requires<[IsARM, HasV6]> {
+    bits<2> rot;
     let Inst{19-16} = 0b1111;
+    let Inst{11-10} = rot;
   }
 }

-/// AI_bin_rrot - A binary operation with two forms: one whose operand is a
+/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
 /// register and one whose operand is a register rotated by 8/16/24.
-multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
-  def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
-                 IIC_iALUr, opc, "\t$dst, $LHS, $RHS",
-                 [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+multiclass AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+  def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                 IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
+                 [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
           Requires<[IsARM, HasV6]> {
+    bits<4> Rd;
+    bits<4> Rm;
+    bits<4> Rn;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
     let Inst{11-10} = 0b00;
+    let Inst{9-4} = 0b000111;
+    let Inst{3-0} = Rm;
+  }
+  def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+                                             rot_imm:$rot),
+                     IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+                     [(set GPR:$Rd, (opnode GPR:$Rn,
+                                            (rotr GPR:$Rm, rot_imm:$rot)))]>,
+          Requires<[IsARM, HasV6]> {
+    bits<4> Rd;
+    bits<4> Rm;
+    bits<4> Rn;
+    bits<2> rot;
+    let Inst{19-16} = Rn;
+    let Inst{15-12} = Rd;
+    let Inst{11-10} = rot;
+    let Inst{9-4} = 0b000111;
+    let Inst{3-0} = Rm;
   }
-  def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS,
-                                              i32imm:$rot),
-                     IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot",
-                     [(set GPR:$dst, (opnode GPR:$LHS,
-                                             (rotr GPR:$RHS, rot_imm:$rot)))]>,
-                     Requires<[IsARM, HasV6]>;
 }

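(Editor's note: since the rotate in the AI_ext_rrot forms above changes which byte or halfword gets extended, a small semantic sketch may help. Illustrative C++ assuming the usual ARM sxtb semantics; it is not part of the imported sources.)

    #include <cstdint>
    #include <cstdio>

    // Extend-with-rotate semantics: rotate the source right by 0/8/16/24
    // first, then sign-extend the low byte of the rotated value.
    static uint32_t rotr32(uint32_t v, unsigned amt) {
      amt &= 31;
      return amt ? (v >> amt) | (v << (32 - amt)) : v;
    }

    static int32_t sxtb(uint32_t Rm, unsigned rot) {  // rot in {0, 8, 16, 24}
      return static_cast<int8_t>(rotr32(Rm, rot) & 0xFF);
    }

    int main() {
      // Byte 2 of 0x00FF0000 is 0xFF, so sxtb with ror #16 yields -1.
      printf("%d\n", sxtb(0x00FF0000, 16));
    }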
 // For disassembly only.
-multiclass AI_bin_rrot_np<bits<8> opcod, string opc> {
-  def rr     : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
-                  IIC_iALUr, opc, "\t$dst, $LHS, $RHS",
+multiclass AI_exta_rrot_np<bits<8> opcod, string opc> {
+  def rr     : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                  IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
                   [/* For disassembly only; pattern left blank */]>,
               Requires<[IsARM, HasV6]> {
     let Inst{11-10} = 0b00;
   }
-  def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS,
-                                              i32imm:$rot),
-                  IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot",
+  def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+                                             rot_imm:$rot),
+                  IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
                   [/* For disassembly only; pattern left blank */]>,
-                  Requires<[IsARM, HasV6]>;
+              Requires<[IsARM, HasV6]> {
+    bits<4> Rn;
+    bits<2> rot;
+    let Inst{19-16} = Rn;
+    let Inst{11-10} = rot;
+  }
 }
 
 /// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
 let Uses = [CPSR] in {
 multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
                              bit Commutable = 0> {
-  def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
-                DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b",
-                [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
+  def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+                DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+                [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
                Requires<[IsARM]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> imm;
     let Inst{25} = 1;
+    let Inst{15-12} = Rd;
+    let Inst{19-16} = Rn;
+    let Inst{11-0} = imm;
   }
-  def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b",
-                [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
+  def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+                [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
                Requires<[IsARM]> {
-    let isCommutable = Commutable;
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<4> Rm;
     let Inst{11-4} = 0b00000000;
     let Inst{25} = 0;
+    let isCommutable = Commutable;
+    let Inst{3-0} = Rm;
+    let Inst{15-12} = Rd;
+    let Inst{19-16} = Rn;
   }
-  def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
-                DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b",
-                [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
+  def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+                DPSoRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+                [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
                Requires<[IsARM]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> shift;
     let Inst{25} = 0;
+    let Inst{11-0} = shift;
+    let Inst{15-12} = Rd;
+    let Inst{19-16} = Rn;
   }
 }
 // Carry setting variants
-let Defs = [CPSR] in {
+let isCodeGenOnly = 1, Defs = [CPSR] in {
 multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
                                bit Commutable = 0> {
-  def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
-                 DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"),
-                 [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
+  def Sri : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+                 DPFrm, IIC_iALUi, !strconcat(opc, "\t$Rd, $Rn, $imm"),
+                 [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
                 Requires<[IsARM]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> imm;
+    let Inst{15-12} = Rd;
+    let Inst{19-16} = Rn;
+    let Inst{11-0} = imm;
     let Inst{20} = 1;
     let Inst{25} = 1;
   }
-  def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                 DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"),
-                 [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
+  def Srr : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                 DPFrm, IIC_iALUr, !strconcat(opc, "\t$Rd, $Rn, $Rm"),
+                 [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
                 Requires<[IsARM]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<4> Rm;
     let Inst{11-4} = 0b00000000;
+    let isCommutable = Commutable;
+    let Inst{3-0} = Rm;
+    let Inst{15-12} = Rd;
+    let Inst{19-16} = Rn;
     let Inst{20} = 1;
     let Inst{25} = 0;
   }
-  def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
-                 DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"),
-                 [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
+  def Srs : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+                 DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$Rd, $Rn, $shift"),
+                 [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
                 Requires<[IsARM]> {
+    bits<4> Rd;
+    bits<4> Rn;
+    bits<12> shift;
+    let Inst{11-0} = shift;
+    let Inst{15-12} = Rd;
+    let Inst{19-16} = Rn;
     let Inst{20} = 1;
     let Inst{25} = 0;
   }
@@ -654,6 +949,62 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
 }
 }
 
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii,
+                   InstrItinClass iir, PatFrag opnode> {
+  // Note: We use the complex addrmode_imm12 rather than just an input
+  // GPR and a constrained immediate so that we can use this to match
+  // frame index references and avoid matching constant pool references.
+  def i12: AI2ldst<0b010, 1, isByte, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+                   AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+                   [(set GPR:$Rt, (opnode addrmode_imm12:$addr))]> {
+    bits<4> Rt;
+    bits<17> addr;
+    let Inst{23} = addr{12};        // U (add = ('U' == 1))
+    let Inst{19-16} = addr{16-13};  // Rn
+    let Inst{15-12} = Rt;
+    let Inst{11-0} = addr{11-0};    // imm12
+  }
+  def rs : AI2ldst<0b011, 1, isByte, (outs GPR:$Rt), (ins ldst_so_reg:$shift),
+                   AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+                   [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> {
+    bits<4> Rt;
+    bits<17> shift;
+    let Inst{23} = shift{12};       // U (add = ('U' == 1))
+    let Inst{19-16} = shift{16-13}; // Rn
+    let Inst{15-12} = Rt;
+    let Inst{11-0} = shift{11-0};
+  }
+}
+}
+
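+// Worked example (illustrative, derived from the lets above): for
+// "ldr r3, [r5, #-8]" the complex operand packs Rn (r5) into addr{16-13},
+// U = 0 (subtract) into addr{12}, and imm12 = 8 into addr{11-0}; the i12
+// encoder lets then copy those fields into Inst{19-16}, Inst{23} and
+// Inst{11-0} respectively, with Rt (r3) in Inst{15-12}.
+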
+multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
+                   InstrItinClass iir, PatFrag opnode> {
+  // Note: We use the complex addrmode_imm12 rather than just an input
+  // GPR and a constrained immediate so that we can use this to match
+  // frame index references and avoid matching constant pool references.
+  def i12 : AI2ldst<0b010, 0, isByte, (outs),
+                    (ins GPR:$Rt, addrmode_imm12:$addr),
+                    AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr",
+                    [(opnode GPR:$Rt, addrmode_imm12:$addr)]> {
+    bits<4> Rt;
+    bits<17> addr;
+    let Inst{23} = addr{12};        // U (add = ('U' == 1))
+    let Inst{19-16} = addr{16-13};  // Rn
+    let Inst{15-12} = Rt;
+    let Inst{11-0} = addr{11-0};    // imm12
+  }
+  def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPR:$Rt, ldst_so_reg:$shift),
+                   AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift",
+                   [(opnode GPR:$Rt, ldst_so_reg:$shift)]> {
+    bits<4> Rt;
+    bits<17> shift;
+    let Inst{23} = shift{12};       // U (add = ('U' == 1))
+    let Inst{19-16} = shift{16-13}; // Rn
+    let Inst{15-12} = Rt;
+    let Inst{11-0} = shift{11-0};
+  }
+}
 
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
 
@@ -669,8 +1020,7 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
 let neverHasSideEffects = 1, isNotDuplicable = 1 in
 def CONSTPOOL_ENTRY :
 PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
-                    i32imm:$size), NoItinerary,
-           "${instid:label} ${cpidx:cpentry}", []>;
+                    i32imm:$size), NoItinerary, []>;
 
 // FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
 // from removing one half of the matched pairs. That breaks PEI, which assumes
@@ -678,12 +1028,10 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
 let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
 def ADJCALLSTACKUP :
 PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
-           "${:comment} ADJCALLSTACKUP $amt1",
            [(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
 
 def ADJCALLSTACKDOWN :
 PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
-           "${:comment} ADJCALLSTACKDOWN $amt",
            [(ARMcallseq_start timm:$amt)]>;
 }
 
@@ -691,6 +1039,7 @@ def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "",
              [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM, HasV6T2]> {
   let Inst{27-16} = 0b001100100000;
+  let Inst{15-8} = 0b11110000;
   let Inst{7-0} = 0b00000000;
 }
 
@@ -698,6 +1047,7 @@ def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "",
              [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM, HasV6T2]> {
   let Inst{27-16} = 0b001100100000;
+  let Inst{15-8} = 0b11110000;
   let Inst{7-0} = 0b00000001;
 }
 
@@ -705,6 +1055,7 @@ def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "",
              [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM, HasV6T2]> {
   let Inst{27-16} = 0b001100100000;
+  let Inst{15-8} = 0b11110000;
   let Inst{7-0} = 0b00000010;
 }
 
@@ -712,6 +1063,7 @@ def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "",
             [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM, HasV6T2]> {
   let Inst{27-16} = 0b001100100000;
+  let Inst{15-8} = 0b11110000;
   let Inst{7-0} = 0b00000011;
 }
 
@@ -719,14 +1071,22 @@ def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel",
              "\t$dst, $a, $b",
              [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM, HasV6]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  let Inst{3-0} = Rm;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
   let Inst{27-20} = 0b01101000;
   let Inst{7-4} = 0b1011;
+  let Inst{11-8} = 0b1111;
 }
 
 def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
              [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM, HasV6T2]> {
   let Inst{27-16} = 0b001100100000;
+  let Inst{15-8} = 0b11110000;
   let Inst{7-0} = 0b00000100;
 }
 
@@ -735,154 +1095,174 @@ def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
 def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary,
               "bkpt", "\t$val",
               [/* For disassembly only; pattern left blank */]>,
            Requires<[IsARM]> {
+  bits<16> val;
+  let Inst{3-0} = val{3-0};
+  let Inst{19-8} = val{15-4};
   let Inst{27-20} = 0b00010010;
   let Inst{7-4} = 0b0111;
 }
 
-// Change Processor State is a system instruction -- for disassembly only.
-// The singleton $opt operand contains the following information:
-// opt{4-0} = mode from Inst{4-0}
-// opt{5} = changemode from Inst{17}
-// opt{8-6} = AIF from Inst{8-6}
-// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
-def CPS : AXI<(outs), (ins cps_opt:$opt), MiscFrm, NoItinerary, "cps$opt",
-              [/* For disassembly only; pattern left blank */]>,
-          Requires<[IsARM]> {
+// Change Processor State is a system instruction -- for disassembly and
+// parsing only.
+// FIXME: Since the asm parser has currently no clean way to handle optional
+// operands, create 3 versions of the same instruction. Once there's a clean
+// framework to represent optional operands, change this behavior.
+class CPS<dag iops, string asm_ops>
+  : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops),
+        [/* For disassembly only; pattern left blank */]>, Requires<[IsARM]> {
+  bits<2> imod;
+  bits<3> iflags;
+  bits<5> mode;
+  bit M;
+
+  let Inst{31-28} = 0b1111;
   let Inst{27-20} = 0b00010000;
-  let Inst{16} = 0;
-  let Inst{5} = 0;
+  let Inst{19-18} = imod;
+  let Inst{17} = M; // Enabled if mode is set
+  let Inst{16} = 0;
+  let Inst{8-6} = iflags;
+  let Inst{5} = 0;
+  let Inst{4-0} = mode;
 }
 
+let M = 1 in
+  def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
+                  "$imod\t$iflags, $mode">;
+let mode = 0, M = 0 in
+  def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">;
+
+let imod = 0, iflags = 0, M = 1 in
+  def CPS1p : CPS<(ins i32imm:$mode), "\t$mode">;
+
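+// Illustrative encodings (a sketch derived from the class above, not part
+// of the original patch): "cps #16" selects CPS1p (imod = 0, iflags = 0,
+// M = 1, mode = 0b10000), while "cpsie if" selects CPS2p with imod = 0b10
+// (enable), iflags covering the I and F bits, and no mode change (M = 0).
+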
 // Preload signals the memory system of possible future data/instruction
 // accesses. These are for disassembly only.
-//
-// A8.6.117, A8.6.118.  Different instructions are generated for #0 and #-0.
-// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
-multiclass APreLoad<bit data, bit read, string opc> {
+multiclass APreLoad<bits<1> read, bits<1> data, string opc> {
 
-  def i : AXI<(outs), (ins GPR:$base, neg_zero:$imm), MiscFrm, NoItinerary,
-              !strconcat(opc, "\t[$base, $imm]"), []> {
+  def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
+                !strconcat(opc, "\t$addr"),
+                [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> {
+    bits<4> Rt;
+    bits<17> addr;
     let Inst{31-26} = 0b111101;
     let Inst{25} = 0; // 0 for immediate form
     let Inst{24} = data;
+    let Inst{23} = addr{12};        // U (add = ('U' == 1))
     let Inst{22} = read;
     let Inst{21-20} = 0b01;
+    let Inst{19-16} = addr{16-13};  // Rn
+    let Inst{15-12} = 0b1111;
+    let Inst{11-0} = addr{11-0};    // imm12
   }
-  def r : AXI<(outs), (ins addrmode2:$addr), MiscFrm, NoItinerary,
-              !strconcat(opc, "\t$addr"), []> {
+  def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
+               !strconcat(opc, "\t$shift"),
+               [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> {
+    bits<17> shift;
     let Inst{31-26} = 0b111101;
     let Inst{25} = 1; // 1 for register form
     let Inst{24} = data;
+    let Inst{23} = shift{12};       // U (add = ('U' == 1))
     let Inst{22} = read;
     let Inst{21-20} = 0b01;
-    let Inst{4} = 0;
+    let Inst{19-16} = shift{16-13}; // Rn
+    let Inst{15-12} = 0b1111;
+    let Inst{11-0} = shift{11-0};
   }
 }
 
-defm PLD  : APreLoad<1, 1, "pld">;
-defm PLDW : APreLoad<1, 0, "pldw">;
-defm PLI  : APreLoad<0, 1, "pli">;
-
-def SETENDBE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tbe",
-                   [/* For disassembly only; pattern left blank */]>,
-               Requires<[IsARM]> {
-  let Inst{31-28} = 0b1111;
-  let Inst{27-20} = 0b00010000;
-  let Inst{16} = 1;
-  let Inst{9} = 1;
-  let Inst{7-4} = 0b0000;
-}
+defm PLD  : APreLoad<1, 1, "pld">,  Requires<[IsARM]>;
+defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
+defm PLI  : APreLoad<1, 0, "pli">,  Requires<[IsARM,HasV7]>;
 
-def SETENDLE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tle",
-                   [/* For disassembly only; pattern left blank */]>,
+def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
+                 "setend\t$end",
+                 [/* For disassembly only; pattern left blank */]>,
                Requires<[IsARM]> {
-  let Inst{31-28} = 0b1111;
-  let Inst{27-20} = 0b00010000;
-  let Inst{16} = 1;
-  let Inst{9} = 0;
-  let Inst{7-4} = 0b0000;
+  bits<1> end;
+  let Inst{31-10} = 0b1111000100000001000000;
+  let Inst{9} = end;
+  let Inst{8-0} = 0;
 }
 
 def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
             [/* For disassembly only; pattern left blank */]>,
          Requires<[IsARM, HasV7]> {
-  let Inst{27-16} = 0b001100100000;
-  let Inst{7-4} = 0b1111;
+  bits<4> opt;
+  let Inst{27-4} = 0b001100100000111100001111;
+  let Inst{3-0} = opt;
 }
 
 // A5.4 Permanently UNDEFINED instructions.
-// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to
-// binutils
 let isBarrier = 1, isTerminator = 1 in
-def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
-               ".long 0xe7ffdefe ${:comment} trap", [(trap)]>,
+def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
+               "trap", [(trap)]>,
            Requires<[IsARM]> {
-  let Inst{27-25} = 0b011;
-  let Inst{24-20} = 0b11111;
-  let Inst{7-5} = 0b111;
-  let Inst{4} = 0b1;
+  let Inst = 0xe7ffdefe;
 }
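 
+// For example (illustrative): "pld [r1, #32]" uses the i12 form with
+// read = 1 and data = 1, so Inst{22} = 1 and Inst{24} = 1, Rn = r1 and
+// imm12 = 32; the register-offset spelling "pld [r1, r2]" instead picks
+// the rs variant, which sets Inst{25} = 1.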
 // Address computation and loads and stores in PIC mode.
 let isNotDuplicable = 1 in {
-def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
-                  Pseudo, IIC_iALUr, "\n$cp:\n\tadd$p\t$dst, pc, $a",
-                  [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
+def PICADD  : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
+                            Size4Bytes, IIC_iALUr,
+                            [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
 
 let AddedComplexity = 10 in {
-def PICLDR  : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
-                  Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p\t$dst, $addr",
-                  [(set GPR:$dst, (load addrmodepc:$addr))]>;
+def PICLDR  : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
+                            Size4Bytes, IIC_iLoad_r,
+                            [(set GPR:$dst, (load addrmodepc:$addr))]>;
 
-def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
-                Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrh${p}\t$dst, $addr",
-                  [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>;
+def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+                            Size4Bytes, IIC_iLoad_bh_r,
+                            [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>;
 
-def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
-                Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrb${p}\t$dst, $addr",
-                  [(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>;
+def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+                            Size4Bytes, IIC_iLoad_bh_r,
+                            [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>;
 
-def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
-               Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsh${p}\t$dst, $addr",
-                  [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>;
+def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+                             Size4Bytes, IIC_iLoad_bh_r,
+                             [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>;
 
-def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
-               Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsb${p}\t$dst, $addr",
-                  [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>;
+def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
+                             Size4Bytes, IIC_iLoad_bh_r,
+                             [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>;
 }
 let AddedComplexity = 10 in {
-def PICSTR  : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
-               Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr$p\t$src, $addr",
-               [(store GPR:$src, addrmodepc:$addr)]>;
+def PICSTR  : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+                  Size4Bytes, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>;
 
-def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
-               Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrh${p}\t$src, $addr",
-               [(truncstorei16 GPR:$src, addrmodepc:$addr)]>;
+def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+                  Size4Bytes, IIC_iStore_bh_r, [(truncstorei16 GPR:$src,
+                                                 addrmodepc:$addr)]>;
 
-def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
-               Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrb${p}\t$src, $addr",
-               [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
+                  Size4Bytes, IIC_iStore_bh_r, [(truncstorei8 GPR:$src,
+                                                 addrmodepc:$addr)]>;
 }
 } // isNotDuplicable = 1
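 
+// Note (editorial sketch, inferred from the ARMPseudoInst change): these
+// are now plain pseudo-instructions, so the "$cp:" label emission and the
+// pc-relative add/load/store expansion that used to live in the asm strings
+// is presumably produced during MC lowering instead.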
 // LEApcrel - Load a pc-relative address into a register without offending the
 // assembler.
-let neverHasSideEffects = 1 in {
-let isReMaterializable = 1 in
-def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p),
-                    Pseudo, IIC_iALUi,
-                    "adr$p\t$dst, #$label", []>;
-
-} // neverHasSideEffects
-def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
-                      (ins i32imm:$label, nohash_imm:$id, pred:$p),
-                      Pseudo, IIC_iALUi,
-                      "adr$p\t$dst, #${label}_${id}", []> {
-  let Inst{25} = 1;
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+// The 'adr' mnemonic encodes differently if the label is before or after
+// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
+// know until then which form of the instruction will be used.
+def ADR : AI1<0, (outs GPR:$Rd), (ins adrlabel:$label),
+              MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> {
+  bits<4> Rd;
+  bits<12> label;
+  let Inst{27-25} = 0b001;
+  let Inst{20} = 0;
+  let Inst{19-16} = 0b1111;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = label;
 }
+def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
+                             Size4Bytes, IIC_iALUi, []>;
+
+def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
+                               (ins i32imm:$label, nohash_imm:$id, pred:$p),
+                               Size4Bytes, IIC_iALUi, []>;
 
 //===----------------------------------------------------------------------===//
 //  Control Flow Instructions.
 //
 
@@ -893,159 +1273,139 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
   def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
                   "bx", "\tlr", [(ARMretflag)]>,
                Requires<[IsARM, HasV4T]> {
-    let Inst{3-0}   = 0b1110;
-    let Inst{7-4}   = 0b0001;
-    let Inst{19-8}  = 0b111111111111;
-    let Inst{27-20} = 0b00010010;
+    let Inst{27-0} = 0b0001001011111111111100011110;
   }
 
   // ARMV4 only
-  def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+  def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
                    "mov", "\tpc, lr", [(ARMretflag)]>,
                Requires<[IsARM, NoV4T]> {
-    let Inst{11-0}  = 0b000000001110;
-    let Inst{15-12} = 0b1111;
-    let Inst{19-16} = 0b0000;
-    let Inst{27-20} = 0b00011010;
+    let Inst{27-0} = 0b0001101000001111000000001110;
  }
 }
 
 // Indirect branches
 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
   // ARMV4T and above
-  def BRIND : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
+  def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
                   [(brind GPR:$dst)]>,
               Requires<[IsARM, HasV4T]> {
-    let Inst{7-4}   = 0b0001;
-    let Inst{19-8}  = 0b111111111111;
-    let Inst{27-20} = 0b00010010;
-    let Inst{31-28} = 0b1110;
+    bits<4> dst;
+    let Inst{31-4} = 0b1110000100101111111111110001;
+    let Inst{3-0} = dst;
   }
 
   // ARMV4 only
-  def MOVPCRX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "mov\tpc, $dst",
-                  [(brind GPR:$dst)]>,
-                Requires<[IsARM, NoV4T]> {
-    let Inst{11-4}  = 0b00000000;
-    let Inst{15-12} = 0b1111;
-    let Inst{19-16} = 0b0000;
-    let Inst{27-20} = 0b00011010;
-    let Inst{31-28} = 0b1110;
-  }
+  // FIXME: We would really like to define this as a vanilla ARMPat like:
+  //   ARMPat<(brind GPR:$dst), (MOVr PC, GPR:$dst)>
+  // With that, however, we can't set isBranch, isTerminator, etc..
+  def MOVPCRX : ARMPseudoInst<(outs), (ins GPR:$dst),
+                    Size4Bytes, IIC_Br, [(brind GPR:$dst)]>,
+                Requires<[IsARM, NoV4T]>;
 }
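 
+// Encoding check (illustrative): with the constant block above, "bx r2"
+// assembles to 0xe12fff12 -- cond = AL (0b1110), the fixed
+// 0001 0010 1111 1111 1111 0001 pattern in Inst{27-4}, and Rm = 0b0010
+// in Inst{3-0}.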
-// FIXME: remove when we have a way to marking a MI with these properties.
-// FIXME: Should pc  be an implicit operand like PICADD, etc?
-let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
-    hasExtraDefRegAllocReq = 1 in
-  def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
-                                        reglist:$dsts, variable_ops),
-                       IndexModeUpd, LdStMulFrm, IIC_Br,
-                       "ldm${addr:submode}${p}\t$addr!, $dsts",
-                       "$addr.addr = $wb", []>;
-
-// On non-Darwin platforms R9 is callee-saved.
+// All calls clobber the non-callee saved registers. SP is marked as
+// a use to prevent stack-pointer assignments that appear immediately
+// before calls from potentially appearing dead.
 let isCall = 1,
+  // On non-Darwin platforms R9 is callee-saved.
   Defs = [R0,  R1,  R2,  R3,  R12, LR,
           D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
           D16, D17, D18, D19, D20, D21, D22, D23,
-          D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
-  def BL  : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
-                IIC_Br, "bl\t${func:call}",
+          D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+  Uses = [SP] in {
+  def BL  : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+                IIC_Br, "bl\t$func",
                 [(ARMcall tglobaladdr:$func)]>,
             Requires<[IsARM, IsNotDarwin]> {
     let Inst{31-28} = 0b1110;
+    bits<24> func;
+    let Inst{23-0} = func;
   }
 
-  def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
-                   IIC_Br, "bl", "\t${func:call}",
+  def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
+                   IIC_Br, "bl", "\t$func",
                    [(ARMcall_pred tglobaladdr:$func)]>,
-                Requires<[IsARM, IsNotDarwin]>;
+                Requires<[IsARM, IsNotDarwin]> {
+    bits<24> func;
+    let Inst{23-0} = func;
+  }
 
   // ARMv5T and above
   def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
                IIC_Br, "blx\t$func",
               [(ARMcall GPR:$func)]>,
            Requires<[IsARM, HasV5T, IsNotDarwin]> {
-    let Inst{7-4}   = 0b0011;
-    let Inst{19-8}  = 0b111111111111;
-    let Inst{27-20} = 0b00010010;
+    bits<4> func;
+    let Inst{31-4} = 0b1110000100101111111111110011;
+    let Inst{3-0} = func;
   }
 
   // ARMv4T
  // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
-  def BX : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
-                  IIC_Br, "mov\tlr, pc\n\tbx\t$func",
-                  [(ARMcall_nolink tGPR:$func)]>,
-           Requires<[IsARM, HasV4T, IsNotDarwin]> {
-    let Inst{7-4}   = 0b0001;
-    let Inst{19-8}  = 0b111111111111;
-    let Inst{27-20} = 0b00010010;
-  }
+  def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+                Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+           Requires<[IsARM, HasV4T, IsNotDarwin]>;
 
   // ARMv4
-  def BMOVPCRX : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
-                  IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func",
-                  [(ARMcall_nolink tGPR:$func)]>,
-           Requires<[IsARM, NoV4T, IsNotDarwin]> {
-    let Inst{11-4}  = 0b00000000;
-    let Inst{15-12} = 0b1111;
-    let Inst{19-16} = 0b0000;
-    let Inst{27-20} = 0b00011010;
-  }
+  def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+                Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+           Requires<[IsARM, NoV4T, IsNotDarwin]>;
 }
 
-// On Darwin R9 is call-clobbered.
 let isCall = 1,
+  // On Darwin R9 is call-clobbered.
+  // R7 is marked as a use to prevent frame-pointer assignments from being
+  // moved above / below calls.
   Defs = [R0,  R1,  R2,  R3,  R9,  R12,
           D0,  D1,  D2,  D3,  D4,  D5,  D6,  D7,
           D16, D17, D18, D19, D20, D21, D22, D23,
-          D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
-  def BLr9  : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
-                IIC_Br, "bl\t${func:call}",
+          D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
+  Uses = [R7, SP] in {
+  def BLr9  : ABXI<0b1011, (outs), (ins bltarget:$func, variable_ops),
+                IIC_Br, "bl\t$func",
                 [(ARMcall tglobaladdr:$func)]>,
             Requires<[IsARM, IsDarwin]> {
     let Inst{31-28} = 0b1110;
+    bits<24> func;
+    let Inst{23-0} = func;
   }
 
-  def BLr9_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
-                   IIC_Br, "bl", "\t${func:call}",
+  def BLr9_pred : ABI<0b1011, (outs), (ins bltarget:$func, variable_ops),
+                   IIC_Br, "bl", "\t$func",
                    [(ARMcall_pred tglobaladdr:$func)]>,
-                  Requires<[IsARM, IsDarwin]>;
+                  Requires<[IsARM, IsDarwin]> {
+    bits<24> func;
+    let Inst{23-0} = func;
+  }
 
   // ARMv5T and above
   def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
                IIC_Br, "blx\t$func",
                [(ARMcall GPR:$func)]>,
            Requires<[IsARM, HasV5T, IsDarwin]> {
-    let Inst{7-4}   = 0b0011;
-    let Inst{19-8}  = 0b111111111111;
-    let Inst{27-20} = 0b00010010;
+    bits<4> func;
+    let Inst{31-4} = 0b1110000100101111111111110011;
+    let Inst{3-0} = func;
  }
 
   // ARMv4T
   // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
-  def BXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
-                  IIC_Br, "mov\tlr, pc\n\tbx\t$func",
-                  [(ARMcall_nolink tGPR:$func)]>,
-             Requires<[IsARM, HasV4T, IsDarwin]> {
-    let Inst{7-4}   = 0b0001;
-    let Inst{19-8}  = 0b111111111111;
-    let Inst{27-20} = 0b00010010;
-  }
+  def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+                  Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+             Requires<[IsARM, HasV4T, IsDarwin]>;
 
   // ARMv4
-  def BMOVPCRXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
-                  IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func",
-                  [(ARMcall_nolink tGPR:$func)]>,
-             Requires<[IsARM, NoV4T, IsDarwin]> {
-    let Inst{11-4}  = 0b00000000;
-    let Inst{15-12} = 0b1111;
-    let Inst{19-16} = 0b0000;
-    let Inst{27-20} = 0b00011010;
-  }
+  def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
+                  Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+             Requires<[IsARM, NoV4T, IsDarwin]>;
 }
 
 // Tail calls.
+// FIXME: These should probably be xformed into the non-TC versions of the
+// instructions as part of MC lowering.
+// FIXME: These seem to be used for both Thumb and ARM instruction selection.
+// Thumb should have its own version since the instruction is actually
+// different, even though the mnemonic is the same.
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
   // Darwin versions.
   let Defs = [R0, R1, R2, R3, R9, R12,
@@ -1053,29 +1413,26 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
               D16, D17, D18, D19, D20, D21, D22, D23,
               D24, D25, D26, D27, D28, D29, D30, D31, PC],
       Uses = [SP] in {
-    def TCRETURNdi : AInoP<(outs), (ins i32imm:$dst, variable_ops),
-                       Pseudo, IIC_Br,
-                       "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>;
+    def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+                       IIC_Br, []>, Requires<[IsDarwin]>;
 
-    def TCRETURNri : AInoP<(outs), (ins tcGPR:$dst, variable_ops),
-                       Pseudo, IIC_Br,
-                       "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>;
+    def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+                       IIC_Br, []>, Requires<[IsDarwin]>;
 
     def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
                    IIC_Br, "b\t$dst  @ TAILCALL",
-                   []>, Requires<[IsDarwin]>;
+                   []>, Requires<[IsARM, IsDarwin]>;
 
     def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
                    IIC_Br, "b.w\t$dst  @ TAILCALL",
-                   []>, Requires<[IsDarwin]>;
+                   []>, Requires<[IsThumb, IsDarwin]>;
 
     def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops),
                      BrMiscFrm, IIC_Br, "bx\t$dst  @ TAILCALL",
                    []>, Requires<[IsDarwin]> {
-      let Inst{7-4}   = 0b0001;
-      let Inst{19-8}  = 0b111111111111;
-      let Inst{27-20} = 0b00010010;
-      let Inst{31-28} = 0b1110;
+      bits<4> dst;
+      let Inst{31-4} = 0b1110000100101111111111110001;
+      let Inst{3-0} = dst;
     }
   }
 
@@ -1085,13 +1442,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
               D16, D17, D18, D19, D20, D21, D22, D23,
               D24, D25, D26, D27, D28, D29, D30, D31, PC],
       Uses = [SP] in {
-    def TCRETURNdiND : AInoP<(outs), (ins i32imm:$dst, variable_ops),
-                       Pseudo, IIC_Br,
-                       "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>;
+    def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+                       IIC_Br, []>, Requires<[IsNotDarwin]>;
 
-    def TCRETURNriND : AInoP<(outs), (ins tcGPR:$dst, variable_ops),
-                       Pseudo, IIC_Br,
-                       "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>;
+    def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+                       IIC_Br, []>, Requires<[IsNotDarwin]>;
 
     def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
                    IIC_Br, "b\t$dst  @ TAILCALL",
@@ -1104,10 +1459,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
    def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops),
                      BrMiscFrm, IIC_Br, "bx\t$dst  @ TAILCALL",
                    []>, Requires<[IsNotDarwin]> {
-      let Inst{7-4}   = 0b0001;
-      let Inst{19-8}  = 0b111111111111;
-      let Inst{27-20} = 0b00010010;
-      let Inst{31-28} = 0b1110;
+      bits<4> dst;
+      let Inst{31-4} = 0b1110000100101111111111110001;
+      let Inst{3-0} = dst;
     }
  }
 }
 
@@ -1117,48 +1471,40 @@ let isBranch = 1, isTerminator = 1 in {
  let isBarrier = 1 in {
    let isPredicable = 1 in
    def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br,
-                "b\t$target", [(br bb:$target)]>;
+                "b\t$target", [(br bb:$target)]> {
+      bits<24> target;
+      let Inst{31-28} = 0b1110;
+      let Inst{23-0} = target;
+    }
 
-    let isNotDuplicable = 1, isIndirectBranch = 1 in {
-    def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
-                     IIC_Br, "mov\tpc, $target$jt",
-                     [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
-      let Inst{11-4}  = 0b00000000;
-      let Inst{15-12} = 0b1111;
-      let Inst{20}    = 0; // S Bit
-      let Inst{24-21} = 0b1101;
-      let Inst{27-25} = 0b000;
-    }
-    def BR_JTm : JTI<(outs),
-                     (ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id),
-                     IIC_Br, "ldr\tpc, $target$jt",
-                     [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
-                       imm:$id)]> {
-      let Inst{15-12} = 0b1111;
-      let Inst{20}    = 1; // L bit
-      let Inst{21}    = 0; // W bit
-      let Inst{22}    = 0; // B bit
-      let Inst{24}    = 1; // P bit
-      let Inst{27-25} = 0b011;
-    }
-    def BR_JTadd : JTI<(outs),
-                   (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
-                   IIC_Br, "add\tpc, $target, $idx$jt",
-                   [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
-                     imm:$id)]> {
-      let Inst{15-12} = 0b1111;
-      let Inst{20}    = 0; // S bit
-      let Inst{24-21} = 0b0100;
-      let Inst{27-25} = 0b000;
-    }
-    } // isNotDuplicable = 1, isIndirectBranch = 1
+    let isNotDuplicable = 1, isIndirectBranch = 1 in {
+    def BR_JTr : ARMPseudoInst<(outs),
+                      (ins GPR:$target, i32imm:$jt, i32imm:$id),
+                      SizeSpecial, IIC_Br,
+                      [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
+    // FIXME: This shouldn't use the generic "addrmode2," but rather be split
+    // into i12 and rs suffixed versions.
+    def BR_JTm : ARMPseudoInst<(outs),
+                     (ins addrmode2:$target, i32imm:$jt, i32imm:$id),
+                     SizeSpecial, IIC_Br,
+                     [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
+                       imm:$id)]>;
+    def BR_JTadd : ARMPseudoInst<(outs),
+                   (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id),
+                   SizeSpecial, IIC_Br,
+                   [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
+                     imm:$id)]>;
+    } // isNotDuplicable = 1, isIndirectBranch = 1
  } // isBarrier = 1
 
   // FIXME: should be able to write a pattern for ARMBrcond, but can't use
   // a two-value operand where a dag node expects two operands. :(
-  def Bcc : ABI<0b1010, (outs), (ins brtarget:$target),
+  def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
                IIC_Br, "b", "\t$target",
-               [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>;
+               [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
+    bits<24> target;
+    let Inst{23-0} = target;
+  }
 }
 
 // Branch and Exchange Jazelle -- for disassembly only
@@ -1172,271 +1518,303 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
 
 // Secure Monitor Call is a system instruction -- for disassembly only
 def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
              [/* For disassembly only; pattern left blank */]> {
-  let Inst{23-20} = 0b0110;
-  let Inst{7-4}   = 0b0111;
+  bits<4> opt;
+  let Inst{23-4} = 0b01100000000000000111;
+  let Inst{3-0} = opt;
 }
 
 // Supervisor Call (Software Interrupt) -- for disassembly only
-let isCall = 1 in {
+let isCall = 1, Uses = [SP] in {
 def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
-              [/* For disassembly only; pattern left blank */]>;
+              [/* For disassembly only; pattern left blank */]> {
+  bits<24> svc;
+  let Inst{23-0} = svc;
+}
 }
 
 // Store Return State is a system instruction -- for disassembly only
-def SRSW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode),
-                NoItinerary, "srs${addr:submode}\tsp!, $mode",
+let isCodeGenOnly = 1 in {  // FIXME: This should not use submode!
+def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
+                NoItinerary, "srs${amode}\tsp!, $mode",
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
   let Inst{22-20} = 0b110; // W = 1
 }
 
-def SRS  : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode),
-                NoItinerary, "srs${addr:submode}\tsp, $mode",
+def SRS  : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
+                NoItinerary, "srs${amode}\tsp, $mode",
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
   let Inst{22-20} = 0b100; // W = 0
 }
 
 // Return From Exception is a system instruction -- for disassembly only
-def RFEW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base),
-                NoItinerary, "rfe${addr:submode}\t$base!",
+def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
+                NoItinerary, "rfe${amode}\t$base!",
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
   let Inst{22-20} = 0b011; // W = 1
 }
 
-def RFE  : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base),
-                NoItinerary, "rfe${addr:submode}\t$base",
+def RFE  : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
+                NoItinerary, "rfe${amode}\t$base",
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-28} = 0b1111;
   let Inst{22-20} = 0b001; // W = 0
 }
+} // isCodeGenOnly = 1
 
 //===----------------------------------------------------------------------===//
 //  Load / store Instructions.
 //
 
 // Load
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def LDR  : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
-               "ldr", "\t$dst, $addr",
-               [(set GPR:$dst, (load addrmode2:$addr))]>;
+
+
+defm LDR  : AI_ldr1<0, "ldr",  IIC_iLoad_r, IIC_iLoad_si,
+                    UnOpFrag<(load node:$Src)>>;
+defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si,
+                    UnOpFrag<(zextloadi8 node:$Src)>>;
+defm STR  : AI_str1<0, "str",  IIC_iStore_r, IIC_iStore_si,
+                    BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si,
+                    BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
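 
+// For reference (illustrative): each defm above expands its multiclass into
+// an immediate and a register-shifted form, e.g. defm LDR yields LDRi12
+// ("ldr Rt, [Rn, #+/-imm12]") and LDRrs ("ldr Rt, [Rn, +/-Rm, shift]").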
 
 // Special LDR for loads from non-pc-relative constpools.
 let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
    isReMaterializable = 1 in
-def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
-                "ldr", "\t$dst, $addr", []>;
+def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+                    AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr",
+                    []> {
+  bits<4> Rt;
+  bits<17> addr;
+  let Inst{23} = addr{12};        // U (add = ('U' == 1))
+  let Inst{19-16} = 0b1111;
+  let Inst{15-12} = Rt;
+  let Inst{11-0} = addr{11-0};    // imm12
+}
 
 // Loads with zero extension
-def LDRH  : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
-                IIC_iLoadr, "ldrh", "\t$dst, $addr",
-                [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
-
-def LDRB  : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
-                IIC_iLoadr, "ldrb", "\t$dst, $addr",
-                [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
+def LDRH  : AI3ld<0b1011, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+                  IIC_iLoad_bh_r, "ldrh", "\t$Rt, $addr",
+                  [(set GPR:$Rt, (zextloadi16 addrmode3:$addr))]>;
 
 // Loads with sign extension
-def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
-                 IIC_iLoadr, "ldrsh", "\t$dst, $addr",
-                 [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
-
-def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
-                 IIC_iLoadr, "ldrsb", "\t$dst, $addr",
-                 [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
-
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+def LDRSH : AI3ld<0b1111, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+                  IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr",
+                  [(set GPR:$Rt, (sextloadi16 addrmode3:$addr))]>;
+
+def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
+                  IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr",
+                  [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
+    isCodeGenOnly = 1 in {  // $dst2 doesn't exist in asm string?
+// FIXME: $dst2 isn't in the asm string as it's implied by $Rd (dst2 = Rd+1)
+//        how to represent that such that tblgen is happy and we don't
+//        mark this codegen only?
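+// (Sketch of the constraint being described: in ARM mode "ldrd r0, [r3]"
+// writes the register pair r0/r1 -- the second destination is implicitly
+// Rt+1 and Rt is expected to be even. Only the first register appears in
+// the asm string, which is why the def below is marked isCodeGenOnly.)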
 // Load doubleword
-def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr),
-                  LdMiscFrm,
-                  IIC_iLoadr, "ldrd", "\t$dst1, $addr",
+def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
+                 (ins addrmode3:$addr), LdMiscFrm,
+                 IIC_iLoad_d_r, "ldrd", "\t$Rd, $addr",
                  []>, Requires<[IsARM, HasV5TE]>;
+}
 
 // Indexed loads
-def LDR_PRE  : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb),
-                     (ins addrmode2:$addr), LdFrm, IIC_iLoadru,
-                    "ldr", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
-                  (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru,
-                 "ldr", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
-
-def LDRH_PRE  : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb),
-                     (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
-                    "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
-                 (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
-                "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
-
-def LDRB_PRE  : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb),
-                     (ins addrmode2:$addr), LdFrm, IIC_iLoadru,
-                    "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
-                 (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
-                "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
-
-def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb),
-                      (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
-                     "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
-                 (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
-               "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
-
-def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
-                      (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
-                     "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
-
-def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
-                 (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
-                "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
+multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
+  def _PRE : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+                        (ins addrmode2:$addr), IndexModePre, LdFrm, itin,
+                        opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+    // {17-14}  Rn
+    // {13}     1 == Rm, 0 == imm12
+    // {12}     isAdd
+    // {11-0}   imm12/Rm
+    bits<18> addr;
+    let Inst{25} = addr{13};
+    let Inst{23} = addr{12};
+    let Inst{19-16} = addr{17-14};
+    let Inst{11-0} = addr{11-0};
+  }
+  def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+                         (ins GPR:$Rn, am2offset:$offset),
+                         IndexModePost, LdFrm, itin,
+                         opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+    // {13}     1 == Rm, 0 == imm12
+    // {12}     isAdd
+    // {11-0}   imm12/Rm
+    bits<14> offset;
+    bits<4> Rn;
+    let Inst{25} = offset{13};
+    let Inst{23} = offset{12};
+    let Inst{19-16} = Rn;
+    let Inst{11-0} = offset{11-0};
+  }
+}
 
-// For disassembly only
-def LDRD_PRE : AI3lddpr<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
-                 (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr,
-                "ldrd", "\t$dst1, $dst2, $addr!", "$addr.base = $base_wb", []>,
-               Requires<[IsARM, HasV5TE]>;
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDR  : AI2_ldridx<0, "ldr",  IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
+}
 
-// For disassembly only
-def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
-                 (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadr,
-                 "ldrd", "\t$dst1, $dst2, [$base], $offset",
-                 "$base = $base_wb", []>,
-                Requires<[IsARM, HasV5TE]>;
+multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> {
+  def _PRE : AI3ldstidx<op, op20, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+                        (ins addrmode3:$addr), IndexModePre,
+                        LdMiscFrm, itin,
+                        opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+    bits<14> addr;
+    let Inst{23} = addr{8};       // U bit
+    let Inst{22} = addr{13};      // 1 == imm8, 0 == Rm
+    let Inst{19-16} = addr{12-9}; // Rn
+    let Inst{11-8} = addr{7-4};   // imm7_4/zero
+    let Inst{3-0} = addr{3-0};    // imm3_0/Rm
+  }
+  def _POST : AI3ldstidx<op, op20, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+                         (ins GPR:$Rn, am3offset:$offset), IndexModePost,
+                         LdMiscFrm, itin,
+                         opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+    bits<10> offset;
+    bits<4> Rn;
+    let Inst{23} = offset{8};     // U bit
+    let Inst{22} = offset{9};     // 1 == imm8, 0 == Rm
+    let Inst{19-16} = Rn;
+    let Inst{11-8} = offset{7-4}; // imm7_4/zero
+    let Inst{3-0} = offset{3-0};  // imm3_0/Rm
+  }
+}
 
-} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDRH  : AI3_ldridx<0b1011, 1, "ldrh",  IIC_iLoad_bh_ru>;
+defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>;
+defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>;
+let hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+defm LDRD  : AI3_ldridx<0b1101, 0, "ldrd",  IIC_iLoad_d_ru>;
+} // mayLoad = 1, neverHasSideEffects = 1
 
 // LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
-
-def LDRT : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
-                   (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru,
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$dst, GPR:$base_wb),
+                      (ins GPR:$base, am2offset:$offset), IndexModeNone,
+                      LdFrm, IIC_iLoad_ru,
                    "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
   let Inst{21} = 1; // overwrite
 }
-
-def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
-                  (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
+def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+                       (ins GPR:$base, am2offset:$offset), IndexModeNone,
+                       LdFrm, IIC_iLoad_bh_ru,
                   "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
   let Inst{21} = 1; // overwrite
 }
-
-def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
-                  (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+def LDRSBT : AI3ldstidx<0b1101, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+                        (ins GPR:$base, am3offset:$offset), IndexModePost,
+                        LdMiscFrm, IIC_iLoad_bh_ru,
                  "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
   let Inst{21} = 1; // overwrite
 }
-
-def LDRHT : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
-                  (ins GPR:$base, am3offset:$offset), LdMiscFrm, IIC_iLoadru,
-                  "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+def LDRHT : AI3ldstidx<0b1011, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+                       (ins GPR:$base, am3offset:$offset), IndexModePost,
+                       LdMiscFrm, IIC_iLoad_bh_ru,
+                  "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
   let Inst{21} = 1; // overwrite
 }
-
-def LDRSHT : AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
-                  (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+def LDRSHT : AI3ldstidx<0b1111, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
+                        (ins GPR:$base, am3offset:$offset), IndexModePost,
+                        LdMiscFrm, IIC_iLoad_bh_ru,
                  "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
   let Inst{21} = 1; // overwrite
 }
+}
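 
+// The *T variants are the unprivileged-access forms; note the common
+// "let Inst{21} = 1" above. With post-indexed addressing (P = 0), a set
+// W bit selects user-mode translation, which is why e.g. "ldrbt" exists
+// here only in the post-indexed shape.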
 
 // Store
-def STR  : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
-               "str", "\t$src, $addr",
-               [(store GPR:$src, addrmode2:$addr)]>;
 
 // Stores with truncate
-def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
-               IIC_iStorer, "strh", "\t$src, $addr",
-               [(truncstorei16 GPR:$src, addrmode3:$addr)]>;
-
-def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
-               "strb", "\t$src, $addr",
-               [(truncstorei8 GPR:$src, addrmode2:$addr)]>;
+def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
+               IIC_iStore_bh_r, "strh", "\t$Rt, $addr",
+               [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>;
 
 // Store doubleword
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
-def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
-               StMiscFrm, IIC_iStorer,
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1,
+    isCodeGenOnly = 1 in  // $src2 doesn't exist in asm string
+def STRD : AI3str<0b1111, (outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
+               StMiscFrm, IIC_iStore_d_r,
                "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
 
 // Indexed stores
-def STR_PRE  : AI2stwpr<(outs GPR:$base_wb),
-                     (ins GPR:$src, GPR:$base, am2offset:$offset),
-                     StFrm, IIC_iStoreru,
-                    "str", "\t$src, [$base, $offset]!", "$base = $base_wb",
-                    [(set GPR:$base_wb,
-                      (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>;
-
-def STR_POST : AI2stwpo<(outs GPR:$base_wb),
-                    (ins GPR:$src, GPR:$base,am2offset:$offset),
-                    StFrm, IIC_iStoreru,
-                   "str", "\t$src, [$base], $offset", "$base = $base_wb",
-                   [(set GPR:$base_wb,
-                     (post_store GPR:$src, GPR:$base, am2offset:$offset))]>;
-
-def STRH_PRE : AI3sthpr<(outs GPR:$base_wb),
-                     (ins GPR:$src, GPR:$base,am3offset:$offset),
-                     StMiscFrm, IIC_iStoreru,
-                    "strh", "\t$src, [$base, $offset]!", "$base = $base_wb",
-                    [(set GPR:$base_wb,
-                      (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
-
-def STRH_POST: AI3sthpo<(outs GPR:$base_wb),
-                     (ins GPR:$src, GPR:$base,am3offset:$offset),
-                     StMiscFrm, IIC_iStoreru,
-                    "strh", "\t$src, [$base], $offset", "$base = $base_wb",
-                    [(set GPR:$base_wb, (post_truncsti16 GPR:$src,
-                                         GPR:$base, am3offset:$offset))]>;
-
-def STRB_PRE : AI2stbpr<(outs GPR:$base_wb),
-                     (ins GPR:$src, GPR:$base,am2offset:$offset),
-                     StFrm, IIC_iStoreru,
-                    "strb", "\t$src, [$base, $offset]!", "$base = $base_wb",
-                    [(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
-                                         GPR:$base, am2offset:$offset))]>;
-
-def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
-                     (ins GPR:$src, GPR:$base,am2offset:$offset),
-                     StFrm, IIC_iStoreru,
-                    "strb", "\t$src, [$base], $offset", "$base = $base_wb",
-                    [(set GPR:$base_wb, (post_truncsti8 GPR:$src,
-                                         GPR:$base, am2offset:$offset))]>;
+def STR_PRE  : AI2stridx<0, 1, (outs GPR:$Rn_wb),
+                         (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+                         IndexModePre, StFrm, IIC_iStore_ru,
+                         "str", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+                         [(set GPR:$Rn_wb,
+                          (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
+
+def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb),
+                         (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+                         IndexModePost, StFrm, IIC_iStore_ru,
+                         "str", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+                         [(set GPR:$Rn_wb,
+                          (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
+
+def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb),
+                         (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+                         IndexModePre, StFrm, IIC_iStore_bh_ru,
+                         "strb", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+                         [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt,
+                                            GPR:$Rn, am2offset:$offset))]>;
+def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb),
+                         (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+                         IndexModePost, StFrm, IIC_iStore_bh_ru,
$offset", "$Rn = $Rn_wb", + [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt, + GPR:$Rn, am2offset:$offset))]>; + +def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), + IndexModePre, StMiscFrm, IIC_iStore_ru, + "strh", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb", + [(set GPR:$Rn_wb, + (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; + +def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), + IndexModePost, StMiscFrm, IIC_iStore_bh_ru, + "strh", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", + [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, + GPR:$Rn, am3offset:$offset))]>; // For disassembly only def STRD_PRE : AI3stdpr<(outs GPR:$base_wb), (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), - StMiscFrm, IIC_iStoreru, + StMiscFrm, IIC_iStore_d_ru, "strd", "\t$src1, $src2, [$base, $offset]!", "$base = $base_wb", []>; // For disassembly only def STRD_POST: AI3stdpo<(outs GPR:$base_wb), (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), - StMiscFrm, IIC_iStoreru, + StMiscFrm, IIC_iStore_d_ru, "strd", "\t$src1, $src2, [$base], $offset", "$base = $base_wb", []>; // STRT, STRBT, and STRHT are for disassembly only. -def STRT : AI2stwpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am2offset:$offset), - StFrm, IIC_iStoreru, - "strt", "\t$src, [$base], $offset", "$base = $base_wb", +def STRT : AI2stridx<0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn,am2offset:$offset), + IndexModeNone, StFrm, IIC_iStore_ru, + "strt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite } -def STRBT : AI2stbpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am2offset:$offset), - StFrm, IIC_iStoreru, - "strbt", "\t$src, [$base], $offset", "$base = $base_wb", +def STRBT : AI2stridx<1, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), + IndexModeNone, StFrm, IIC_iStore_bh_ru, + "strbt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite } def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am3offset:$offset), - StMiscFrm, IIC_iStoreru, + StMiscFrm, IIC_iStore_bh_ru, "strht", "\t$src, [$base], $offset", "$base = $base_wb", [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite @@ -1446,103 +1824,212 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb), // Load / store multiple Instructions. 
 //
 
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
-                          reglist:$dsts, variable_ops),
-                 IndexModeNone, LdStMulFrm, IIC_iLoadm,
-                 "ldm${addr:submode}${p}\t$addr, $dsts", "", []>;
-
-def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
-                                      reglist:$dsts, variable_ops),
-                     IndexModeUpd, LdStMulFrm, IIC_iLoadm,
-                     "ldm${addr:submode}${p}\t$addr!, $dsts",
-                     "$addr.addr = $wb", []>;
-} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
-
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
-                          reglist:$srcs, variable_ops),
-                 IndexModeNone, LdStMulFrm, IIC_iStorem,
-                 "stm${addr:submode}${p}\t$addr, $srcs", "", []>;
-
-def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
-                                      reglist:$srcs, variable_ops),
-                     IndexModeUpd, LdStMulFrm, IIC_iStorem,
-                     "stm${addr:submode}${p}\t$addr!, $srcs",
-                     "$addr.addr = $wb", []>;
-} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
+multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+                         InstrItinClass itin, InstrItinClass itin_upd> {
+  def IA :
+    AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeNone, f, itin,
+         !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b01;       // Increment After
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+  }
+  def IA_UPD :
+    AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeUpd, f, itin_upd,
+         !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let Inst{24-23} = 0b01;       // Increment After
+    let Inst{21}    = 1;          // Writeback
+    let Inst{20}    = L_bit;
+  }
+  def DA :
+    AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeNone, f, itin,
+         !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b00;       // Decrement After
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+  }
+  def DA_UPD :
+    AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeUpd, f, itin_upd,
+         !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let Inst{24-23} = 0b00;       // Decrement After
+    let Inst{21}    = 1;          // Writeback
+    let Inst{20}    = L_bit;
  }
  def DB :
    AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
         IndexModeNone, f, itin,
         !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
    let Inst{24-23} = 0b10;       // Decrement Before
    let Inst{21}    = 0;          // No writeback
    let Inst{20}    = L_bit;
  }
  def DB_UPD :
    AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
         IndexModeUpd, f, itin_upd,
         !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
    let Inst{24-23} = 0b10;       // Decrement Before
    let Inst{21}    = 1;          // Writeback
    let Inst{20}    = L_bit;
  }
  def IB :
    AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
         IndexModeNone, f, itin,
         !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
    let Inst{24-23} = 0b11;       // Increment Before
    let Inst{21}    = 0;          // No writeback
    let Inst{20}    = L_bit;
  }
  def IB_UPD :
    AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
         IndexModeUpd, f, itin_upd,
         !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
    let Inst{24-23} = 0b11;       // Increment Before
    let Inst{21}    = 1;          // Writeback
    let Inst{20}    = L_bit;
  }
}

+let neverHasSideEffects = 1 in {
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+
+} // neverHasSideEffects
+
+// Load / Store Multiple Mnemonic Aliases
+def : MnemonicAlias<"ldm", "ldmia">;
+def : MnemonicAlias<"stm", "stmia">;
+
+// FIXME: remove when we have a way to mark an MI with these properties.
+// FIXME: Should pc be an implicit operand like PICADD, etc?
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+    hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+// FIXME: Should be a pseudo-instruction.
+def LDMIA_RET : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+                                      reglist:$regs, variable_ops),
+                     IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr,
+                     "ldmia${p}\t$Rn!, $regs",
+                     "$Rn = $wb", []> {
+  let Inst{24-23} = 0b01;       // Increment After
+  let Inst{21}    = 1;          // Writeback
+  let Inst{20}    = 1;          // Load
+}
 
 //===----------------------------------------------------------------------===//
 //  Move Instructions.
 //
 
 let neverHasSideEffects = 1 in
-def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
-                "mov", "\t$dst, $src", []>, UnaryDP {
+def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
+                "mov", "\t$Rd, $Rm", []>, UnaryDP {
+  bits<4> Rd;
+  bits<4> Rm;
+
+  let Inst{11-4} = 0b00000000;
   let Inst{25} = 0;
+  let Inst{3-0} = Rm;
+  let Inst{15-12} = Rd;
 }
 
 // A version for the smaller set of tail call registers.
 let neverHasSideEffects = 1 in
-def MOVr_TC : AsI1<0b1101, (outs tcGPR:$dst), (ins tcGPR:$src), DPFrm,
-                   IIC_iMOVr, "mov", "\t$dst, $src", []>, UnaryDP {
+def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
+                   IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP {
+  bits<4> Rd;
+  bits<4> Rm;
+
+  let Inst{11-4} = 0b00000000;
   let Inst{25} = 0;
+  let Inst{3-0} = Rm;
+  let Inst{15-12} = Rd;
 }
 
-def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
+def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
                 DPSoRegFrm, IIC_iMOVsr,
-                "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
+                "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
+                UnaryDP {
+  bits<4> Rd;
+  bits<12> src;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = src;
   let Inst{25} = 0;
 }
 
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi,
-                "mov", "\t$dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
+                "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP {
+  bits<4> Rd;
+  bits<12> imm;
   let Inst{25} = 1;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = 0b0000;
+  let Inst{11-0} = imm;
 }
 
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm),
                  DPFrm, IIC_iMOVi,
-                 "movw", "\t$dst, $src",
-                 [(set GPR:$dst, imm0_65535:$src)]>,
+                 "movw", "\t$Rd, $imm",
+                 [(set GPR:$Rd, imm0_65535:$imm)]>,
                  Requires<[IsARM, HasV6T2]>, UnaryDP {
+  bits<4> Rd;
+  bits<16> imm;
+  let Inst{15-12} = Rd;
+  let Inst{11-0}  = imm{11-0};
+  let Inst{19-16} = imm{15-12};
   let Inst{20} = 0;
   let Inst{25} = 1;
 }
 
+def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd),
+                      (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+
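+// Usage sketch (illustrative): a full 32-bit constant is typically
+// materialized with a movw/movt pair, e.g.
+//   movw r0, #0x5678    @ MOVi16:  r0 = 0x00005678
+//   movt r0, #0x1234    @ MOVTi16: r0 = 0x12345678, low half preserved
+// which is exactly what the (or (and GPR:$src, 0xffff), lo16AllZero:$imm)
+// pattern below matches.
+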
= "$src = $Rd" in { +def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm), DPFrm, IIC_iMOVi, - "movt", "\t$dst, $imm", - [(set GPR:$dst, + "movt", "\t$Rd, $imm", + [(set GPR:$Rd, (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>, UnaryDP, Requires<[IsARM, HasV6T2]> { + bits<4> Rd; + bits<16> imm; + let Inst{15-12} = Rd; + let Inst{11-0} = imm{11-0}; + let Inst{19-16} = imm{15-12}; let Inst{20} = 0; let Inst{25} = 1; } +def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), + (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; + +} // Constraints + def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>, Requires<[IsARM, HasV6T2]>; let Uses = [CPSR] in -def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, - "mov", "\t$dst, $src, rrx", - [(set GPR:$dst, (ARMrrx GPR:$src))]>, UnaryDP; +def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi, + [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP, + Requires<[IsARM]>; // These aren't really mov instructions, but we have to define them this way // due to flag operands. let Defs = [CPSR] in { -def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, - IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1", - [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP; -def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, - IIC_iMOVsi, "movs", "\t$dst, $src, asr #1", - [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP; +def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP, + Requires<[IsARM]>; +def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP, + Requires<[IsARM]>; } //===----------------------------------------------------------------------===// @@ -1551,31 +2038,31 @@ def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, // Sign extenders -defm SXTB : AI_unary_rrot<0b01101010, - "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; -defm SXTH : AI_unary_rrot<0b01101011, - "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; +defm SXTB : AI_ext_rrot<0b01101010, + "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; +defm SXTH : AI_ext_rrot<0b01101011, + "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; -defm SXTAB : AI_bin_rrot<0b01101010, +defm SXTAB : AI_exta_rrot<0b01101010, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; -defm SXTAH : AI_bin_rrot<0b01101011, +defm SXTAH : AI_exta_rrot<0b01101011, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; // For disassembly only -defm SXTB16 : AI_unary_rrot_np<0b01101000, "sxtb16">; +defm SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">; // For disassembly only -defm SXTAB16 : AI_bin_rrot_np<0b01101000, "sxtab16">; +defm SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">; // Zero extenders let AddedComplexity = 16 in { -defm UXTB : AI_unary_rrot<0b01101110, - "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; -defm UXTH : AI_unary_rrot<0b01101111, - "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm UXTB16 : AI_unary_rrot<0b01101100, - "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; +defm UXTB : AI_ext_rrot<0b01101110, + "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; +defm UXTH : AI_ext_rrot<0b01101111, + "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; +defm UXTB16 : AI_ext_rrot<0b01101100, + "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; // FIXME: This pattern incorrectly assumes the shl operator is a rotate. 
 // FIXME: This pattern incorrectly assumes the shl operator is a rotate.
 //        The transformation should probably be done as a combiner action
@@ -1586,33 +2073,49 @@ defm UXTB16 : AI_unary_rrot<0b01101100,
 def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
                (UXTB16r_rot GPR:$Src, 8)>;
 
-defm UXTAB : AI_bin_rrot<0b01101110, "uxtab",
+defm UXTAB : AI_exta_rrot<0b01101110, "uxtab",
                         BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-defm UXTAH : AI_bin_rrot<0b01101111, "uxtah",
+defm UXTAH : AI_exta_rrot<0b01101111, "uxtah",
                         BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
 }
 
 // This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
 // For disassembly only
-defm UXTAB16 : AI_bin_rrot_np<0b01101100, "uxtab16">;
+defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
 
-def SBFX  : I<(outs GPR:$dst),
-              (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
-               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
-               "sbfx", "\t$dst, $src, $lsb, $width", "", []>,
+def SBFX  : I<(outs GPR:$Rd),
+              (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
               Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<5> lsb;
+  bits<5> width;
   let Inst{27-21} = 0b0111101;
   let Inst{6-4}   = 0b101;
+  let Inst{20-16} = width;
+  let Inst{15-12} = Rd;
+  let Inst{11-7}  = lsb;
+  let Inst{3-0}   = Rn;
 }
 
-def UBFX  : I<(outs GPR:$dst),
-              (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
-               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
-               "ubfx", "\t$dst, $src, $lsb, $width", "", []>,
+def UBFX  : I<(outs GPR:$Rd),
+              (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
              Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<5> lsb;
+  bits<5> width;
   let Inst{27-21} = 0b0111111;
   let Inst{6-4}   = 0b101;
+  let Inst{20-16} = width;
+  let Inst{15-12} = Rd;
+  let Inst{11-7}  = lsb;
+  let Inst{3-0}   = Rn;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1620,100 +2123,166 @@ def UBFX  : I<(outs GPR:$dst),
 //
 
 defm ADD  : AsI1_bin_irs<0b0100, "add",
+                         IIC_iALUi, IIC_iALUr, IIC_iALUsr,
                          BinOpFrag<(add  node:$LHS, node:$RHS)>, 1>;
 defm SUB  : AsI1_bin_irs<0b0010, "sub",
+                         IIC_iALUi, IIC_iALUr, IIC_iALUsr,
                          BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
 
 // ADD and SUB with 's' bit set.
 defm ADDS : AI1_bin_s_irs<0b0100, "adds",
+                          IIC_iALUi, IIC_iALUr, IIC_iALUsr,
                           BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
 defm SUBS : AI1_bin_s_irs<0b0010, "subs",
+                          IIC_iALUi, IIC_iALUr, IIC_iALUsr,
                           BinOpFrag<(subc node:$LHS, node:$RHS)>>;
 
 defm ADC : AI1_adde_sube_irs<0b0101, "adc",
                           BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
 defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
                           BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
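+// Illustrative note: the carry-using forms let i64 arithmetic lower to
+// register pairs, e.g. a 64-bit add of r1:r0 and r3:r2 is
+//   adds r0, r0, r2    @ low word, sets carry
+//   adc  r1, r1, r3    @ high word, consumes carry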
+
+// ADC and SBC with 's' bit set.
 defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
                           BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
 defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
                           BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
 
-def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
-                 IIC_iALUi, "rsb", "\t$dst, $a, $b",
-                 [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
-  let Inst{25} = 1;
+def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+                 IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm",
+                 [(set GPR:$Rd, (sub so_imm:$imm, GPR:$Rn))]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> imm;
+  let Inst{25} = 1;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{11-0} = imm;
 }
 
 // The reg/reg form is only defined for the disassembler; for codegen it is
 // equivalent to SUBrr.
-def RSBrr : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
-                 IIC_iALUr, "rsb", "\t$dst, $a, $b",
+def RSBrr : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+                 IIC_iALUr, "rsb", "\t$Rd, $Rn, $Rm",
                  [/* For disassembly only; pattern left blank */]> {
-  let Inst{25} = 0;
-  let Inst{11-4} = 0b00000000;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  let Inst{11-4} = 0b00000000;
+  let Inst{25} = 0;
+  let Inst{3-0} = Rm;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
 }
 
-def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
-                 IIC_iALUsr, "rsb", "\t$dst, $a, $b",
-                 [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
-  let Inst{25} = 0;
+def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+                 DPSoRegFrm, IIC_iALUsr, "rsb", "\t$Rd, $Rn, $shift",
+                 [(set GPR:$Rd, (sub so_reg:$shift, GPR:$Rn))]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> shift;
+  let Inst{25} = 0;
+  let Inst{11-0} = shift;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
 }
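+// Illustrative note: "rsb r0, r1, #0" computes r0 = 0 - r1 (integer
+// negation), which is exactly what the (sub so_imm:$imm, GPR:$Rn) pattern
+// above selects.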
 
 // RSB with 's' bit set.
-let Defs = [CPSR] in {
-def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
-                 IIC_iALUi, "rsbs", "\t$dst, $a, $b",
-                 [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> {
-  let Inst{20} = 1;
-  let Inst{25} = 1;
-}
-def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
-                 IIC_iALUsr, "rsbs", "\t$dst, $a, $b",
-                 [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]> {
-  let Inst{20} = 1;
-  let Inst{25} = 0;
+let isCodeGenOnly = 1, Defs = [CPSR] in {
+def RSBSri : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+                 IIC_iALUi, "rsbs", "\t$Rd, $Rn, $imm",
                 [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> imm;
+  let Inst{25} = 1;
+  let Inst{20} = 1;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{11-0} = imm;
+}
+def RSBSrs : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+                 DPSoRegFrm, IIC_iALUsr, "rsbs", "\t$Rd, $Rn, $shift",
+                 [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> shift;
+  let Inst{25} = 0;
+  let Inst{20} = 1;
+  let Inst{11-0} = shift;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
 }
 }
 
 let Uses = [CPSR] in {
-def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
-                 DPFrm, IIC_iALUi, "rsc", "\t$dst, $a, $b",
-                 [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>,
+def RSCri : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+                 DPFrm, IIC_iALUi, "rsc", "\t$Rd, $Rn, $imm",
+                 [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
                  Requires<[IsARM]> {
-  let Inst{25} = 1;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> imm;
+  let Inst{25} = 1;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{11-0} = imm;
 }
 // The reg/reg form is only defined for the disassembler; for codegen it is
 // equivalent to SUBrr.
-def RSCrr : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                 DPFrm, IIC_iALUr, "rsc", "\t$dst, $a, $b",
+def RSCrr : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                 DPFrm, IIC_iALUr, "rsc", "\t$Rd, $Rn, $Rm",
                  [/* For disassembly only; pattern left blank */]> {
-  let Inst{25} = 0;
-  let Inst{11-4} = 0b00000000;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  let Inst{11-4} = 0b00000000;
+  let Inst{25} = 0;
+  let Inst{3-0} = Rm;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
 }
-def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
-                 DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
-                 [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
+def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
                 DPSoRegFrm, IIC_iALUsr, "rsc", "\t$Rd, $Rn, $shift",
                 [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
                  Requires<[IsARM]> {
-  let Inst{25} = 0;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> shift;
+  let Inst{25} = 0;
+  let Inst{11-0} = shift;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
 }
 }
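+// Illustrative note: RSC pairs with RSBS to negate a 64-bit value in r1:r0:
+//   rsbs r0, r0, #0    @ low word, sets borrow
+//   rsc  r1, r1, #0    @ high word, consumes borrow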
 
 // FIXME: Allow these to be predicated.
-let Defs = [CPSR], Uses = [CPSR] in {
-def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
-                  DPFrm, IIC_iALUi, "rscs\t$dst, $a, $b",
-                  [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>,
+let isCodeGenOnly = 1, Defs = [CPSR], Uses = [CPSR] in {
+def RSCSri : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
                  DPFrm, IIC_iALUi, "rscs\t$Rd, $Rn, $imm",
                  [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
                  Requires<[IsARM]> {
-  let Inst{20} = 1;
-  let Inst{25} = 1;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> imm;
+  let Inst{25} = 1;
+  let Inst{20} = 1;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
+  let Inst{11-0} = imm;
 }
-def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
-                  DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b",
-                  [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
+def RSCSrs : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
                  DPSoRegFrm, IIC_iALUsr, "rscs\t$Rd, $Rn, $shift",
                  [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
                  Requires<[IsARM]> {
-  let Inst{20} = 1;
-  let Inst{25} = 0;
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<12> shift;
+  let Inst{25} = 0;
+  let Inst{20} = 1;
+  let Inst{11-0} = shift;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = Rn;
 }
 }
 
@@ -1740,111 +2309,166 @@ def : ARMPat<(adde GPR:$src, so_imm_not:$imm),
 // ARM Arithmetic Instruction -- for disassembly only
 // GPR:$dst = GPR:$a op GPR:$b
-class AAI<bits<8> op27_20, bits<4> op7_4, string opc,
-          list<dag> pattern = [/* For disassembly only; pattern left blank */]>
-  : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr,
-       opc, "\t$dst, $a, $b", pattern> {
+class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
+          list<dag> pattern = [/* For disassembly only; pattern left blank */],
+          dag iops = (ins GPR:$Rn, GPR:$Rm), string asm = "\t$Rd, $Rn, $Rm">
+  : AI<(outs GPR:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+  bits<4> Rn;
+  bits<4> Rd;
+  bits<4> Rm;
   let Inst{27-20} = op27_20;
-  let Inst{7-4} = op7_4;
+  let Inst{11-4} = op11_4;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rd;
+  let Inst{3-0} = Rm;
 }
 
 // Saturating add/subtract -- for disassembly only
 
-def QADD    : AAI<0b00010000, 0b0101, "qadd",
-                  [(set GPR:$dst, (int_arm_qadd GPR:$a, GPR:$b))]>;
-def QADD16  : AAI<0b01100010, 0b0001, "qadd16">;
-def QADD8   : AAI<0b01100010, 0b1001, "qadd8">;
-def QASX    : AAI<0b01100010, 0b0011, "qasx">;
-def QDADD   : AAI<0b00010100, 0b0101, "qdadd">;
-def QDSUB   : AAI<0b00010110, 0b0101, "qdsub">;
-def QSAX    : AAI<0b01100010, 0b0101, "qsax">;
-def QSUB    : AAI<0b00010010, 0b0101, "qsub",
                  [(set GPR:$dst, (int_arm_qsub GPR:$a, GPR:$b))]>;
-def QSUB16  : AAI<0b01100010, 0b0111, "qsub16">;
-def QSUB8   : AAI<0b01100010, 0b1111, "qsub8">;
-def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">;
-def UQADD8  : AAI<0b01100110, 0b1001, "uqadd8">;
-def UQASX   : AAI<0b01100110, 0b0011, "uqasx">;
-def UQSAX   : AAI<0b01100110, 0b0101, "uqsax">;
-def UQSUB16 : AAI<0b01100110, 0b0111, "uqsub16">;
-def UQSUB8  : AAI<0b01100110, 0b1111, "uqsub8">;
+def QADD    : AAI<0b00010000, 0b00000101, "qadd",
+                  [(set GPR:$Rd, (int_arm_qadd GPR:$Rm, GPR:$Rn))],
+                  (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QSUB    : AAI<0b00010010, 0b00000101, "qsub",
+                  [(set GPR:$Rd, (int_arm_qsub GPR:$Rm, GPR:$Rn))],
+                  (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QDADD   : AAI<0b00010100, 0b00000101, "qdadd", [], (ins GPR:$Rm, GPR:$Rn),
+                  "\t$Rd, $Rm, $Rn">;
+def QDSUB   : AAI<0b00010110, 0b00000101, "qdsub", [], (ins GPR:$Rm, GPR:$Rn),
+                  "\t$Rd, $Rm, $Rn">;
+
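+// Illustrative note: the Q* forms saturate instead of wrapping; e.g. with
+// r1 = 0x7fffffff and r2 = 1, "qadd r0, r1, r2" leaves r0 = 0x7fffffff and
+// sets the Q (sticky saturation) flag.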
+def QADD16  : AAI<0b01100010, 0b11110001, "qadd16">;
+def QADD8   : AAI<0b01100010, 0b11111001, "qadd8">;
+def QASX    : AAI<0b01100010, 0b11110011, "qasx">;
+def QSAX    : AAI<0b01100010, 0b11110101, "qsax">;
+def QSUB16  : AAI<0b01100010, 0b11110111, "qsub16">;
+def QSUB8   : AAI<0b01100010, 0b11111111, "qsub8">;
+def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">;
+def UQADD8  : AAI<0b01100110, 0b11111001, "uqadd8">;
+def UQASX   : AAI<0b01100110, 0b11110011, "uqasx">;
+def UQSAX   : AAI<0b01100110, 0b11110101, "uqsax">;
+def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
+def UQSUB8  : AAI<0b01100110, 0b11111111, "uqsub8">;
 
 // Signed/Unsigned add/subtract -- for disassembly only
 
-def SASX   : AAI<0b01100001, 0b0011, "sasx">;
-def SADD16 : AAI<0b01100001, 0b0001, "sadd16">;
-def SADD8  : AAI<0b01100001, 0b1001, "sadd8">;
-def SSAX   : AAI<0b01100001, 0b0101, "ssax">;
-def SSUB16 : AAI<0b01100001, 0b0111, "ssub16">;
-def SSUB8  : AAI<0b01100001, 0b1111, "ssub8">;
-def UASX   : AAI<0b01100101, 0b0011, "uasx">;
-def UADD16 : AAI<0b01100101, 0b0001, "uadd16">;
-def UADD8  : AAI<0b01100101, 0b1001, "uadd8">;
-def USAX   : AAI<0b01100101, 0b0101, "usax">;
-def USUB16 : AAI<0b01100101, 0b0111, "usub16">;
-def USUB8  : AAI<0b01100101, 0b1111, "usub8">;
+def SASX   : AAI<0b01100001, 0b11110011, "sasx">;
+def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
+def SADD8  : AAI<0b01100001, 0b11111001, "sadd8">;
+def SSAX   : AAI<0b01100001, 0b11110101, "ssax">;
+def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">;
+def SSUB8  : AAI<0b01100001, 0b11111111, "ssub8">;
+def UASX   : AAI<0b01100101, 0b11110011, "uasx">;
+def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">;
+def UADD8  : AAI<0b01100101, 0b11111001, "uadd8">;
+def USAX   : AAI<0b01100101, 0b11110101, "usax">;
+def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
+def USUB8  : AAI<0b01100101, 0b11111111, "usub8">;
 
 // Signed/Unsigned halving add/subtract -- for disassembly only
 
-def SHASX   : AAI<0b01100011, 0b0011, "shasx">;
-def SHADD16 : AAI<0b01100011, 0b0001, "shadd16">;
-def SHADD8  : AAI<0b01100011, 0b1001, "shadd8">;
-def SHSAX   : AAI<0b01100011, 0b0101, "shsax">;
-def SHSUB16 : AAI<0b01100011, 0b0111, "shsub16">;
-def SHSUB8  : AAI<0b01100011, 0b1111, "shsub8">;
-def UHASX   : AAI<0b01100111, 0b0011, "uhasx">;
-def UHADD16 : AAI<0b01100111, 0b0001, "uhadd16">;
-def UHADD8  : AAI<0b01100111, 0b1001, "uhadd8">;
-def UHSAX   : AAI<0b01100111, 0b0101, "uhsax">;
-def UHSUB16 : AAI<0b01100111, 0b0111, "uhsub16">;
-def UHSUB8  : AAI<0b01100111, 0b1111, "uhsub8">;
+def SHASX   : AAI<0b01100011, 0b11110011, "shasx">;
+def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
+def SHADD8  : AAI<0b01100011, 0b11111001, "shadd8">;
+def SHSAX   : AAI<0b01100011, 0b11110101, "shsax">;
+def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">;
+def SHSUB8  : AAI<0b01100011, 0b11111111, "shsub8">;
+def UHASX   : AAI<0b01100111, 0b11110011, "uhasx">;
+def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">;
+def UHADD8  : AAI<0b01100111, 0b11111001, "uhadd8">;
+def UHSAX   : AAI<0b01100111, 0b11110101, "uhsax">;
+def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
+def UHSUB8  : AAI<0b01100111, 0b11111111, "uhsub8">;
 
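+// Illustrative note: with the widened op11_4 field the fixed bits
+// Inst{11-8} = 0b1111 are now part of the template argument; e.g. an
+// unconditional "sadd16 r0, r1, r2" encodes as 0xe6110f12
+// (op27_20 = 0b01100001, Rn = 1, Rd = 0, op11_4 = 0b11110001, Rm = 2).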
 // Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly
 // only
 
-def USAD8  : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
+def USAD8  : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
                 MulFrm /* for convenience */, NoItinerary, "usad8",
-                "\t$dst, $a, $b", []>,
+                "\t$Rd, $Rn, $Rm", []>,
              Requires<[IsARM, HasV6]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
   let Inst{27-20} = 0b01111000;
   let Inst{15-12} = 0b1111;
   let Inst{7-4} = 0b0001;
+  let Inst{19-16} = Rd;
+  let Inst{11-8} = Rm;
+  let Inst{3-0} = Rn;
 }
-def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
                 MulFrm /* for convenience */, NoItinerary, "usada8",
-                "\t$dst, $a, $b, $acc", []>,
+                "\t$Rd, $Rn, $Rm, $Ra", []>,
              Requires<[IsARM, HasV6]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  bits<4> Ra;
   let Inst{27-20} = 0b01111000;
   let Inst{7-4} = 0b0001;
+  let Inst{19-16} = Rd;
+  let Inst{15-12} = Ra;
+  let Inst{11-8} = Rm;
+  let Inst{3-0} = Rn;
 }
 
 // Signed/Unsigned saturate -- for disassembly only
 
-def SSAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
-              SatFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+def SSAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
              SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $a$sh",
              [/* For disassembly only; pattern left blank */]> {
+  bits<4> Rd;
+  bits<5> sat_imm;
+  bits<4> Rn;
+  bits<8> sh;
   let Inst{27-21} = 0b0110101;
   let Inst{5-4} = 0b01;
+  let Inst{20-16} = sat_imm;
+  let Inst{15-12} = Rd;
+  let Inst{11-7} = sh{7-3};
+  let Inst{6} = sh{0};
+  let Inst{3-0} = Rn;
 }
 
-def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
-                NoItinerary, "ssat16", "\t$dst, $bit_pos, $a",
+def SSAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$Rn), SatFrm,
                NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn",
                [/* For disassembly only; pattern left blank */]> {
+  bits<4> Rd;
+  bits<4> sat_imm;
+  bits<4> Rn;
   let Inst{27-20} = 0b01101010;
-  let Inst{7-4} = 0b0011;
+  let Inst{11-4} = 0b11110011;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = sat_imm;
+  let Inst{3-0} = Rn;
 }
 
-def USAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
-              SatFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh),
             SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $a$sh",
             [/* For disassembly only; pattern left blank */]> {
+  bits<4> Rd;
+  bits<5> sat_imm;
+  bits<4> Rn;
+  bits<8> sh;
   let Inst{27-21} = 0b0110111;
   let Inst{5-4} = 0b01;
+  let Inst{15-12} = Rd;
+  let Inst{11-7} = sh{7-3};
+  let Inst{6} = sh{0};
+  let Inst{20-16} = sat_imm;
+  let Inst{3-0} = Rn;
 }
 
-def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
-                NoItinerary, "usat16", "\t$dst, $bit_pos, $a",
+def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm,
                NoItinerary, "usat16", "\t$Rd, $sat_imm, $a",
                [/* For disassembly only; pattern left blank */]> {
+  bits<4> Rd;
+  bits<4> sat_imm;
+  bits<4> Rn;
   let Inst{27-20} = 0b01101110;
-  let Inst{7-4} = 0b0011;
+  let Inst{11-4} = 0b11110011;
+  let Inst{15-12} = Rd;
+  let Inst{19-16} = sat_imm;
+  let Inst{3-0} = Rn;
 }
 
 def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>;
@@ -1855,52 +2479,100 @@ def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
 //
 
 defm AND   : AsI1_bin_irs<0b0000, "and",
+                          IIC_iBITi, IIC_iBITr, IIC_iBITsr,
                           BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
-defm ANDS  : AI1_bin_s_irs<0b0000, "and",
-                           BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>;
 defm ORR   : AsI1_bin_irs<0b1100, "orr",
+                          IIC_iBITi, IIC_iBITr, IIC_iBITsr,
                           BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
 defm EOR   : AsI1_bin_irs<0b0001, "eor",
+                          IIC_iBITi, IIC_iBITr, IIC_iBITsr,
                          BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
 defm BIC   : AsI1_bin_irs<0b1110, "bic",
+                          IIC_iBITi, IIC_iBITr, IIC_iBITsr,
                           BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
 
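+// Illustrative note: bf_inv_mask_imm packs the bitfield position into one
+// operand: imm{4-0} is the lsb and imm{9-5} encodes the top of the field
+// (architecturally, Inst{20-16} holds msb = lsb + width - 1). E.g.
+// "bfc r0, #8, #4" (clear bits [11:8]) has Inst{11-7} = 8, Inst{20-16} = 11.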
-def BFC    : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def BFC    : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
                AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
-               "bfc", "\t$dst, $imm", "$src = $dst",
-               [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
+               "bfc", "\t$Rd, $imm", "$src = $Rd",
+               [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
                Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<10> imm;
   let Inst{27-21} = 0b0111110;
   let Inst{6-0}   = 0b0011111;
+  let Inst{15-12} = Rd;
+  let Inst{11-7}  = imm{4-0}; // lsb
+  let Inst{20-16} = imm{9-5}; // width
 }
 
 // A8.6.18  BFI - Bitfield insert (Encoding A1)
-def BFI    : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm),
+def BFI    : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
                AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
-               "bfi", "\t$dst, $val, $imm", "$src = $dst",
-               [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val,
+               "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
+               [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn,
                bf_inv_mask_imm:$imm))]>,
                Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<10> imm;
+  let Inst{27-21} = 0b0111110;
+  let Inst{6-4}   = 0b001; // Rn: Inst{3-0} != 15
+  let Inst{15-12} = Rd;
+  let Inst{11-7}  = imm{4-0}; // lsb
+  let Inst{20-16} = imm{9-5}; // width
+  let Inst{3-0}   = Rn;
+}
+
+// GNU as only supports this form of bfi (w/ 4 arguments)
+let isAsmParserOnly = 1 in
+def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn,
+                               lsb_pos_imm:$lsb, width_imm:$width),
+               AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+               "bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd",
+               []>, Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<5> lsb;
+  bits<5> width;
   let Inst{27-21} = 0b0111110;
   let Inst{6-4}   = 0b001; // Rn: Inst{3-0} != 15
+  let Inst{15-12} = Rd;
+  let Inst{11-7}  = lsb;
+  let Inst{20-16} = width; // Custom encoder => lsb+width-1
+  let Inst{3-0}   = Rn;
 }
 
-def MVNr  : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
-                 "mvn", "\t$dst, $src",
-                 [(set GPR:$dst, (not GPR:$src))]>, UnaryDP {
+def MVNr  : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
                 "mvn", "\t$Rd, $Rm",
                 [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
+  bits<4> Rd;
+  bits<4> Rm;
   let Inst{25} = 0;
+  let Inst{19-16} = 0b0000;
   let Inst{11-4} = 0b00000000;
-}
-def MVNs  : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
-                 IIC_iMOVsr, "mvn", "\t$dst, $src",
-                 [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP {
+  let Inst{15-12} = Rd;
+  let Inst{3-0} = Rm;
+}
+def MVNs  : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm,
+                 IIC_iMVNsr, "mvn", "\t$Rd, $shift",
                 [(set GPR:$Rd, (not so_reg:$shift))]>, UnaryDP {
+  bits<4> Rd;
+  bits<12> shift;
   let Inst{25} = 0;
-}
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MVNi  : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
-                 IIC_iMOVi, "mvn", "\t$dst, $imm",
-                 [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP {
-  let Inst{25} = 1;
+  let Inst{19-16} = 0b0000;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = shift;
+}
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
+def MVNi  : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
+                 IIC_iMVNi, "mvn", "\t$Rd, $imm",
                 [(set GPR:$Rd, so_imm_not:$imm)]>, UnaryDP {
+  bits<4> Rd;
+  bits<12> imm;
+  let Inst{25} = 1;
+  let Inst{19-16} = 0b0000;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = imm;
 }
 
 def : ARMPat<(and GPR:$src, so_imm_not:$imm),
@@ -1909,247 +2581,299 @@ def : ARMPat<(and GPR:$src, so_imm_not:$imm),
 //===----------------------------------------------------------------------===//
 //  Multiply Instructions.
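+// Illustrative note: the AsMul1I32/AsMul1I64 helpers below factor out the
+// operand encoding shared by the 32-bit and 64-bit multiplies; e.g. for
+// "smull r0, r1, r2, r3": RdLo = r0 (Inst{15-12}), RdHi = r1 (Inst{19-16}),
+// Rn = r2 (Inst{3-0}) and Rm = r3 (Inst{11-8}).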
 //
+class AsMul1I32<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+                string opc, string asm, list<dag> pattern>
+  : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rm;
+  bits<4> Rn;
+  let Inst{19-16} = Rd;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
+class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+                string opc, string asm, list<dag> pattern>
+  : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+  bits<4> RdLo;
+  bits<4> RdHi;
+  bits<4> Rm;
+  bits<4> Rn;
+  let Inst{19-16} = RdHi;
+  let Inst{15-12} = RdLo;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
 
-let isCommutable = 1 in
-def MUL   : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                    IIC_iMUL32, "mul", "\t$dst, $a, $b",
-                    [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
-
-def MLA   : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-                    IIC_iMAC32, "mla", "\t$dst, $a, $b, $c",
-                    [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
-
-def MLS   : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-                   IIC_iMAC32, "mls", "\t$dst, $a, $b, $c",
-                   [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
-                   Requires<[IsARM, HasV6T2]>;
+let isCommutable = 1 in {
+let Constraints = "@earlyclobber $Rd" in
+def MULv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+                                          pred:$p, cc_out:$s),
+                         Size4Bytes, IIC_iMUL32,
+                         [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+                         Requires<[IsARM, NoV6]>;
+
+def MUL  : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                   IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
+                   [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+                   Requires<[IsARM, HasV6]>;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MLAv5: ARMPseudoInst<(outs GPR:$Rd),
+                        (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
+                        Size4Bytes, IIC_iMAC32,
+                        [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+                        Requires<[IsARM, NoV6]> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
+}
+def MLA  : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                   IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
+                   [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+                   Requires<[IsARM, HasV6]> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
+}
+
+def MLS  : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                   IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
+                   [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
+                   Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<4> Rm;
+  bits<4> Rn;
+  bits<4> Ra;
+  let Inst{19-16} = Rd;
+  let Inst{15-12} = Ra;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
 
 // Extra precision multiplies with low / high results
+
 let neverHasSideEffects = 1 in {
 let isCommutable = 1 in {
-def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
-                    (ins GPR:$a, GPR:$b), IIC_iMUL64,
-                    "smull", "\t$ldst, $hdst, $a, $b", []>;
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                            Size4Bytes, IIC_iMUL64, []>,
+                           Requires<[IsARM, NoV6]>;
 
-def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
-                    (ins GPR:$a, GPR:$b), IIC_iMUL64,
-                    "umull", "\t$ldst, $hdst, $a, $b", []>;
+def UMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                            Size4Bytes, IIC_iMUL64, []>,
+                           Requires<[IsARM, NoV6]>;
 }
 
-// Multiply + accumulate
-def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
-                    (ins GPR:$a, GPR:$b), IIC_iMAC64,
-                    "smlal", "\t$ldst, $hdst, $a, $b", []>;
+def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+                    "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+                    Requires<[IsARM, HasV6]>;
 
-def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst),
-                    (ins GPR:$a, GPR:$b), IIC_iMAC64,
-                    "umlal", "\t$ldst, $hdst, $a, $b", []>;
+def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+                    "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+                    Requires<[IsARM, HasV6]>;
+}
 
-def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
-                    (ins GPR:$a, GPR:$b), IIC_iMAC64,
-                    "umaal", "\t$ldst, $hdst, $a, $b", []>,
+// Multiply + accumulate
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                            Size4Bytes, IIC_iMAC64, []>,
+                           Requires<[IsARM, NoV6]>;
+def UMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                            Size4Bytes, IIC_iMAC64, []>,
+                           Requires<[IsARM, NoV6]>;
+def UMAALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                            Size4Bytes, IIC_iMAC64, []>,
+                           Requires<[IsARM, NoV6]>;
+
+}
+
+def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+                      "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+                      Requires<[IsARM, HasV6]>;
+def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+                      "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
                     Requires<[IsARM, HasV6]>;
+
+def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
+                    (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+                    "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+                    Requires<[IsARM, HasV6]> {
+  bits<4> RdLo;
+  bits<4> RdHi;
+  bits<4> Rm;
+  bits<4> Rn;
+  let Inst{19-16} = RdLo;
+  let Inst{15-12} = RdHi;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
 } // neverHasSideEffects
 
 // Most significant word multiply
-def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                    IIC_iMUL32, "smmul", "\t$dst, $a, $b",
-                    [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
+def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
                    IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
                    [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
             Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b0001;
   let Inst{15-12} = 0b1111;
 }
-def SMMULR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                    IIC_iMUL32, "smmulr", "\t$dst, $a, $b",
+def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                    IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm",
                     [/* For disassembly only; pattern left blank */]>,
             Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b0011; // R = 1
   let Inst{15-12} = 0b1111;
 }
 
-def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-                    IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c",
-                    [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
-            Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b0001;
-}
+def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
+               (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+               IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+               [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+            Requires<[IsARM, HasV6]>;
 
-def SMMLAR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-                    IIC_iMAC32, "smmlar", "\t$dst, $a, $b, $c",
+def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
+               (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+               IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra",
                     [/* For disassembly only; pattern left blank */]>,
-            Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b0011; // R = 1
-}
+            Requires<[IsARM, HasV6]>;
 
-def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-                    IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c",
-                    [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
-            Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b1101;
-}
+def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
+               (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+               IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+               [(set GPR:$Rd, (sub GPR:$Ra, (mulhs GPR:$Rn, GPR:$Rm)))]>,
+            Requires<[IsARM, HasV6]>;
 
-def SMMLSR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-                    IIC_iMAC32, "smmlsr", "\t$dst, $a, $b, $c",
+def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
+               (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+               IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra",
                     [/* For disassembly only; pattern left blank */]>,
-            Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b1111; // R = 1
-}
+            Requires<[IsARM, HasV6]>;
 
 multiclass AI_smul<string opc, PatFrag opnode> {
-  def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-              IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b",
-              [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
-                                      (sext_inreg GPR:$b, i16)))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 0;
-             let Inst{6} = 0;
-           }
-
-  def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-              IIC_iMUL32, !strconcat(opc, "bt"), "\t$dst, $a, $b",
-              [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
-                                      (sra GPR:$b, (i32 16))))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 0;
-             let Inst{6} = 1;
-           }
-
-  def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-              IIC_iMUL32, !strconcat(opc, "tb"), "\t$dst, $a, $b",
-              [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
-                                      (sext_inreg GPR:$b, i16)))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 1;
-             let Inst{6} = 0;
-           }
-
-  def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-              IIC_iMUL32, !strconcat(opc, "tt"), "\t$dst, $a, $b",
-              [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
-                                      (sra GPR:$b, (i32 16))))]>,
-            Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 1;
-             let Inst{6} = 1;
-           }
-
-  def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-              IIC_iMUL16, !strconcat(opc, "wb"), "\t$dst, $a, $b",
-              [(set GPR:$dst, (sra (opnode GPR:$a,
-                                    (sext_inreg GPR:$b, i16)), (i32 16)))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 1;
-             let Inst{6} = 0;
-           }
-
-  def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-              IIC_iMUL16, !strconcat(opc, "wt"), "\t$dst, $a, $b",
-              [(set GPR:$dst, (sra (opnode GPR:$a,
-                                    (sra GPR:$b, (i32 16))), (i32 16)))]>,
-            Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 1;
-             let Inst{6} = 1;
-           }
+  def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+              IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+              [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+                                      (sext_inreg GPR:$Rm, i16)))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+              IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+              [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+                                      (sra GPR:$Rm, (i32 16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+              IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+              [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+                                      (sext_inreg GPR:$Rm, i16)))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+              IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+              [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+                                      (sra GPR:$Rm, (i32 16))))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+              IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+              [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+                                    (sext_inreg GPR:$Rm, i16)), (i32 16)))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+              IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+              [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+                                    (sra GPR:$Rm, (i32 16))), (i32 16)))]>,
+            Requires<[IsARM, HasV5TE]>;
 }
 
 multiclass AI_smla<string opc, PatFrag opnode> {
-  def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-              IIC_iMAC16, !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc",
-              [(set GPR:$dst, (add GPR:$acc,
-                               (opnode (sext_inreg GPR:$a, i16),
-                                       (sext_inreg GPR:$b, i16))))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 0;
-             let Inst{6} = 0;
-           }
-
-  def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-              IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
-              [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
-                                                     (sra GPR:$b, (i32 16)))))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 0;
-             let Inst{6} = 1;
-           }
-
-  def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-              IIC_iMAC16, !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc",
-              [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
-                                                 (sext_inreg GPR:$b, i16))))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 1;
-             let Inst{6} = 0;
-           }
-
-  def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-              IIC_iMAC16, !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
-              [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
-                                                     (sra GPR:$b, (i32 16)))))]>,
-            Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 1;
-             let Inst{6} = 1;
-           }
-
-  def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-              IIC_iMAC16, !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
-              [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
-                                    (sext_inreg GPR:$b, i16)), (i32 16))))]>,
-           Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 0;
-             let Inst{6} = 0;
-           }
-
-  def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-              IIC_iMAC16, !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
-              [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
-                                    (sra GPR:$b, (i32 16))), (i32 16))))]>,
-            Requires<[IsARM, HasV5TE]> {
-             let Inst{5} = 0;
-             let Inst{6} = 1;
-           }
+  def BB : AMulxyIa<0b0001000, 0b00, (outs GPR:$Rd),
+              (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+              IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set GPR:$Rd, (add GPR:$Ra,
+                               (opnode (sext_inreg GPR:$Rn, i16),
+                                       (sext_inreg GPR:$Rm, i16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def BT : AMulxyIa<0b0001000, 0b10, (outs GPR:$Rd),
+              (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+              IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set GPR:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPR:$Rn, i16),
+                                                   (sra GPR:$Rm, (i32 16)))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def TB : AMulxyIa<0b0001000, 0b01, (outs GPR:$Rd),
+              (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+              IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+                                                 (sext_inreg GPR:$Rm, i16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def TT : AMulxyIa<0b0001000, 0b11, (outs GPR:$Rd),
+              (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+              IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+                                                   (sra GPR:$Rm, (i32 16)))))]>,
+            Requires<[IsARM, HasV5TE]>;
+
+  def WB : AMulxyIa<0b0001001, 0b00, (outs GPR:$Rd),
+              (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+              IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+                                    (sext_inreg GPR:$Rm, i16)), (i32 16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def WT : AMulxyIa<0b0001001, 0b10, (outs GPR:$Rd),
+              (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+              IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+              [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+                                    (sra GPR:$Rm, (i32 16))), (i32 16))))]>,
+            Requires<[IsARM, HasV5TE]>;
 }
 
 defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
 defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
 
 // Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
-def SMLALBB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
-                      IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm),
+                      IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm",
                       [/* For disassembly only; pattern left blank */]>,
-               Requires<[IsARM, HasV5TE]> {
-  let Inst{5} = 0;
-  let Inst{6} = 0;
-}
+               Requires<[IsARM, HasV5TE]>;
 
-def SMLALBT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
-                      IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm),
+                      IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm",
                      [/* For disassembly only; pattern left blank */]>,
-               Requires<[IsARM, HasV5TE]> {
-  let Inst{5} = 0;
-  let Inst{6} = 1;
-}
+               Requires<[IsARM, HasV5TE]>;
 
-def SMLALTB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
-                      IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm),
+                      IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm",
                      [/* For disassembly only; pattern left blank */]>,
-               Requires<[IsARM, HasV5TE]> {
-  let Inst{5} = 1;
-  let Inst{6} = 0;
-}
+               Requires<[IsARM, HasV5TE]>;
 
-def SMLALTT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
-                      IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm),
+                      IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm",
                      [/* For disassembly only; pattern left blank */]>,
-               Requires<[IsARM, HasV5TE]> {
-  let Inst{5} = 1;
-  let Inst{6} = 1;
-}
+               Requires<[IsARM, HasV5TE]>;
 
 // Helper class for AI_smld -- for disassembly only
-class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
-                InstrItinClass itin, string opc, string asm>
+class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
+                    InstrItinClass itin, string opc, string asm>
   : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+  bits<4> Rn;
+  bits<4> Rm;
   let Inst{4}     = 1;
   let Inst{5}     = swap;
   let Inst{6}     = sub;
@@ -2157,21 +2881,46 @@ class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
   let Inst{21-20} = 0b00;
   let Inst{22}    = long;
   let Inst{27-23} = 0b01110;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
+class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
+                InstrItinClass itin, string opc, string asm>
+  : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+  bits<4> Rd;
+  let Inst{15-12} = 0b1111;
+  let Inst{19-16} = Rd;
+}
+class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops,
+                 InstrItinClass itin, string opc, string asm>
+  : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
+}
+class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
+                  InstrItinClass itin, string opc, string asm>
+  : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+  bits<4> RdLo;
+  bits<4> RdHi;
+  let Inst{19-16} = RdHi;
+  let Inst{15-12} = RdLo;
 }
 
 multiclass AI_smld<bit sub, string opc> {
 
-  def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-                  NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b, $acc">;
+  def D : AMulDualIa<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                  NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
 
-  def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
-                  NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b, $acc">;
+  def DX: AMulDualIa<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                  NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
 
-  def LD : AMulDualI<1, sub, 0, (outs GPR:$ldst,GPR:$hdst), (ins GPR:$a,GPR:$b),
-                  NoItinerary, !strconcat(opc, "ld"), "\t$ldst, $hdst, $a, $b">;
+  def LD: AMulDualI64<1, sub, 0, (outs GPR:$RdLo,GPR:$RdHi),
+                  (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+                  !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
 
-  def LDX : AMulDualI<1, sub, 1, (outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
-                  NoItinerary, !strconcat(opc, "ldx"),"\t$ldst, $hdst, $a, $b">;
+  def LDX : AMulDualI64<1, sub, 1, (outs GPR:$RdLo,GPR:$RdHi),
+                  (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+                  !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
 
 }
 
@@ -2180,16 +2929,10 @@ defm SMLS : AI_smld<1, "smls">;
 
 multiclass AI_sdml<bit sub, string opc> {
 
-  def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                    NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b"> {
-    let Inst{15-12} = 0b1111;
-  }
-
-  def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                    NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b"> {
-    let Inst{15-12} = 0b1111;
-  }
-
+  def D : AMulDualI<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                    NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+  def DX : AMulDualI<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                    NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
 }
 
 defm SMUA : AI_sdml<0, "smua">;
@@ -2199,55 +2942,35 @@ defm SMUS : AI_sdml<1, "smus">;
 //  Misc. Arithmetic Instructions.
 //
 
-def CLZ  : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
-              "clz", "\t$dst, $src",
-              [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]> {
-  let Inst{7-4}   = 0b0001;
-  let Inst{11-8}  = 0b1111;
-  let Inst{19-16} = 0b1111;
-}
-
-def RBIT : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
-              "rbit", "\t$dst, $src",
-              [(set GPR:$dst, (ARMrbit GPR:$src))]>,
-           Requires<[IsARM, HasV6T2]> {
-  let Inst{7-4}   = 0b0011;
-  let Inst{11-8}  = 0b1111;
-  let Inst{19-16} = 0b1111;
-}
-
-def REV  : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
-              "rev", "\t$dst, $src",
-              [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b0011;
-  let Inst{11-8}  = 0b1111;
-  let Inst{19-16} = 0b1111;
-}
-
-def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
-               "rev16", "\t$dst, $src",
-               [(set GPR:$dst,
-                   (or (and (srl GPR:$src, (i32 8)), 0xFF),
-                       (or (and (shl GPR:$src, (i32 8)), 0xFF00),
-                           (or (and (srl GPR:$src, (i32 8)), 0xFF0000),
-                               (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>,
-               Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b1011;
-  let Inst{11-8}  = 0b1111;
-  let Inst{19-16} = 0b1111;
-}
-
-def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
-               "revsh", "\t$dst, $src",
-               [(set GPR:$dst,
+def CLZ  : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
+              IIC_iUNAr, "clz", "\t$Rd, $Rm",
+              [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>;
+
+def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+              IIC_iUNAr, "rbit", "\t$Rd, $Rm",
+              [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+           Requires<[IsARM, HasV6T2]>;
+
+def REV  : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+              IIC_iUNAr, "rev", "\t$Rd, $Rm",
+              [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+
+def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+               IIC_iUNAr, "rev16", "\t$Rd, $Rm",
+               [(set GPR:$Rd,
+                   (or (and (srl GPR:$Rm, (i32 8)), 0xFF),
+                       (or (and (shl GPR:$Rm, (i32 8)), 0xFF00),
+                           (or (and (srl GPR:$Rm, (i32 8)), 0xFF0000),
+                               (and (shl GPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+               Requires<[IsARM, HasV6]>;
+
+def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+               IIC_iUNAr, "revsh", "\t$Rd, $Rm",
+               [(set GPR:$Rd,
                  (sext_inreg
-                     (or (srl (and GPR:$src, 0xFF00), (i32 8)),
-                         (shl GPR:$src, (i32 8))), i16))]>,
-               Requires<[IsARM, HasV6]> {
-  let Inst{7-4}   = 0b1011;
-  let Inst{11-8}  = 0b1111;
-  let Inst{19-16} = 0b1111;
-}
+                     (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
+                         (shl GPR:$Rm, (i32 8))), i16))]>,
+               Requires<[IsARM, HasV6]>;
 
 def lsl_shift_imm : SDNodeXForm<imm, [{
   unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
@@ -2258,21 +2981,19 @@ def lsl_amt : PatLeaf<(i32 imm), [{
   return (N->getZExtValue() < 32);
 }], lsl_shift_imm>;
 
-def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
-                     (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
-                     IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh",
-                     [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
-                                         (and (shl GPR:$src2, lsl_amt:$sh),
-                                              0xFFFF0000)))]>,
-                     Requires<[IsARM, HasV6]> {
-  let Inst{6-4} = 0b001;
-}
+def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd),
+                  (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+                  IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+                  [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF),
+                                     (and (shl GPR:$Rm, lsl_amt:$sh),
+                                          0xFFFF0000)))]>,
+                  Requires<[IsARM, HasV6]>;
 
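+// Illustrative note: "pkhbt r0, r1, r2, lsl #16" packs two bottom halfwords,
+//   r0 = (r1 & 0xffff) | ((r2 << 16) & 0xffff0000)
+// which is the (or (and ...), (and (shl ...), ...)) pattern above.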
 // Alternate cases for PKHBT where identities eliminate some nodes.
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
-               (PKHBT GPR:$src1, GPR:$src2, 0)>;
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$sh)),
-               (PKHBT GPR:$src1, GPR:$src2, (lsl_shift_imm imm16_31:$sh))>;
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (and GPR:$Rm, 0xFFFF0000)),
+               (PKHBT GPR:$Rn, GPR:$Rm, 0)>;
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (shl GPR:$Rm, imm16_31:$sh)),
+               (PKHBT GPR:$Rn, GPR:$Rm, (lsl_shift_imm imm16_31:$sh))>;
 
 def asr_shift_imm : SDNodeXForm<imm, [{
   unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
@@ -2285,15 +3006,13 @@ def asr_amt : PatLeaf<(i32 imm), [{
 
 // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
 // will match the pattern below.
-def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
-                     (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
-                     IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh",
-                     [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
-                                         (and (sra GPR:$src2, asr_amt:$sh),
-                                              0xFFFF)))]>,
-                     Requires<[IsARM, HasV6]> {
-  let Inst{6-4} = 0b101;
-}
+def PKHTB : APKHI<0b01101000, 1, (outs GPR:$Rd),
+                  (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+                  IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+                  [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF0000),
+                                     (and (sra GPR:$Rm, asr_amt:$sh),
+                                          0xFFFF)))]>,
+                  Requires<[IsARM, HasV6]>;
 
 // Alternate cases for PKHTB where identities eliminate some nodes.  Note that
 // a shift amount of 0 is *not legal* here, it is PKHBT instead.
@@ -2308,10 +3027,19 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
 //
 
 defm CMP  : AI1_cmp_irs<0b1010, "cmp",
+                        IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
                         BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
 
-// FIXME: There seems to be a (potential) hardware bug with the CMN instruction
-// and comparison with 0. These two pieces of code should give identical
+// ARMcmpZ can re-use the above instruction definitions.
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm),
+             (CMPri   GPR:$src, so_imm:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
+             (CMPrr   GPR:$src, GPR:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg:$rhs),
+             (CMPrs   GPR:$src, so_reg:$rhs)>;
+
+// FIXME: We have to be careful when using the CMN instruction and comparison
+// with 0. One would expect these two pieces of code to give identical
 // results:
 //
 //   rsbs r1, r1, 0
@@ -2321,7 +3049,7 @@ defm CMP  : AI1_cmp_irs<0b1010, "cmp",
 //   mov  r0, #1
 //
 // and:
-// 
+//
 //   cmn r0, r1
 //   mov r0, #0
 //   it  ls
@@ -2336,20 +3064,16 @@ defm CMP  : AI1_cmp_irs<0b1010, "cmp",
 // never a "carry" when this AddWithCarry is performed (because the "carry bit"
 // parameter to AddWithCarry is defined as 0).
 //
-// The AddWithCarry in the CMP case seems to be relying upon the identity:
-//
-// ~x + 1 = -x
-//
-// However when x is 0 and unsigned, this doesn't hold:
+// When x is 0 and unsigned:
 //
 //    x = 0
 //   ~x = 0xFFFF FFFF
 //   ~x + 1 = 0x1 0000 0000
 //   (-x = 0) != (0x1 0000 0000 = ~x + 1)
 //
-// Therefore, we should disable *all* versions of CMN, especially when comparing
-// against zero, until we can limit when the CMN instruction is used (when we
-// know that the RHS is not 0) or when we have a hardware fix for this.
+// Therefore, we should disable CMN when comparing against zero, until we can
+// limit when the CMN instruction is used (when we know that the RHS is not 0 or
+// when it's a comparison which doesn't look at the 'carry' flag).
 //
 // (See the ARM docs for the "AddWithCarry" pseudo-code.)
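+//
+// Concretely (illustrative), with r0 = 1 and the original r1 = 0: the first
+// sequence computes 1 - 0 (C = 1, Z = 0), so "ls" is false and r0 ends up 0;
+// the second computes 1 + 0 (C = 0, Z = 0), so "ls" is true and r0 ends up 1.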
 //
@@ -2360,13 +3084,14 @@ defm CMP  : AI1_cmp_irs<0b1010, "cmp",
 
 // Note that TST/TEQ don't set all the same flags that CMP does!
 defm TST  : AI1_cmp_irs<0b1000, "tst",
-                        BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>, 1>;
+                        IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+                       BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>;
 defm TEQ  : AI1_cmp_irs<0b1001, "teq",
-                        BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>, 1>;
+                        IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+                       BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
 
-defm CMPz  : AI1_cmp_irs<0b1010, "cmp",
-                         BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>;
 defm CMNz  : AI1_cmp_irs<0b1011, "cmn",
+                         IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
                          BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
 
 //def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
@@ -2381,13 +3106,10 @@ let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
 def BCCi64 : PseudoInst<(outs),
     (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
      IIC_Br,
-    "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
     [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
 
 def BCCZi64 : PseudoInst<(outs),
-     (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst),
-     IIC_Br,
-    "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, 0, 0, imm:$cc",
+     (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br,
     [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
 } // usesCustomInserter
 
@@ -2395,29 +3117,87 @@ def BCCZi64 : PseudoInst<(outs),
 // Conditional moves
 // FIXME: should be able to write a pattern for ARMcmov, but can't use
 // a two-value operand where a dag node expects two operands. :(
+// FIXME: These should all be pseudo-instructions that get expanded to
+//        the normal MOV instructions. That would fix the dependency on
+//        special casing them in tblgen.
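+// Illustrative note: MOVCCr is the predicated mov used for selects, e.g.
+// "r0 = (r2 == 0) ? r1 : r0" becomes:
+//   cmp   r2, #0
+//   moveq r0, r1    @ $false is tied to $Rd, so the false value stays in r0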
 let neverHasSideEffects = 1 in {
-def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
-                 IIC_iCMOVr, "mov", "\t$dst, $true",
-      [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
-                 RegConstraint<"$false = $dst">, UnaryDP {
-  let Inst{11-4} = 0b00000000;
+def MOVCCr : AI1<0b1101, (outs GPR:$Rd), (ins GPR:$false, GPR:$Rm), DPFrm,
+                 IIC_iCMOVr, "mov", "\t$Rd, $Rm",
+      [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+                 RegConstraint<"$false = $Rd">, UnaryDP {
+  bits<4> Rd;
+  bits<4> Rm;
   let Inst{25} = 0;
+  let Inst{20} = 0;
+  let Inst{15-12} = Rd;
+  let Inst{11-4} = 0b00000000;
+  let Inst{3-0} = Rm;
 }
 
-def MOVCCs : AI1<0b1101, (outs GPR:$dst),
-                 (ins GPR:$false, so_reg:$true), DPSoRegFrm, IIC_iCMOVsr,
-                 "mov", "\t$dst, $true",
-   [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
-                 RegConstraint<"$false = $dst">, UnaryDP {
+def MOVCCs : AI1<0b1101, (outs GPR:$Rd),
+                 (ins GPR:$false, so_reg:$shift), DPSoRegFrm, IIC_iCMOVsr,
+                 "mov", "\t$Rd, $shift",
+   [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
+                 RegConstraint<"$false = $Rd">, UnaryDP {
+  bits<4> Rd;
+  bits<12> shift;
   let Inst{25} = 0;
+  let Inst{20} = 0;
+  let Inst{19-16} = 0;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = shift;
 }
 
-def MOVCCi : AI1<0b1101, (outs GPR:$dst),
-                 (ins GPR:$false, so_imm:$true), DPFrm, IIC_iCMOVi,
-                 "mov", "\t$dst, $true",
-   [/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
-                 RegConstraint<"$false = $dst">, UnaryDP {
+let isMoveImm = 1 in
+def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm_hilo16:$imm),
+                 DPFrm, IIC_iMOVi,
+                 "movw", "\t$Rd, $imm",
+                 []>,
+                 RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
+                 UnaryDP {
+  bits<4> Rd;
+  bits<16> imm;
+  let Inst{25} = 1;
+  let Inst{20} = 0;
+  let Inst{19-16} = imm{15-12};
+  let Inst{15-12} = Rd;
+  let Inst{11-0}  = imm{11-0};
+}
+
+let isMoveImm = 1 in
+def MOVCCi : AI1<0b1101, (outs GPR:$Rd),
+                 (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
+                 "mov", "\t$Rd, $imm",
+   [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+                 RegConstraint<"$false = $Rd">, UnaryDP {
+  bits<4> Rd;
+  bits<12> imm;
   let Inst{25} = 1;
+  let Inst{20} = 0;
+  let Inst{19-16} = 0b0000;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = imm;
+}
+
+// Two instruction predicate mov immediate.
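+// Illustrative note: MOVCCi32imm is expected to expand to a predicated
+// movw/movt pair (the v6t2 encodings above), e.g. for 0xabcd1234 under "ne":
+//   movwne r0, #0x1234
+//   movtne r0, #0xabcd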
+let isMoveImm = 1 in
+def MOVCCi32imm : PseudoInst<(outs GPR:$Rd),
+                             (ins GPR:$false, i32imm:$src, pred:$p),
+                 IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def MVNCCi : AI1<0b1111, (outs GPR:$Rd),
+                 (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
+                 "mvn", "\t$Rd, $imm",
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
+                RegConstraint<"$false = $Rd">, UnaryDP {
+  bits<4> Rd;
+  bits<12> imm;
+  let Inst{25} = 1;
+  let Inst{20} = 0;
+  let Inst{19-16} = 0b0000;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = imm;
+}
 } // neverHasSideEffects
 
@@ -2425,64 +3205,41 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
 // Atomic operations intrinsics
 //
 
+def memb_opt : Operand<i32> {
+  let PrintMethod = "printMemBOption";
+  let ParserMatchClass = MemBarrierOptOperand;
+}
+
 // memory barriers protect the atomic sequences
 let hasSideEffects = 1 in {
-def DMBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dmb", "",
-                  [(ARMMemBarrier)]>, Requires<[IsARM, HasDB]> {
+def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+                "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+                Requires<[IsARM, HasDB]> {
+  bits<4> opt;
   let Inst{31-4} = 0xf57ff05;
-  // FIXME: add support for options other than a full system DMB
-  // See DMB disassembly-only variants below.
-  let Inst{3-0} = 0b1111;
-}
-
-def DSBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dsb", "",
-                  [(ARMSyncBarrier)]>, Requires<[IsARM, HasDB]> {
-  let Inst{31-4} = 0xf57ff04;
-  // FIXME: add support for options other than a full system DSB
-  // See DSB disassembly-only variants below.
-  let Inst{3-0} = 0b1111;
+  let Inst{3-0} = opt;
 }
 
 def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
                     "mcr", "\tp15, 0, $zero, c7, c10, 5",
                     [(ARMMemBarrierMCR GPR:$zero)]>,
                     Requires<[IsARM, HasV6]> {
-  // FIXME: add support for options other than a full system DMB
   // FIXME: add encoding
 }
-
-def DSB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
-                    "mcr", "\tp15, 0, $zero, c7, c10, 4",
-                    [(ARMSyncBarrierMCR GPR:$zero)]>,
-                    Requires<[IsARM, HasV6]> {
-  // FIXME: add support for options other than a full system DSB
-  // FIXME: add encoding
-}
-}
-
-// Memory Barrier Operations Variants -- for disassembly only
-
-def memb_opt : Operand<i32> {
-  let PrintMethod = "printMemBOption";
-}
 
-class AMBI<bits<4> op7_4, string opc>
-  : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, opc, "\t$opt",
-          [/* For disassembly only; pattern left blank */]>,
-    Requires<[IsARM, HasDB]> {
-  let Inst{31-8} = 0xf57ff0;
-  let Inst{7-4} = op7_4;
+def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+                "dsb", "\t$opt",
+                [/* For disassembly only; pattern left blank */]>,
+                Requires<[IsARM, HasDB]> {
+  bits<4> opt;
+  let Inst{31-4} = 0xf57ff04;
+  let Inst{3-0} = opt;
 }
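+// Illustrative note: memb_opt carries the barrier-domain option directly in
+// Inst{3-0}, e.g.
+//   dmb sy     @ opt = 0b1111 (full system)
+//   dmb ish    @ opt = 0b1011 (inner shareable)
+//   dmb st     @ opt = 0b1110 (full system, stores only)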
-def DSBvar : AMBI<0b0100, "dsb">; - // ISB has only full system option -- for disassembly only -def ISBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>, - Requires<[IsARM, HasDB]> { +def ISB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>, + Requires<[IsARM, HasDB]> { let Inst{31-4} = 0xf57ff06; let Inst{3-0} = 0b1111; } @@ -2491,138 +3248,114 @@ let usesCustomInserter = 1 in { let Uses = [CPSR] in { def ATOMIC_LOAD_ADD_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!", [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_SUB_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!", [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_AND_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!", [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_OR_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!", [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_XOR_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!", [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_NAND_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!", [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_ADD_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!", [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_SUB_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!", [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_AND_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!", [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_OR_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!", [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_XOR_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!", [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_NAND_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!", [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_ADD_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!", [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_SUB_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!", [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_AND_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!", [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_OR_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} 
ATOMIC_LOAD_OR_I32 PSEUDO!", [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_XOR_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!", [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>; def ATOMIC_LOAD_NAND_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!", [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>; def ATOMIC_SWAP_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, - "${:comment} ATOMIC_SWAP_I8 PSEUDO!", [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>; def ATOMIC_SWAP_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, - "${:comment} ATOMIC_SWAP_I16 PSEUDO!", [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>; def ATOMIC_SWAP_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, - "${:comment} ATOMIC_SWAP_I32 PSEUDO!", [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>; def ATOMIC_CMP_SWAP_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, - "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!", [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>; def ATOMIC_CMP_SWAP_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, - "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!", [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>; def ATOMIC_CMP_SWAP_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, - "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!", [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>; } } let mayLoad = 1 in { -def LDREXB : AIldrex<0b10, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary, - "ldrexb", "\t$dest, [$ptr]", +def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary, + "ldrexb", "\t$Rt, [$Rn]", []>; -def LDREXH : AIldrex<0b11, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary, - "ldrexh", "\t$dest, [$ptr]", +def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary, + "ldrexh", "\t$Rt, [$Rn]", []>; -def LDREX : AIldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary, - "ldrex", "\t$dest, [$ptr]", +def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary, + "ldrex", "\t$Rt, [$Rn]", []>; -def LDREXD : AIldrex<0b01, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), +def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins GPR:$Rn), NoItinerary, - "ldrexd", "\t$dest, $dest2, [$ptr]", + "ldrexd", "\t$Rt, $Rt2, [$Rn]", []>; } -let mayStore = 1, Constraints = "@earlyclobber $success" in { -def STREXB : AIstrex<0b10, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), +let mayStore = 1, Constraints = "@earlyclobber $Rd" in { +def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$src, GPR:$Rn), NoItinerary, - "strexb", "\t$success, $src, [$ptr]", + "strexb", "\t$Rd, $src, [$Rn]", []>; -def STREXH : AIstrex<0b11, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), +def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn), NoItinerary, - "strexh", "\t$success, $src, [$ptr]", + "strexh", "\t$Rd, $Rt, [$Rn]", []>; -def STREX : AIstrex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), +def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn), NoItinerary, - "strex", "\t$success, $src, [$ptr]", + "strex", "\t$Rd, $Rt, [$Rn]", []>; -def STREXD : AIstrex<0b01, (outs GPR:$success), - (ins GPR:$src, GPR:$src2, GPR:$ptr), +def STREXD : AIstrex<0b01, (outs GPR:$Rd), + 
(ins GPR:$Rt, GPR:$Rt2, GPR:$Rn), NoItinerary, - "strexd", "\t$success, $src, $src2, [$ptr]", + "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]", []>; } @@ -2630,29 +3363,15 @@ def STREXD : AIstrex<0b01, (outs GPR:$success), def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", [/* For disassembly only; pattern left blank */]>, Requires<[IsARM, HasV7]> { - let Inst{31-20} = 0xf57; - let Inst{7-4} = 0b0001; + let Inst{31-0} = 0b11110101011111111111000000011111; } // SWP/SWPB are deprecated in V6/V7 and for disassembly only. let mayLoad = 1 in { -def SWP : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary, - "swp", "\t$dst, $src, [$ptr]", - [/* For disassembly only; pattern left blank */]> { - let Inst{27-23} = 0b00010; - let Inst{22} = 0; // B = 0 - let Inst{21-20} = 0b00; - let Inst{7-4} = 0b1001; -} - -def SWPB : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary, - "swpb", "\t$dst, $src, [$ptr]", - [/* For disassembly only; pattern left blank */]> { - let Inst{27-23} = 0b00010; - let Inst{22} = 1; // B = 1 - let Inst{21-20} = 0b00; - let Inst{7-4} = 0b1001; -} +def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swp", + [/* For disassembly only; pattern left blank */]>; +def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb", + [/* For disassembly only; pattern left blank */]>; } //===----------------------------------------------------------------------===// @@ -2660,10 +3379,11 @@ def SWPB : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary, // // __aeabi_read_tp preserves the registers r1-r3. +// This is a pseudo inst so that we can get the encoding right, +// complete with fixup for the aeabi_read_tp function. let isCall = 1, - Defs = [R0, R12, LR, CPSR] in { - def TPsoft : ABXI<0b1011, (outs), (ins), IIC_Br, - "bl\t__aeabi_read_tp", + Defs = [R0, R12, LR, CPSR], Uses = [SP] in { + def TPsoft : PseudoInst<(outs), (ins), IIC_Br, [(set R0, ARMthread_pointer)]>; } @@ -2680,19 +3400,16 @@ let isCall = 1, // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved registers, which is exactly what we want. // A constant value is passed in $val, and we use the location as a scratch. +// +// These are pseudo-instructions and are lowered to individual MC-insts, so +// no encoding information is necessary.
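// (For a rough picture of the lowering, the inline sequence being removed
// just below is what the setjmp pseudo still amounts to:
//   add  $val, pc, #8
//   str  $val, [$src, #+4]
//   mov  r0, #0
//   add  pc, pc, #0
//   mov  r0, #1
// The normal path falls through, hops over the final mov via the pc-relative
// add, and returns 0; a longjmp re-enters at the final mov and returns 1.)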
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31 ], hasSideEffects = 1, isBarrier = 1 in { - def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val), - AddrModeNone, SizeSpecial, IndexModeNone, - Pseudo, NoItinerary, - "add\t$val, pc, #8\t${:comment} eh_setjmp begin\n\t" - "str\t$val, [$src, #+4]\n\t" - "mov\tr0, #0\n\t" - "add\tpc, pc, #0\n\t" - "mov\tr0, #1 ${:comment} eh_setjmp end", "", + def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), + NoItinerary, [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, HasVFP2]>; } @@ -2700,14 +3417,8 @@ let Defs = let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ], hasSideEffects = 1, isBarrier = 1 in { - def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val), - AddrModeNone, SizeSpecial, IndexModeNone, - Pseudo, NoItinerary, - "add\t$val, pc, #8\n ${:comment} eh_setjmp begin\n\t" - "str\t$val, [$src, #+4]\n\t" - "mov\tr0, #0\n\t" - "add\tpc, pc, #0\n\t" - "mov\tr0, #1 ${:comment} eh_setjmp end", "", + def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), + NoItinerary, [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, NoVFP]>; } @@ -2715,53 +3426,58 @@ let Defs = // FIXME: Non-Darwin version(s) let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, Defs = [ R7, LR, SP ] in { -def Int_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), - AddrModeNone, SizeSpecial, IndexModeNone, - Pseudo, NoItinerary, - "ldr\tsp, [$src, #8]\n\t" - "ldr\t$scratch, [$src, #4]\n\t" - "ldr\tr7, [$src]\n\t" - "bx\t$scratch", "", +def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), + NoItinerary, [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, Requires<[IsARM, IsDarwin]>; } +// eh.sjlj.dispatchsetup pseudo-instruction. +// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are +// handled when the pseudo is expanded (which happens before any passes +// that need the instruction size). +let isBarrier = 1, hasSideEffects = 1 in +def Int_eh_sjlj_dispatchsetup : + PseudoInst<(outs), (ins GPR:$src), NoItinerary, + [(ARMeh_sjlj_dispatchsetup GPR:$src)]>, + Requires<[IsDarwin]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // // Large immediate handling. -// Two piece so_imms. -let isReMaterializable = 1 in -def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), - Pseudo, IIC_iMOVi, - "mov", "\t$dst, $src", - [(set GPR:$dst, so_imm2part:$src)]>, - Requires<[IsARM, NoV6T2]>; - -def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS), - (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)), - (so_imm2part_2 imm:$RHS))>; -def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), - (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)), - (so_imm2part_2 imm:$RHS))>; -def : ARMPat<(add GPR:$LHS, so_imm2part:$RHS), - (ADDri (ADDri GPR:$LHS, (so_imm2part_1 imm:$RHS)), - (so_imm2part_2 imm:$RHS))>; -def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS), - (SUBri (SUBri GPR:$LHS, (so_neg_imm2part_1 imm:$RHS)), - (so_neg_imm2part_2 imm:$RHS))>; - -// 32-bit immediate using movw + movt. +// 32-bit immediate using two piece so_imms or movw + movt. // This is a single pseudo instruction, the benefit is that it can be remat'd // as a single unit instead of having to handle reg inputs. 
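// (Sketch, assuming a pre-v6t2 target without movt: an arm_i32imm such as
// 0x00FF00FF can be built from two shifter-operand immediates, roughly
//   mov r0, #0x00FF0000
//   orr r0, r0, #0x000000FF
// while v6t2+ targets get the movw/movt pair instead.)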
// FIXME: Remove this when we can do generalized remat. -let isReMaterializable = 1 in -def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi, - "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", - [(set GPR:$dst, (i32 imm:$src))]>, - Requires<[IsARM, HasV6T2]>; +let isReMaterializable = 1, isMoveImm = 1 in +def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, + [(set GPR:$dst, (arm_i32imm:$src))]>, + Requires<[IsARM]>; + +// Pseudo instruction that combines movw + movt + add pc (if PIC). +// It also makes it possible to rematerialize the instructions. +// FIXME: Remove this when we can do generalized remat and when machine licm +// can properly hoist the instructions. +let isReMaterializable = 1 in { +def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2addpc, + [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, + Requires<[IsARM, UseMovt]>; + +def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2, + [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>, + Requires<[IsARM, UseMovt]>; + +let AddedComplexity = 10 in +def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2ld, + [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>, + Requires<[IsARM, UseMovt]>; +} // isReMaterializable // ConstantPool, GlobalAddress, and JumpTable def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>, @@ -2800,11 +3516,15 @@ def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>, Requires<[IsARM, IsDarwin]>; // zextload i1 -> zextload i8 -def : ARMPat<(zextloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>; +def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; +def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; // extload -> zextload -def : ARMPat<(extloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>; -def : ARMPat<(extloadi8 addrmode2:$addr), (LDRB addrmode2:$addr)>; +def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; +def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; +def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; +def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; + def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>; def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>; @@ -2889,19 +3609,45 @@ include "ARMInstrNEON.td" // Coprocessor Instructions. For disassembly only.
// -def CDP : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, - nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), - NoItinerary, "cdp", "\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2", - [/* For disassembly only; pattern left blank */]> { - let Inst{4} = 0; -} - -def CDP2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, - nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), - NoItinerary, "cdp2\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2", +def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + bits<4> opc1; + bits<4> CRn; + bits<4> CRd; + bits<4> cop; + bits<3> opc2; + bits<4> CRm; + + let Inst{3-0} = CRm; + let Inst{4} = 0; + let Inst{7-5} = opc2; + let Inst{11-8} = cop; + let Inst{15-12} = CRd; + let Inst{19-16} = CRn; + let Inst{23-20} = opc1; +} + +def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; - let Inst{4} = 0; + bits<4> opc1; + bits<4> CRn; + bits<4> CRd; + bits<4> cop; + bits<3> opc2; + bits<4> CRm; + + let Inst{3-0} = CRm; + let Inst{4} = 0; + let Inst{7-5} = opc2; + let Inst{11-8} = cop; + let Inst{15-12} = CRd; + let Inst{19-16} = CRn; + let Inst{23-20} = opc1; } class ACI<dag oops, dag iops, string opc, string asm> @@ -3000,110 +3746,164 @@ defm LDC2 : LdStCop<0b1111, 1, "ldc2">; defm STC : LdStCop<{?,?,?,?}, 0, "stc">; defm STC2 : LdStCop<0b1111, 0, "stc2">; -def MCR : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, - GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), - NoItinerary, "mcr", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", - [/* For disassembly only; pattern left blank */]> { - let Inst{20} = 0; - let Inst{4} = 1; -} - -def MCR2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, - GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), - NoItinerary, "mcr2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-28} = 0b1111; - let Inst{20} = 0; - let Inst{4} = 1; -} +//===----------------------------------------------------------------------===// +// Move between coprocessor and ARM core register -- for disassembly only +// -def MRC : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, - GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), - NoItinerary, "mrc", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", - [/* For disassembly only; pattern left blank */]> { - let Inst{20} = 1; +class MovRCopro<string opc, bit direction> + : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + NoItinerary, opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{20} = direction; let Inst{4} = 1; -} -def MRC2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, - GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), - NoItinerary, "mrc2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", - [/* For disassembly only; pattern left blank */]> { + bits<4> Rt; + bits<4> cop; + bits<3> opc1; + bits<3> opc2; + bits<4> CRm; + bits<4> CRn; + + let Inst{15-12} = Rt; + let Inst{11-8} = cop; + let Inst{23-21} = opc1; + let Inst{7-5} = opc2; + let Inst{3-0} = CRm; + let 
Inst{19-16} = CRn; +} + +def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>; +def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>; + +class MovRCopro2<string opc, bit direction> + : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), + [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; - let Inst{20} = 1; + let Inst{20} = direction; let Inst{4} = 1; -} -def MCRR : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, - GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), - NoItinerary, "mcrr", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm", - [/* For disassembly only; pattern left blank */]> { - let Inst{23-20} = 0b0100; -} - -def MCRR2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, - GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), - NoItinerary, "mcrr2\tp$cop, $opc, $Rt, $Rt2, cr$CRm", - [/* For disassembly only; pattern left blank */]> { + bits<4> Rt; + bits<4> cop; + bits<3> opc1; + bits<3> opc2; + bits<4> CRm; + bits<4> CRn; + + let Inst{15-12} = Rt; + let Inst{11-8} = cop; + let Inst{23-21} = opc1; + let Inst{7-5} = opc2; + let Inst{3-0} = CRm; + let Inst{19-16} = CRn; +} + +def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */>; +def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */>; + +class MovRRCopro<string opc, bit direction> + : ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, GPR:$Rt2, c_imm:$CRm), + NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-21} = 0b010; + let Inst{20} = direction; + + bits<4> Rt; + bits<4> Rt2; + bits<4> cop; + bits<4> opc1; + bits<4> CRm; + + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + let Inst{11-8} = cop; + let Inst{7-4} = opc1; + let Inst{3-0} = CRm; +} + +def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */>; +def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; + +class MovRRCopro2<string opc, bit direction> + : ABXI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, GPR:$Rt2, c_imm:$CRm), + NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), + [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; - let Inst{23-20} = 0b0100; -} + let Inst{23-21} = 0b010; + let Inst{20} = direction; -def MRRC : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, - GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), - NoItinerary, "mrrc", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm", - [/* For disassembly only; pattern left blank */]> { - let Inst{23-20} = 0b0101; -} + bits<4> Rt; + bits<4> Rt2; + bits<4> cop; + bits<4> opc1; + bits<4> CRm; -def MRRC2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, - GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), - NoItinerary, "mrrc2\tp$cop, $opc, $Rt, $Rt2, cr$CRm", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-28} = 0b1111; - let Inst{23-20} = 0b0101; + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + let Inst{11-8} = cop; + let Inst{7-4} = opc1; + let Inst{3-0} = CRm; } +def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */>; +def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; + //===----------------------------------------------------------------------===// // Move between special register and ARM core register -- for disassembly only // -def MRS : ABI<0b0001,(outs GPR:$dst),(ins), 
NoItinerary, "mrs", "\t$dst, cpsr", +// Move to ARM core register from Special Register +def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr", [/* For disassembly only; pattern left blank */]> { - let Inst{23-20} = 0b0000; + bits<4> Rd; + let Inst{23-16} = 0b00001111; + let Inst{15-12} = Rd; let Inst{7-4} = 0b0000; } -def MRSsys : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary,"mrs","\t$dst, spsr", +def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr", [/* For disassembly only; pattern left blank */]> { - let Inst{23-20} = 0b0100; + bits<4> Rd; + let Inst{23-16} = 0b01001111; + let Inst{15-12} = Rd; let Inst{7-4} = 0b0000; } -def MSR : ABI<0b0001, (outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, - "msr", "\tcpsr$mask, $src", +// Move from ARM core register to Special Register +// +// No need to have both system and application versions, the encodings are the +// same and the assembly parser has no way to distinguish between them. The mask +// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains +// the mask with the fields to be accessed in the special register. +def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, + "msr", "\t$mask, $Rn", [/* For disassembly only; pattern left blank */]> { - let Inst{23-20} = 0b0010; - let Inst{7-4} = 0b0000; -} + bits<5> mask; + bits<4> Rn; -def MSRi : ABI<0b0011, (outs), (ins so_imm:$a, msr_mask:$mask), NoItinerary, - "msr", "\tcpsr$mask, $a", - [/* For disassembly only; pattern left blank */]> { - let Inst{23-20} = 0b0010; - let Inst{7-4} = 0b0000; + let Inst{23} = 0; + let Inst{22} = mask{4}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = mask{3-0}; + let Inst{15-12} = 0b1111; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rn; } -def MSRsys : ABI<0b0001, (outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, - "msr", "\tspsr$mask, $src", - [/* For disassembly only; pattern left blank */]> { - let Inst{23-20} = 0b0110; - let Inst{7-4} = 0b0000; -} +def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, + "msr", "\t$mask, $a", + [/* For disassembly only; pattern left blank */]> { + bits<5> mask; + bits<12> a; -def MSRsysi : ABI<0b0011, (outs), (ins so_imm:$a, msr_mask:$mask), NoItinerary, - "msr", "\tspsr$mask, $a", - [/* For disassembly only; pattern left blank */]> { - let Inst{23-20} = 0b0110; - let Inst{7-4} = 0b0000; + let Inst{23} = 0; + let Inst{22} = mask{4}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = mask{3-0}; + let Inst{15-12} = 0b1111; + let Inst{11-0} = a; } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 4d2f116..1e2e550 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -16,11 +16,17 @@ //===----------------------------------------------------------------------===// def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; +def SDTARMVCMPZ : SDTypeProfile<1, 1, []>; def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>; +def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>; def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>; +def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>; +def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>; def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>; def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>; +def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>; +def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>; def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>; def NEONvtst : 
SDNode<"ARMISD::VTST", SDTARMVCMP>; @@ -69,6 +75,11 @@ def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; +def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; +def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; + def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; // VDUPLANE can produce a quad-register result from a double-register source, @@ -129,830 +140,1506 @@ def nModImm : Operand<i32> { // NEON load / store instructions //===----------------------------------------------------------------------===// -// Use vldmia to load a Q register as a D register pair. -// This is equivalent to VLDMD except that it has a Q register operand -// instead of a pair of D registers. -def VLDMQ - : AXDI4<(outs QPR:$dst), (ins addrmode4:$addr, pred:$p), - IndexModeNone, IIC_fpLoadm, - "vldm${addr:submode}${p}\t$addr, ${dst:dregpair}", "", - [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]>; - -let mayLoad = 1, neverHasSideEffects = 1 in { -// Use vld1 to load a Q register as a D register pair. -// This alternative to VLDMQ allows an alignment to be specified. -// This is equivalent to VLD1q64 except that it has a Q register operand. -def VLD1q - : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr), - IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>; -} // mayLoad = 1, neverHasSideEffects = 1 - -// Use vstmia to store a Q register as a D register pair. -// This is equivalent to VSTMD except that it has a Q register operand -// instead of a pair of D registers. -def VSTMQ - : AXDI4<(outs), (ins QPR:$src, addrmode4:$addr, pred:$p), - IndexModeNone, IIC_fpStorem, - "vstm${addr:submode}${p}\t$addr, ${src:dregpair}", "", - [(store (v2f64 QPR:$src), addrmode4:$addr)]>; - -let mayStore = 1, neverHasSideEffects = 1 in { -// Use vst1 to store a Q register as a D register pair. -// This alternative to VSTMQ allows an alignment to be specified. -// This is equivalent to VST1q64 except that it has a Q register operand. -def VST1q - : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src), - IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>; -} // mayStore = 1, neverHasSideEffects = 1 - -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +// Use VLDM to load a Q register as a D register pair. +// This is a pseudo instruction that is expanded to VLDMD after reg alloc. +def VLDMQIA + : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), + IIC_fpLoad_m, "", + [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; +def VLDMQDB + : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), + IIC_fpLoad_m, "", + [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; + +// Use VSTM to store a Q register as a D register pair. +// This is a pseudo instruction that is expanded to VSTMD after reg alloc. +def VSTMQIA + : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), + IIC_fpStore_m, "", + [(store (v2f64 QPR:$src), GPR:$Rn)]>; +def VSTMQDB + : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), + IIC_fpStore_m, "", + [(store (v2f64 QPR:$src), GPR:$Rn)]>; // Classes for VLD* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. 
-class VLDQPseudo - : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VST, "">; -class VLDQWBPseudo +class VLDQPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; +class VLDQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VST, + (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; -class VLDQQPseudo - : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), IIC_VST, "">; -class VLDQQWBPseudo +class VLDQQPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; +class VLDQQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VST, + (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; -class VLDQQQQWBPseudo +class VLDQQQQPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,"">; +class VLDQQQQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST, + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, "$addr.addr = $wb, $src = $dst">; +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { + // VLD1 : Vector Load (multiple single elements) class VLD1D<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), - (ins addrmode6:$addr), IIC_VLD1, - "vld1", Dt, "\\{$dst\\}, $addr", "", []>; + : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd), + (ins addrmode6:$Rn), IIC_VLD1, + "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} class VLD1Q<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr), IIC_VLD1, - "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; + : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2), + (ins addrmode6:$Rn), IIC_VLD1x2, + "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} -def VLD1d8 : VLD1D<0b0000, "8">; -def VLD1d16 : VLD1D<0b0100, "16">; -def VLD1d32 : VLD1D<0b1000, "32">; -def VLD1d64 : VLD1D<0b1100, "64">; +def VLD1d8 : VLD1D<{0,0,0,?}, "8">; +def VLD1d16 : VLD1D<{0,1,0,?}, "16">; +def VLD1d32 : VLD1D<{1,0,0,?}, "32">; +def VLD1d64 : VLD1D<{1,1,0,?}, "64">; -def VLD1q8 : VLD1Q<0b0000, "8">; -def VLD1q16 : VLD1Q<0b0100, "16">; -def VLD1q32 : VLD1Q<0b1000, "32">; -def VLD1q64 : VLD1Q<0b1100, "64">; +def VLD1q8 : VLD1Q<{0,0,?,?}, "8">; +def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; +def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; +def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; -def VLD1q8Pseudo : VLDQPseudo; -def VLD1q16Pseudo : VLDQPseudo; -def VLD1q32Pseudo : VLDQPseudo; -def VLD1q64Pseudo : VLDQPseudo; +def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>; +def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>; +def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>; +def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>; // ...with address register writeback: class VLD1DWB<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, - "vld1", Dt, "\\{$dst\\}, $addr$offset", - "$addr.addr = $wb", []>; + : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u, + "vld1", Dt, "\\{$Vd\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; +} class VLD1QWB<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb), - (ins addrmode6:$addr, 
am6offset:$offset), IIC_VLD1, - "vld1", Dt, "${dst:dregpair}, $addr$offset", - "$addr.addr = $wb", []>; - -def VLD1d8_UPD : VLD1DWB<0b0000, "8">; -def VLD1d16_UPD : VLD1DWB<0b0100, "16">; -def VLD1d32_UPD : VLD1DWB<0b1000, "32">; -def VLD1d64_UPD : VLD1DWB<0b1100, "64">; - -def VLD1q8_UPD : VLD1QWB<0b0000, "8">; -def VLD1q16_UPD : VLD1QWB<0b0100, "16">; -def VLD1q32_UPD : VLD1QWB<0b1000, "32">; -def VLD1q64_UPD : VLD1QWB<0b1100, "64">; - -def VLD1q8Pseudo_UPD : VLDQWBPseudo; -def VLD1q16Pseudo_UPD : VLDQWBPseudo; -def VLD1q32Pseudo_UPD : VLDQWBPseudo; -def VLD1q64Pseudo_UPD : VLDQWBPseudo; + : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u, + "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} + +def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">; +def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">; +def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">; +def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">; + +def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">; +def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">; +def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">; +def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">; + +def VLD1q8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; +def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; +def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; +def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; // ...with 3 registers (some of these are only for the disassembler): class VLD1D3<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, - "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; + : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, + "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} class VLD1D3WB<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, - "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>; + : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt, + "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; +} -def VLD1d8T : VLD1D3<0b0000, "8">; -def VLD1d16T : VLD1D3<0b0100, "16">; -def VLD1d32T : VLD1D3<0b1000, "32">; -def VLD1d64T : VLD1D3<0b1100, "64">; +def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; +def VLD1d16T : VLD1D3<{0,1,0,?}, "16">; +def VLD1d32T : VLD1D3<{1,0,0,?}, "32">; +def VLD1d64T : VLD1D3<{1,1,0,?}, "64">; -def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">; -def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">; -def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">; -def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">; +def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">; +def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">; +def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">; +def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">; -def VLD1d64TPseudo : VLDQQPseudo; -def VLD1d64TPseudo_UPD : VLDQQWBPseudo; +def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; +def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>; // ...with 4 registers (some of these are only for the disassembler): class VLD1D4<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, - "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; + : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, 
DPR:$dst4), + (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt, + "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} class VLD1D4WB<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0010,op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, - "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb", - []>; + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt, + "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", + []> { + let Inst{5-4} = Rn{5-4}; +} -def VLD1d8Q : VLD1D4<0b0000, "8">; -def VLD1d16Q : VLD1D4<0b0100, "16">; -def VLD1d32Q : VLD1D4<0b1000, "32">; -def VLD1d64Q : VLD1D4<0b1100, "64">; +def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">; +def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">; +def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">; +def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">; -def VLD1d8Q_UPD : VLD1D4WB<0b0000, "8">; -def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">; -def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">; -def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">; +def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">; +def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">; +def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">; +def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">; -def VLD1d64QPseudo : VLDQQPseudo; -def VLD1d64QPseudo_UPD : VLDQQWBPseudo; +def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; +def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>; // VLD2 : Vector Load (multiple 2-element structures) class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr), IIC_VLD2, - "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), + (ins addrmode6:$Rn), IIC_VLD2, + "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} class VLD2Q<bits<4> op7_4, string Dt> : NLdSt<0, 0b10, 0b0011, op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr), IIC_VLD2, - "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$Rn), IIC_VLD2x2, + "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} -def VLD2d8 : VLD2D<0b1000, 0b0000, "8">; -def VLD2d16 : VLD2D<0b1000, 0b0100, "16">; -def VLD2d32 : VLD2D<0b1000, 0b1000, "32">; +def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">; +def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">; +def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">; -def VLD2q8 : VLD2Q<0b0000, "8">; -def VLD2q16 : VLD2Q<0b0100, "16">; -def VLD2q32 : VLD2Q<0b1000, "32">; +def VLD2q8 : VLD2Q<{0,0,?,?}, "8">; +def VLD2q16 : VLD2Q<{0,1,?,?}, "16">; +def VLD2q32 : VLD2Q<{1,0,?,?}, "32">; -def VLD2d8Pseudo : VLDQPseudo; -def VLD2d16Pseudo : VLDQPseudo; -def VLD2d32Pseudo : VLDQPseudo; +def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>; +def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>; +def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>; -def VLD2q8Pseudo : VLDQQPseudo; -def VLD2q16Pseudo : VLDQQPseudo; -def VLD2q32Pseudo : VLDQQPseudo; +def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>; +def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; +def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; // ...with address register writeback: class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, 
- "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset", - "$addr.addr = $wb", []>; + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u, + "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} class VLD2QWB<bits<4> op7_4, string Dt> : NLdSt<0, 0b10, 0b0011, op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, - "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", - "$addr.addr = $wb", []>; + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u, + "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} -def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">; -def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">; -def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">; +def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">; +def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">; +def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">; -def VLD2q8_UPD : VLD2QWB<0b0000, "8">; -def VLD2q16_UPD : VLD2QWB<0b0100, "16">; -def VLD2q32_UPD : VLD2QWB<0b1000, "32">; +def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">; +def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">; +def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">; -def VLD2d8Pseudo_UPD : VLDQWBPseudo; -def VLD2d16Pseudo_UPD : VLDQWBPseudo; -def VLD2d32Pseudo_UPD : VLDQWBPseudo; +def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; +def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; +def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; -def VLD2q8Pseudo_UPD : VLDQQWBPseudo; -def VLD2q16Pseudo_UPD : VLDQQWBPseudo; -def VLD2q32Pseudo_UPD : VLDQQWBPseudo; +def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; +def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; +def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; // ...with double-spaced registers (for disassembly only): -def VLD2b8 : VLD2D<0b1001, 0b0000, "8">; -def VLD2b16 : VLD2D<0b1001, 0b0100, "16">; -def VLD2b32 : VLD2D<0b1001, 0b1000, "32">; -def VLD2b8_UPD : VLD2DWB<0b1001, 0b0000, "8">; -def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">; -def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">; +def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">; +def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">; +def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">; +def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">; +def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">; +def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr), IIC_VLD3, - "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$Rn), IIC_VLD3, + "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} -def VLD3d8 : VLD3D<0b0100, 0b0000, "8">; -def VLD3d16 : VLD3D<0b0100, 0b0100, "16">; -def VLD3d32 : VLD3D<0b0100, 0b1000, "32">; +def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; +def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; +def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; -def VLD3d8Pseudo : VLDQQPseudo; -def VLD3d16Pseudo : VLDQQPseudo; -def VLD3d32Pseudo : VLDQQPseudo; +def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>; +def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>; +def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>; // ...with address register writeback: 
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3, - "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset", - "$addr.addr = $wb", []>; - -def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; -def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; -def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; - -def VLD3d8Pseudo_UPD : VLDQQWBPseudo; -def VLD3d16Pseudo_UPD : VLDQQWBPseudo; -def VLD3d32Pseudo_UPD : VLDQQWBPseudo; - -// ...with double-spaced registers (non-updating versions for disassembly only): -def VLD3q8 : VLD3D<0b0101, 0b0000, "8">; -def VLD3q16 : VLD3D<0b0101, 0b0100, "16">; -def VLD3q32 : VLD3D<0b0101, 0b1000, "32">; -def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">; -def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">; -def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">; - -def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo; -def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo; -def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo; + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, + "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; +} + +def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; +def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; +def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; + +def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; +def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; +def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>; + +// ...with double-spaced registers: +def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; +def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; +def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; +def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; +def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; +def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; + +def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; +def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; +def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; // ...alternate versions to be allocated odd register numbers: -def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>; +def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>; +def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>; + +def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; +def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; +def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; // VLD4 : Vector Load (multiple 4-element structures) class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr), IIC_VLD4, - "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$Rn), IIC_VLD4, + "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} -def VLD4d8 : VLD4D<0b0000, 0b0000, "8">; -def VLD4d16 : VLD4D<0b0000, 0b0100, "16">; -def VLD4d32 : VLD4D<0b0000, 0b1000, "32">; +def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; +def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; +def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; -def VLD4d8Pseudo : VLDQQPseudo; -def VLD4d16Pseudo : VLDQQPseudo; -def VLD4d32Pseudo : VLDQQPseudo; +def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>; +def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>; +def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>; // 
...with address register writeback: class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4, - "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", - "$addr.addr = $wb", []>; - -def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">; -def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">; -def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">; - -def VLD4d8Pseudo_UPD : VLDQQWBPseudo; -def VLD4d16Pseudo_UPD : VLDQQWBPseudo; -def VLD4d32Pseudo_UPD : VLDQQWBPseudo; - -// ...with double-spaced registers (non-updating versions for disassembly only): -def VLD4q8 : VLD4D<0b0001, 0b0000, "8">; -def VLD4q16 : VLD4D<0b0001, 0b0100, "16">; -def VLD4q32 : VLD4D<0b0001, 0b1000, "32">; -def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">; -def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">; -def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">; - -def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo; -def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo; -def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo; + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, + "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} + +def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; +def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; +def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; + +def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; +def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; +def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; + +// ...with double-spaced registers: +def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; +def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; +def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; +def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; +def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; +def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; + +def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; +def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; +def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; // ...alternate versions to be allocated odd register numbers: -def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>; +def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>; +def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>; + +def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; +def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; +def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; + +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 + +// Classes for VLD*LN pseudo-instructions with multi-register operands. +// These are expanded to real instructions after register allocation. 
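// (Sketch: a lane load into a Q register writes only one D subregister, so a
// VLD1LNq32Pseudo of lane 3 of q1 expands to roughly
//   vld1.32 {d3[1]}, [r0]
// and the tied $src operand keeps the untouched lanes live across it.)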
+class VLDQLNPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QPR:$dst), + (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), + itin, "$src = $dst">; +class VLDQLNWBPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; +class VLDQQLNPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQPR:$dst), + (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), + itin, "$src = $dst">; +class VLDQQLNWBPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; +class VLDQQQQLNPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQQQPR:$dst), + (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), + itin, "$src = $dst">; +class VLDQQQQLNWBPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; // VLD1LN : Vector Load (single element to one lane) -// FIXME: Not yet implemented. +class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, + PatFrag LoadOp> + : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), + (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), + IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", + "$src = $Vd", + [(set DPR:$Vd, (vector_insert (Ty DPR:$src), + (i32 (LoadOp addrmode6:$Rn)), + imm:$lane))]> { + let Rm = 0b1111; +} +class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> { + let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), + (i32 (LoadOp addrmode6:$addr)), + imm:$lane))]; +} + +def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { + let Inst{7-5} = lane{2-0}; +} +def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { + let Inst{7-6} = lane{1-0}; + let Inst{4} = Rn{4}; +} +def VLD1LNd32 : VLD1LN<0b1000, {?,0,?,?}, "32", v2i32, load> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{4}; + let Inst{4} = Rn{4}; +} + +def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; +def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>; +def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>; + +def : Pat<(vector_insert (v2f32 DPR:$src), + (f32 (load addrmode6:$addr)), imm:$lane), + (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; +def : Pat<(vector_insert (v4f32 QPR:$src), + (f32 (load addrmode6:$addr)), imm:$lane), + (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; + +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { + +// ...with address register writeback: +class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, + "\\{$Vd[$lane]\\}, $Rn$Rm", + "$src = $Vd, $Rn.addr = $wb", []>; + +def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; + let Inst{4} = Rn{4}; +} +def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{4}; + let Inst{4} = Rn{4}; +} + +def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; +def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; +def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; // VLD2LN : Vector Load (single 2-element structure to one lane) class VLD2LN<bits<4> op11_8, bits<4> 
 
 // VLD2LN   : Vector Load (single 2-element structure to one lane)
 class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
-          IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
-          "$src1 = $dst1, $src2 = $dst2", []>;
+  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
+          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+          IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
+          "$src1 = $Vd, $src2 = $dst2", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
-def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8">;
-def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
-def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;
+def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
 
 // ...with double-spaced registers:
-def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
-def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;
+def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
-// ...alternate versions to be allocated odd register numbers:
-def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">;
-def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">;
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
 
 // ...with address register writeback:
 class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
-           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt,
-          "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset",
-          "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>;
+  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
+          "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
+          "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
-def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8">;
-def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
-def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;
+def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
-def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
-def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;
+def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+
+def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
+
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
 
 // VLD3LN   : Vector Load (single 3-element structure to one lane)
 class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
-           nohash_imm:$lane), IIC_VLD3, "vld3", Dt,
-          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
-          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
+  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
+           nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
+          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
+          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
+  let Rm = 0b1111;
+}
+
+def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
+  let Inst{7} = lane{0};
+}
-def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8">;
-def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
-def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;
+def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
 
 // ...with double-spaced registers:
-def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
-def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;
+def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
+  let Inst{7} = lane{0};
+}
-// ...alternate versions to be allocated odd register numbers:
-def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">;
-def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">;
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
 
 // ...with address register writeback:
 class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, op11_8, op7_4,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
+  : NLdStLn<1, 0b10, op11_8, op7_4,
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
-          IIC_VLD3, "vld3", Dt,
-          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset",
-          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb",
+          IIC_VLD3lnu, "vld3", Dt,
+          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
+          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
           []>;
-def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8">;
-def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
-def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;
+def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
+  let Inst{7} = lane{0};
+}
+
+def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+
+def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
+  let Inst{7} = lane{0};
+}
-def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
-def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
 
 // VLD4LN   : Vector Load (single 4-element structure to one lane)
 class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, op11_8, op7_4,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
-           nohash_imm:$lane), IIC_VLD4, "vld4", Dt,
-          "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
-          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
+  : NLdStLn<1, 0b10, op11_8, op7_4,
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+           nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
+          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
+          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
-def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8">;
-def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
-def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;
+def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
 
 // ...with double-spaced registers:
-def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
-def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;
+def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{5};
+}
-// ...alternate versions to be allocated odd register numbers:
-def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">;
-def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">;
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
 
 // ...with address register writeback:
 class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, op11_8, op7_4,
-          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
+  : NLdStLn<1, 0b10, op11_8, op7_4,
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
          DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
-          IIC_VLD4, "vld4", Dt,
-"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset",
-"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
-          []>;
+          IIC_VLD4lnu, "vld4", Dt,
+"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
+"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
+          []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{5};
+}
+
+def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
-def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8">;
-def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
-def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;
+def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{5};
+}
-def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
-def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
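[import note] The vld2/vld3/vld4 single-lane forms above define two to four D registers at once, which is why their Q-register variants are modeled as pseudos over the QQPR/QQQQPR tuple classes until register allocation has assigned a consecutive (or double-spaced) register block. A hedged C sketch of a de-interleaving lane load that maps onto this family (names illustrative):

    #include <arm_neon.h>

    /* Read one {a,b,c,d} group from p and insert its members into lane 0
       of four D registers: the vld4 single-lane form modeled above. */
    int32x2x4_t load_group_lane0(const int32_t *p, int32x2x4_t v) {
      return vld4_lane_s32(p, v, 0);
    }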
 
 // VLD1DUP  : Vector Load (single element to all lanes)
+class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
+          IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
+          [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
+  let Pattern = [(set QPR:$dst,
+                      (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
+}
+
+def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
+def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
+def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
+
+def VLD1DUPq8Pseudo  : VLD1QDUPPseudo<v16i8, extloadi8>;
+def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
+def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
+
+def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+          (VLD1DUPd32 addrmode6:$addr)>;
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+          (VLD1DUPq32Pseudo addrmode6:$addr)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+class VLD1QDUP<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+          (ins addrmode6dup:$Rn), IIC_VLD1dup,
+          "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8">;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD1DUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+          "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+class VLD1QDUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+          "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD1DUPd8_UPD  : VLD1DUPWB<{0,0,0,0}, "8">;
+def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
+def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
+
+def VLD1DUPq8_UPD  : VLD1QDUPWB<{0,0,1,0}, "8">;
+def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
+def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+
+def VLD1DUPq8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+
 // VLD2DUP  : Vector Load (single 2-element structure to all lanes)
+class VLD2DUP<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
+          (ins addrmode6dup:$Rn), IIC_VLD2dup,
+          "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8">;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;
+
+def VLD2DUPd8Pseudo  : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8">;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD2DUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
+          "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8_UPD  : VLD2DUPWB<{0,0,0,0}, "8">;
+def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
+def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;
+
+def VLD2DUPd8x2_UPD  : VLD2DUPWB<{0,0,1,0}, "8">;
+def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
+def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;
+
+def VLD2DUPd8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+
 // VLD3DUP  : Vector Load (single 3-element structure to all lanes)
+class VLD3DUP<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+          (ins addrmode6dup:$Rn), IIC_VLD3dup,
+          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
+def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
+def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
+
+def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD3DUPd8x2  : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD3DUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
+          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8">;
+def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
+def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
+
+def VLD3DUPd8x2_UPD  : VLD3DUPWB<{0,0,1,0}, "8">;
+def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
+def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+
+def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+
 // VLD4DUP  : Vector Load (single 4-element structure to all lanes)
-//   FIXME: Not yet implemented.
+class VLD4DUP<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1111, op7_4,
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6dup:$Rn), IIC_VLD4dup,
+          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
+def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
+def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD4DUPd8x2  : VLD4DUP<{0,0,1,?}, "8">;
+def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
+def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+// ...with address register writeback:
+class VLD4DUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1111, op7_4,
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
+          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
+def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
+def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8x2_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
+def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
+def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+
 } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
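[import note] That closes the load side of this hunk. The VLD*DUP sections above give the all-lanes ("dup") loads real encodings; the x2 double-spaced variants are assembler/disassembler-only, as the comments note. A small C sketch of a broadcast load (names illustrative):

    #include <arm_neon.h>

    /* Replicate *p into every lane of a Q register; expected to select
       through VLD1DUPq32Pseudo to "vld1.32 {dN[], dM[]}, [r]". */
    float32x4_t splat_load(const float *p) {
      return vld1q_dup_f32(p);
    }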
 
 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
 
 // Classes for VST* pseudo-instructions with multi-register operands.
 // These are expanded to real instructions after register allocation.
-class VSTQPseudo
-  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">;
-class VSTQWBPseudo
+class VSTQPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
+class VSTQWBPseudo<InstrItinClass itin>
   : PseudoNLdSt<(outs GPR:$wb),
-                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
+                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                 "$addr.addr = $wb">;
-class VSTQQPseudo
-  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
-class VSTQQWBPseudo
+class VSTQQPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
+class VSTQQWBPseudo<InstrItinClass itin>
   : PseudoNLdSt<(outs GPR:$wb),
-                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST,
+                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                 "$addr.addr = $wb">;
-class VSTQQQQWBPseudo
+class VSTQQQQPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
+class VSTQQQQWBPseudo<InstrItinClass itin>
   : PseudoNLdSt<(outs GPR:$wb),
-                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                 "$addr.addr = $wb">;
 
 // VST1     : Vector Store (multiple single elements)
 class VST1D<bits<4> op7_4, string Dt>
-  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
-          "vst1", Dt, "\\{$src\\}, $addr", "", []>;
+  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd),
+          IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
 class VST1Q<bits<4> op7_4, string Dt>
   : NLdSt<0,0b00,0b1010,op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
-          "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2,
+          "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
 
-def VST1d8  : VST1D<0b0000, "8">;
-def VST1d16 : VST1D<0b0100, "16">;
-def VST1d32 : VST1D<0b1000, "32">;
-def VST1d64 : VST1D<0b1100, "64">;
+def VST1d8  : VST1D<{0,0,0,?}, "8">;
+def VST1d16 : VST1D<{0,1,0,?}, "16">;
+def VST1d32 : VST1D<{1,0,0,?}, "32">;
+def VST1d64 : VST1D<{1,1,0,?}, "64">;
 
-def VST1q8  : VST1Q<0b0000, "8">;
-def VST1q16 : VST1Q<0b0100, "16">;
-def VST1q32 : VST1Q<0b1000, "32">;
-def VST1q64 : VST1Q<0b1100, "64">;
+def VST1q8  : VST1Q<{0,0,?,?}, "8">;
+def VST1q16 : VST1Q<{0,1,?,?}, "16">;
+def VST1q32 : VST1Q<{1,0,?,?}, "32">;
+def VST1q64 : VST1Q<{1,1,?,?}, "64">;
 
-def VST1q8Pseudo  : VSTQPseudo;
-def VST1q16Pseudo : VSTQPseudo;
-def VST1q32Pseudo : VSTQPseudo;
-def VST1q64Pseudo : VSTQPseudo;
+def VST1q8Pseudo  : VSTQPseudo<IIC_VST1x2>;
+def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
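[import note] The plain VST1 forms above now carry per-width itineraries instead of the old catch-all IIC_VST. A C sketch of a store whose pointer update the compiler may fold into the post-incrementing _UPD variants defined next (names illustrative):

    #include <arm_neon.h>

    /* Two-register store; the increment can be folded into
       "vst1.32 {dN, dM}, [r]!" via the VST1..._UPD writeback forms. */
    int32_t *store_and_advance(int32_t *p, int32x4_t v) {
      vst1q_s32(p, v);
      return p + 4;  /* 4 x i32 = 16 bytes, one Q register */
    }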
"\\{$Vd, $src2\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} -def VST1d8_UPD : VST1DWB<0b0000, "8">; -def VST1d16_UPD : VST1DWB<0b0100, "16">; -def VST1d32_UPD : VST1DWB<0b1000, "32">; -def VST1d64_UPD : VST1DWB<0b1100, "64">; +def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">; +def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">; +def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">; +def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">; -def VST1q8_UPD : VST1QWB<0b0000, "8">; -def VST1q16_UPD : VST1QWB<0b0100, "16">; -def VST1q32_UPD : VST1QWB<0b1000, "32">; -def VST1q64_UPD : VST1QWB<0b1100, "64">; +def VST1q8_UPD : VST1QWB<{0,0,?,?}, "8">; +def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">; +def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">; +def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">; -def VST1q8Pseudo_UPD : VSTQWBPseudo; -def VST1q16Pseudo_UPD : VSTQWBPseudo; -def VST1q32Pseudo_UPD : VSTQWBPseudo; -def VST1q64Pseudo_UPD : VSTQWBPseudo; +def VST1q8Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>; +def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>; +def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>; +def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>; // ...with 3 registers (some of these are only for the disassembler): class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), - IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), + IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} class VST1D3WB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, DPR:$src3), - IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", - "$addr.addr = $wb", []>; + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, DPR:$src3), + IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; +} -def VST1d8T : VST1D3<0b0000, "8">; -def VST1d16T : VST1D3<0b0100, "16">; -def VST1d32T : VST1D3<0b1000, "32">; -def VST1d64T : VST1D3<0b1100, "64">; +def VST1d8T : VST1D3<{0,0,0,?}, "8">; +def VST1d16T : VST1D3<{0,1,0,?}, "16">; +def VST1d32T : VST1D3<{1,0,0,?}, "32">; +def VST1d64T : VST1D3<{1,1,0,?}, "64">; -def VST1d8T_UPD : VST1D3WB<0b0000, "8">; -def VST1d16T_UPD : VST1D3WB<0b0100, "16">; -def VST1d32T_UPD : VST1D3WB<0b1000, "32">; -def VST1d64T_UPD : VST1D3WB<0b1100, "64">; +def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">; +def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">; +def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">; +def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">; -def VST1d64TPseudo : VSTQQPseudo; -def VST1d64TPseudo_UPD : VSTQQWBPseudo; +def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; +def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>; // ...with 4 registers (some of these are only for the disassembler): class VST1D4<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", - []>; + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "", + []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} class VST1D4WB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, "vst1", 
Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", - "$addr.addr = $wb", []>; + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u, + "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} -def VST1d8Q : VST1D4<0b0000, "8">; -def VST1d16Q : VST1D4<0b0100, "16">; -def VST1d32Q : VST1D4<0b1000, "32">; -def VST1d64Q : VST1D4<0b1100, "64">; +def VST1d8Q : VST1D4<{0,0,?,?}, "8">; +def VST1d16Q : VST1D4<{0,1,?,?}, "16">; +def VST1d32Q : VST1D4<{1,0,?,?}, "32">; +def VST1d64Q : VST1D4<{1,1,?,?}, "64">; -def VST1d8Q_UPD : VST1D4WB<0b0000, "8">; -def VST1d16Q_UPD : VST1D4WB<0b0100, "16">; -def VST1d32Q_UPD : VST1D4WB<0b1000, "32">; -def VST1d64Q_UPD : VST1D4WB<0b1100, "64">; +def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">; +def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">; +def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">; +def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">; -def VST1d64QPseudo : VSTQQPseudo; -def VST1d64QPseudo_UPD : VSTQQWBPseudo; +def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; +def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>; // VST2 : Vector Store (multiple 2-element structures) class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), - IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>; + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), + IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} class VST2Q<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0011, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", - "", []>; + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", + "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} -def VST2d8 : VST2D<0b1000, 0b0000, "8">; -def VST2d16 : VST2D<0b1000, 0b0100, "16">; -def VST2d32 : VST2D<0b1000, 0b1000, "32">; +def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">; +def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">; +def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">; -def VST2q8 : VST2Q<0b0000, "8">; -def VST2q16 : VST2Q<0b0100, "16">; -def VST2q32 : VST2Q<0b1000, "32">; +def VST2q8 : VST2Q<{0,0,?,?}, "8">; +def VST2q16 : VST2Q<{0,1,?,?}, "16">; +def VST2q32 : VST2Q<{1,0,?,?}, "32">; -def VST2d8Pseudo : VSTQPseudo; -def VST2d16Pseudo : VSTQPseudo; -def VST2d32Pseudo : VSTQPseudo; +def VST2d8Pseudo : VSTQPseudo<IIC_VST2>; +def VST2d16Pseudo : VSTQPseudo<IIC_VST2>; +def VST2d32Pseudo : VSTQPseudo<IIC_VST2>; -def VST2q8Pseudo : VSTQQPseudo; -def VST2q16Pseudo : VSTQQPseudo; -def VST2q32Pseudo : VSTQQPseudo; +def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>; +def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; +def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; // ...with address register writeback: class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2), - IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset", - "$addr.addr = $wb", []>; + (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2), + IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} class VST2QWB<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), 
- IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", - "$addr.addr = $wb", []>; + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u, + "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} -def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">; -def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">; -def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">; +def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">; +def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">; +def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">; -def VST2q8_UPD : VST2QWB<0b0000, "8">; -def VST2q16_UPD : VST2QWB<0b0100, "16">; -def VST2q32_UPD : VST2QWB<0b1000, "32">; +def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">; +def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">; +def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">; -def VST2d8Pseudo_UPD : VSTQWBPseudo; -def VST2d16Pseudo_UPD : VSTQWBPseudo; -def VST2d32Pseudo_UPD : VSTQWBPseudo; +def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; +def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; +def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; -def VST2q8Pseudo_UPD : VSTQQWBPseudo; -def VST2q16Pseudo_UPD : VSTQQWBPseudo; -def VST2q32Pseudo_UPD : VSTQQWBPseudo; +def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; // ...with double-spaced registers (for disassembly only): -def VST2b8 : VST2D<0b1001, 0b0000, "8">; -def VST2b16 : VST2D<0b1001, 0b0100, "16">; -def VST2b32 : VST2D<0b1001, 0b1000, "32">; -def VST2b8_UPD : VST2DWB<0b1001, 0b0000, "8">; -def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">; -def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">; +def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">; +def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">; +def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">; +def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">; +def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">; +def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">; // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, + "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} -def VST3d8 : VST3D<0b0100, 0b0000, "8">; -def VST3d16 : VST3D<0b0100, 0b0100, "16">; -def VST3d32 : VST3D<0b0100, 0b1000, "32">; +def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; +def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; +def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; -def VST3d8Pseudo : VSTQQPseudo; -def VST3d16Pseudo : VSTQQPseudo; -def VST3d32Pseudo : VSTQQPseudo; +def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>; +def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>; +def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>; // ...with address register writeback: class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", - "$addr.addr = $wb", []>; - -def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">; -def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">; -def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">; - -def VST3d8Pseudo_UPD : VSTQQWBPseudo; -def VST3d16Pseudo_UPD : VSTQQWBPseudo; -def VST3d32Pseudo_UPD 
: VSTQQWBPseudo; - -// ...with double-spaced registers (non-updating versions for disassembly only): -def VST3q8 : VST3D<0b0101, 0b0000, "8">; -def VST3q16 : VST3D<0b0101, 0b0100, "16">; -def VST3q32 : VST3D<0b0101, 0b1000, "32">; -def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">; -def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">; -def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">; - -def VST3q8Pseudo_UPD : VSTQQQQWBPseudo; -def VST3q16Pseudo_UPD : VSTQQQQWBPseudo; -def VST3q32Pseudo_UPD : VSTQQQQWBPseudo; + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, + "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; +} + +def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; +def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; +def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; + +def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; +def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; +def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; + +// ...with double-spaced registers: +def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; +def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">; +def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">; +def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; +def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; +def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; + +def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; +def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; +def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; // ...alternate versions to be allocated odd register numbers: -def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo; -def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo; -def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>; +def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>; +def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>; + +def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; +def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; +def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; // VST4 : Vector Store (multiple 4-element structures) class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", - "", []>; + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", + "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} -def VST4d8 : VST4D<0b0000, 0b0000, "8">; -def VST4d16 : VST4D<0b0000, 0b0100, "16">; -def VST4d32 : VST4D<0b0000, 0b1000, "32">; +def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; +def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; +def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; -def VST4d8Pseudo : VSTQQPseudo; -def VST4d16Pseudo : VSTQQPseudo; -def VST4d32Pseudo : VSTQQPseudo; +def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>; +def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>; +def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>; // ...with address register writeback: class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, - "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", - "$addr.addr = $wb", []>; - -def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">; -def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">; -def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">; - -def VST4d8Pseudo_UPD : VSTQQWBPseudo; -def VST4d16Pseudo_UPD : 
VSTQQWBPseudo; -def VST4d32Pseudo_UPD : VSTQQWBPseudo; - -// ...with double-spaced registers (non-updating versions for disassembly only): -def VST4q8 : VST4D<0b0001, 0b0000, "8">; -def VST4q16 : VST4D<0b0001, 0b0100, "16">; -def VST4q32 : VST4D<0b0001, 0b1000, "32">; -def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">; -def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">; -def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">; - -def VST4q8Pseudo_UPD : VSTQQQQWBPseudo; -def VST4q16Pseudo_UPD : VSTQQQQWBPseudo; -def VST4q32Pseudo_UPD : VSTQQQQWBPseudo; + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, + "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} + +def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; +def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; +def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; + +def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; +def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; +def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; + +// ...with double-spaced registers: +def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; +def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">; +def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">; +def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; +def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">; +def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; + +def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; +def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; +def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; // ...alternate versions to be allocated odd register numbers: -def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo; -def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo; -def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo; +def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>; +def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>; +def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>; + +def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; +def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; +def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; + +} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 + +// Classes for VST*LN pseudo-instructions with multi-register operands. +// These are expanded to real instructions after register allocation. +class VSTQLNPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), + itin, "">; +class VSTQLNWBPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb">; +class VSTQQLNPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), + itin, "">; +class VSTQQLNWBPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb">; +class VSTQQQQLNPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), + itin, "">; +class VSTQQQQLNWBPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb">; // VST1LN : Vector Store (single element from one lane) -// FIXME: Not yet implemented. 
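[import note] These VST*LN pseudo classes mirror the VLD*LN ones at the top of this hunk, minus the tied destination operands. A C sketch of the lane store the following VST1LN patterns are meant to catch (names illustrative):

    #include <arm_neon.h>

    /* Store lane 2 of v to *p; selectable as a single vst1.32 lane store
       (VST1LNq32Pseudo before register allocation) rather than an
       extract through a GPR. */
    void store_lane2(int32_t *p, int32x4_t v) {
      vst1q_lane_s32(p, v, 2);
    }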
 
 // VST1LN   : Vector Store (single element from one lane)
-//   FIXME: Not yet implemented.
+class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+             PatFrag StoreOp, SDNode ExtractOp>
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
+          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
+          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
+  let Rm = 0b1111;
+}
+class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+  : VSTQLNPseudo<IIC_VST1ln> {
+  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+                          addrmode6:$addr)];
+}
+
+def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
+                      NEONvgetlaneu> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
+                       NEONvgetlaneu> {
+  let Inst{7-6} = lane{1-0};
+  let Inst{4} = Rn{5};
+}
+def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
+  let Inst{7} = lane{0};
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
+def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
+def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
+
+def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
+          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
+          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// ...with address register writeback:
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
+          "\\{$Vd[$lane]\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []>;
+
+def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+  let Inst{4} = Rn{5};
+}
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
 
 // VST2LN   : Vector Store (single 2-element structure from one lane)
 class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b00, op11_8, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
-          IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
-          "", []>;
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
+          IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
+          "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
-def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8">;
-def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">;
-def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">;
+def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
 
 // ...with double-spaced registers:
-def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">;
-def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">;
+def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+  let Inst{4} = Rn{4};
+}
+def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{4} = Rn{4};
+}
-// ...alternate versions to be allocated odd register numbers:
-def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">;
-def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">;
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
 
 // ...with address register writeback:
 class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
           (ins addrmode6:$addr, am6offset:$offset,
-           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt,
+           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
           "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
+          "$addr.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
-def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8">;
-def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">;
-def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">;
+def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
-def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">;
-def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">;
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
 
 // VST3LN   : Vector Store (single 3-element structure from one lane)
 class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b00, op11_8, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
-           nohash_imm:$lane), IIC_VST, "vst3", Dt,
-          "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
+           nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
+          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+}
-def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8">;
-def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">;
-def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">;
+def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
+  let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
 
 // ...with double-spaced registers:
-def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">;
-def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">;
+def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
+  let Inst{7} = lane{0};
+}
-// ...alternate versions to be allocated odd register numbers:
-def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">;
-def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">;
+def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
 
 // ...with address register writeback:
 class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
-           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
-          IIC_VST, "vst3", Dt,
-          "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+          IIC_VST3lnu, "vst3", Dt,
+          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []>;
+
+def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
+  let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
-def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8">;
-def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">;
-def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">;
+def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
+  let Inst{7} = lane{0};
+}
-def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">;
-def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">;
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
 
 // VST4LN   : Vector Store (single 4-element structure from one lane)
 class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b00, op11_8, op7_4, (outs),
-          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
-           nohash_imm:$lane), IIC_VST, "vst4", Dt,
-          "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
-          "", []>;
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
+           nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
+          "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
+          "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{5};
+}
-def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8">;
-def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">;
-def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">;
+def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
 
 // ...with double-spaced registers:
-def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">;
-def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">;
+def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{5};
+}
-// ...alternate versions to be allocated odd register numbers:
-def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">;
-def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">;
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
 
 // ...with address register writeback:
 class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
-          (ins addrmode6:$addr, am6offset:$offset,
-           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
-          IIC_VST, "vst4", Dt,
-          "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset",
-          "$addr.addr = $wb", []>;
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+          IIC_VST4lnu, "vst4", Dt,
+          "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
-def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8">;
-def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">;
-def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;
+def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+
+def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{5};
+}
-def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
-def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
 
 } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
 
@@ -1000,98 +1687,92 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{
 // Instruction Classes
 //===----------------------------------------------------------------------===//
 
-// Basic 2-register operations: single-, double- and quad-register.
-class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
-           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
-           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
-        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
-        IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
+// Basic 2-register operations: double- and quad-register.
 class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
          string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
-        (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt,"$dst, $src", "",
-        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
+        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
 class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
          bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
          string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
-        (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt,"$dst, $src", "",
-        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
+        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
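[import note] The N2V* classes describe the one-input/one-output instruction shape; the removed N2VS single-precision variant is gone along with the old DPR_VFP2 pseudo-scalar trick. For orientation, a hedged C example of the kind of unary vector op these classes instantiate (the defs actually built from them live elsewhere in this file):

    #include <arm_neon.h>

    /* One source register, one destination register: the N2V* shape. */
    int32x4_t negate(int32x4_t v) {
      return vnegq_s32(v);
    }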
 // Basic 2-register intrinsics, both double- and quad-register.
 class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
               bits<2> op17_16, bits<5> op11_7, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
-        (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
-        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
+        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
 class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
               bits<2> op17_16, bits<5> op11_7, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
-        (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
-        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
+        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
 
 // Narrow 2-register operations.
 class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
            bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
-        (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
-        [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
 
 // Narrow 2-register intrinsics.
 class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
               bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
-        (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
-        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
+        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
 
 // Long 2-register operations (currently only used for VMOVL).
 class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
          InstrItinClass itin, string OpcodeStr, string Dt,
          ValueType TyQ, ValueType TyD, SDNode OpNode>
-  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
-        (ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
-        [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>;
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
+
+// Long 2-register intrinsics.
+class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+              InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
+        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
 
 // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
 class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
-  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2),
-        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
-        OpcodeStr, Dt, "$dst1, $dst2",
-        "$src1 = $dst1, $src2 = $dst2", []>;
+  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
+        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
+        OpcodeStr, Dt, "$Vd, $Vm",
+        "$src1 = $Vd, $src2 = $Vm", []>;
 class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                   InstrItinClass itin, string OpcodeStr, string Dt>
-  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
-        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2",
-        "$src1 = $dst1, $src2 = $dst2", []>;
-
-// Basic 3-register operations: single-, double- and quad-register.
-class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-           string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
-           SDNode OpNode, bit Commutable>
-  : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm,
-        IIC_VBIND, OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
-  let isCommutable = Commutable;
-}
+  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
+        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
+        "$src1 = $Vd, $src2 = $Vm", []>;
 
+// Basic 3-register operations: double- and quad-register.
 class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
          ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src1, $src2", "",
-        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
+        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
   let isCommutable = Commutable;
 }
 // Same as N3VD but no data type.
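[import note] N3VD/N3VQ cover the two-input binary shape, and the "SL" (scalar-lane) variants in the next hunk match multiplication by a duplicated lane, which is why their patterns wrap one operand in NEONvduplane. A hedged C example of a by-lane operation in this family (names illustrative):

    #include <arm_neon.h>

    /* Multiply each lane of a by lane 1 of b: the N3VQSL shape,
       e.g. "vmul.i32 qD, qN, dM[1]". */
    int32x4_t scale_by_lane(int32x4_t a, int32x2_t b) {
      return vmulq_lane_s32(a, b, 1);
    }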
@@ -1100,31 +1781,31 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3VX<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, - OpcodeStr, "$dst, $src1, $src2", "", - [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{ + (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, + OpcodeStr, "$Vd, $Vn, $Vm", "", + [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{ let isCommutable = Commutable; } -class N3VDSL<bits<2> op21_20, bits<4> op11_8, +class N3VDSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", - [(set (Ty DPR:$dst), - (Ty (ShOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_VFP2:$src2),imm:$lane)))))]> { + (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + [(set (Ty DPR:$Vd), + (Ty (ShOp (Ty DPR:$Vn), + (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> { let isCommutable = 0; } -class N3VDSL16<bits<2> op21_20, bits<4> op11_8, +class N3VDSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","", - [(set (Ty DPR:$dst), - (Ty (ShOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> { + (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), + NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","", + [(set (Ty DPR:$Vd), + (Ty (ShOp (Ty DPR:$Vn), + (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { let isCommutable = 0; } @@ -1132,40 +1813,40 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin, - OpcodeStr, Dt, "$dst, $src1, $src2", "", - [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> { + (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, + OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> { let isCommutable = Commutable; } class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3VX<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin, - OpcodeStr, "$dst, $src1, $src2", "", - [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{ + (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, + OpcodeStr, "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{ let isCommutable = Commutable; } -class N3VQSL<bits<2> op21_20, bits<4> op11_8, +class N3VQSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, 
$src2[$lane]", "", - [(set (ResTy QPR:$dst), - (ResTy (ShOp (ResTy QPR:$src1), - (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), + (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + [(set (ResTy QPR:$Vd), + (ResTy (ShOp (ResTy QPR:$Vn), + (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), imm:$lane)))))]> { let isCommutable = 0; } class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","", - [(set (ResTy QPR:$dst), - (ResTy (ShOp (ResTy QPR:$src1), - (ResTy (NEONvduplane (OpTy DPR_8:$src2), + (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), + NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","", + [(set (ResTy QPR:$Vd), + (ResTy (ShOp (ResTy QPR:$Vn), + (ResTy (NEONvduplane (OpTy DPR_8:$Vm), imm:$lane)))))]> { let isCommutable = 0; } @@ -1175,30 +1856,39 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), f, itin, - OpcodeStr, Dt, "$dst, $src1, $src2", "", - [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> { + (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, + OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", + [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> { let isCommutable = Commutable; } -class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, +class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", - [(set (Ty DPR:$dst), - (Ty (IntOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_VFP2:$src2), + (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + [(set (Ty DPR:$Vd), + (Ty (IntOp (Ty DPR:$Vn), + (Ty (NEONvduplane (Ty DPR_VFP2:$Vm), imm:$lane)))))]> { let isCommutable = 0; } class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", - [(set (Ty DPR:$dst), - (Ty (IntOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> { + (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + [(set (Ty DPR:$Vd), + (Ty (IntOp (Ty DPR:$Vn), + (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { + let isCommutable = 0; +} +class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin, + OpcodeStr, Dt, "$Vd, $Vm, $Vn", "", + [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> { let isCommutable = 0; } @@ -1206,20 +1896,20 @@ class 
N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), f, itin, - OpcodeStr, Dt, "$dst, $src1, $src2", "", - [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> { + (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, + OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> { let isCommutable = Commutable; } -class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, +class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", - [(set (ResTy QPR:$dst), - (ResTy (IntOp (ResTy QPR:$src1), - (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), + (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + [(set (ResTy QPR:$Vd), + (ResTy (IntOp (ResTy QPR:$Vn), + (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), imm:$lane)))))]> { let isCommutable = 0; } @@ -1227,93 +1917,95 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", - [(set (ResTy QPR:$dst), - (ResTy (IntOp (ResTy QPR:$src1), - (ResTy (NEONvduplane (OpTy DPR_8:$src2), + (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + [(set (ResTy QPR:$Vd), + (ResTy (IntOp (ResTy QPR:$Vn), + (ResTy (NEONvduplane (OpTy DPR_8:$Vm), imm:$lane)))))]> { let isCommutable = 0; } +class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + Format f, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + : N3V<op24, op23, op21_20, op11_8, 1, op4, + (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin, + OpcodeStr, Dt, "$Vd, $Vm, $Vn", "", + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> { + let isCommutable = 0; +} -// Multiply-Add/Sub operations: single-, double- and quad-register. -class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode MulOp, SDNode OpNode> - : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR_VFP2:$dst), - (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), N3RegFrm, itin, - OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>; - +// Multiply-Add/Sub operations: double- and quad-register. 
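
The multiply-accumulate classes that follow wire a multiply operator and a
combining operator into one pattern. As an illustrative sketch only (not part
of this patch; the VFOOMAC name and the opcode bits are placeholders), an
integer instantiation of N3VDMulOp would be written like this:

// Hypothetical example: a double-register integer multiply-accumulate that
// matches (add acc, (mul a, b)). The real VMLA/VMLS definitions later in
// this file come from the N3VMulOp_QHS and N3VMulOpSL_HS multiclasses.
def VFOOMACv4i16 : N3VDMulOp<0, 0, 0b01, 0b1001, 0, IIC_VMACi16D,
                             "vfoomac", "i16", v4i16, mul, add>;

The "$src1 = $Vd" constraint in the class ties the accumulator input to the
destination register, so the instruction prints with only three operands.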
 class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
-                ValueType Ty, SDNode MulOp, SDNode OpNode>
+                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set DPR:$dst, (Ty (OpNode DPR:$src1,
-                             (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
+        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
+
 class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                   string OpcodeStr, string Dt,
-                  ValueType Ty, SDNode MulOp, SDNode ShOp>
+                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
   : N3V<0, 1, op21_20, op11_8, 1, 0,
-        (outs DPR:$dst),
-        (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+        (outs DPR:$Vd),
+        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set (Ty DPR:$dst),
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (Ty DPR:$Vd),
               (Ty (ShOp (Ty DPR:$src1),
-                        (Ty (MulOp DPR:$src2,
-                                   (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
+                        (Ty (MulOp DPR:$Vn,
+                                   (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                      imm:$lane)))))))]>;
 class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                     string OpcodeStr, string Dt,
                     ValueType Ty, SDNode MulOp, SDNode ShOp>
   : N3V<0, 1, op21_20, op11_8, 1, 0,
-        (outs DPR:$dst),
-        (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+        (outs DPR:$Vd),
+        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set (Ty DPR:$dst),
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (Ty DPR:$Vd),
               (Ty (ShOp (Ty DPR:$src1),
-                        (Ty (MulOp DPR:$src2,
-                                   (Ty (NEONvduplane (Ty DPR_8:$src3),
+                        (Ty (MulOp DPR:$Vn,
+                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                      imm:$lane)))))))]>;
 class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
-                SDNode MulOp, SDNode OpNode>
+                SDPatternOperator MulOp, SDPatternOperator OpNode>
   : N3V<op24, op23, op21_20, op11_8, 1, op4,
-        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set QPR:$dst, (Ty (OpNode QPR:$src1,
-                             (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
+        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
 class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                   string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
-                  SDNode MulOp, SDNode ShOp>
+                  SDPatternOperator MulOp, SDPatternOperator ShOp>
   : N3V<1, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst),
-        (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+        (outs QPR:$Vd),
+        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set (ResTy QPR:$dst),
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (ResTy QPR:$Vd),
               (ResTy (ShOp (ResTy QPR:$src1),
-                           (ResTy (MulOp QPR:$src2,
-                                         (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
+                           (ResTy (MulOp QPR:$Vn,
+                                         (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                               imm:$lane)))))))]>;
 class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                     string OpcodeStr, string Dt,
                     ValueType ResTy, ValueType OpTy,
                     SDNode MulOp, SDNode ShOp>
   : N3V<1, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst),
-        (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+        (outs QPR:$Vd),
+        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set (ResTy QPR:$dst),
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (ResTy QPR:$Vd),
               (ResTy (ShOp (ResTy QPR:$src1),
-                           (ResTy (MulOp QPR:$src2,
-                                         (ResTy (NEONvduplane (OpTy DPR_8:$src3),
+                           (ResTy (MulOp QPR:$Vn,
+                                         (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                               imm:$lane)))))))]>;
 
 // Neon Intrinsic-Op instructions (VABA): double- and quad-register.
@@ -1321,18 +2013,18 @@ class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType Ty, Intrinsic IntOp, SDNode OpNode>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set DPR:$dst, (Ty (OpNode DPR:$src1,
-                        (Ty (IntOp (Ty DPR:$src2), (Ty DPR:$src3))))))]>;
+        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+                        (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
 class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType Ty, Intrinsic IntOp, SDNode OpNode>
   : N3V<op24, op23, op21_20, op11_8, 1, op4,
-        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set QPR:$dst, (Ty (OpNode QPR:$src1,
-                        (Ty (IntOp (Ty QPR:$src2), (Ty QPR:$src3))))))]>;
+        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+                        (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
 
 // Neon 3-argument intrinsics, both double- and quad-register.
 // The destination register is also used as the first source operand register.
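
Before the 3-argument intrinsics, a concrete reading of the Intrinsic-Op
classes just above. This is a hypothetical sketch; the VFOOABA name and the
opcode bits are placeholders, while int_arm_neon_vabds is the real
absolute-difference intrinsic that the VABA definitions in this file
accumulate through:

// Hypothetical example: accumulate the vabds result, i.e. match
// (add acc, (vabds a, b)) and print as "vfooaba.s8 Dd, Dn, Dm".
def VFOOABAv8i8 : N3VDIntOp<0, 0, 0b00, 0b0111, 1, IIC_VABAD,
                            "vfooaba", "s8", v8i8, int_arm_neon_vabds, add>;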
@@ -1340,52 +2032,52 @@ class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
-                                      (OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
+        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
+                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
 class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V<op24, op23, op21_20, op11_8, 1, op4,
-        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
-                                      (OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
+        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
+                                      (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
 
 // Long Multiply-Add/Sub operations.
 class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
-                                (TyQ (MulOp (TyD DPR:$src2),
-                                            (TyD DPR:$src3)))))]>;
+        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+                               (TyQ (MulOp (TyD DPR:$Vn),
+                                           (TyD DPR:$Vm)))))]>;
 class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
-  : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
-        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+  : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set QPR:$dst,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set QPR:$Vd,
           (OpNode (TyQ QPR:$src1),
-                  (TyQ (MulOp (TyD DPR:$src2),
-                              (TyD (NEONvduplane (TyD DPR_VFP2:$src3),
+                  (TyQ (MulOp (TyD DPR:$Vn),
+                              (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                  imm:$lane))))))]>;
 class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
-  : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
-        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+  : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set QPR:$dst,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set QPR:$Vd,
           (OpNode (TyQ QPR:$src1),
-                  (TyQ (MulOp (TyD DPR:$src2),
-                              (TyD (NEONvduplane (TyD DPR_8:$src3),
+                  (TyQ (MulOp (TyD DPR:$Vn),
+                              (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                  imm:$lane))))))]>;
 
 // Long Intrinsic-Op vector operations with explicit extend (VABAL).
@@ -1394,11 +2086,11 @@ class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                    ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
                    SDNode OpNode>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
-                                (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src2),
-                                                        (TyD DPR:$src3)))))))]>;
+        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+                                                       (TyD DPR:$Vm)))))))]>;
 
 // Neon Long 3-argument intrinsic.  The destination register is
 // a quad-register and is also used as the first source operand register.
@@ -1406,35 +2098,35 @@ class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, Intrinsic IntOp>
   : N3V<op24, op23, op21_20, op11_8, 0, op4,
-        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
-        [(set QPR:$dst,
-          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
+        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+        [(set QPR:$Vd,
+          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
 class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V<op24, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst),
-        (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+        (outs QPR:$Vd),
+        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set (ResTy QPR:$dst),
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (ResTy QPR:$Vd),
               (ResTy (IntOp (ResTy QPR:$src1),
-                            (OpTy DPR:$src2),
-                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
+                            (OpTy DPR:$Vn),
+                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]>;
 class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V<op24, 1, op21_20, op11_8, 1, 0,
-        (outs QPR:$dst),
-        (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+        (outs QPR:$Vd),
+        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
         NVMulSLFrm, itin,
-        OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
-        [(set (ResTy QPR:$dst),
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (ResTy QPR:$Vd),
               (ResTy (IntOp (ResTy QPR:$src1),
-                            (OpTy DPR:$src2),
-                            (OpTy (NEONvduplane (OpTy DPR_8:$src3),
+                            (OpTy DPR:$Vn),
+                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]>;
 
 // Narrowing 3-register intrinsics.
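
For the narrowing class that follows, here is an illustrative instantiation.
The VFOONAR name and the opcode bits are placeholders, while
int_arm_neon_vraddhn is the real rounding add-and-narrow intrinsic used by
the VRADDHN definitions in this file:

// Hypothetical example: narrow two q-register sources to a d-register
// result through the intrinsic, printing as "vfoonar.i16 Dd, Qn, Qm".
def VFOONARv8i8 : N3VNInt<1, 0, 0b00, 0b0100, 0, "vfoonar", "i16",
                          v8i8, v8i16, int_arm_neon_vraddhn, 1>;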
@@ -1442,9 +2134,9 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINi4D, - OpcodeStr, Dt, "$dst, $src1, $src2", "", - [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> { + (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D, + OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", + [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> { let isCommutable = Commutable; } @@ -1453,29 +2145,29 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, - OpcodeStr, Dt, "$dst, $src1, $src2", "", - [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src1), (TyD DPR:$src2))))]> { + (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, + OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { let isCommutable = Commutable; } class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> : N3V<op24, 1, op21_20, op11_8, 1, 0, - (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", - [(set QPR:$dst, - (TyQ (OpNode (TyD DPR:$src1), - (TyD (NEONvduplane (TyD DPR_VFP2:$src2),imm:$lane)))))]>; + (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + [(set QPR:$Vd, + (TyQ (OpNode (TyD DPR:$Vn), + (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>; class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> : N3V<op24, 1, op21_20, op11_8, 1, 0, - (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", - [(set QPR:$dst, - (TyQ (OpNode (TyD DPR:$src1), - (TyD (NEONvduplane (TyD DPR_8:$src2), imm:$lane)))))]>; + (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + [(set QPR:$Vd, + (TyQ (OpNode (TyD DPR:$Vn), + (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>; // Long 3-register operations with explicitly extended operands. 
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, @@ -1483,10 +2175,10 @@ class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, - OpcodeStr, Dt, "$dst, $src1, $src2", "", - [(set QPR:$dst, (OpNode (TyQ (ExtOp (TyD DPR:$src1))), - (TyQ (ExtOp (TyD DPR:$src2)))))]> { + (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, + OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))), + (TyQ (ExtOp (TyD DPR:$Vm)))))]> { let isCommutable = Commutable; } @@ -1496,10 +2188,10 @@ class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, - OpcodeStr, Dt, "$dst, $src1, $src2", "", - [(set QPR:$dst, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src1), - (TyD DPR:$src2))))))]> { + (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, + OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn), + (TyD DPR:$Vm))))))]> { let isCommutable = Commutable; } @@ -1508,30 +2200,30 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, - OpcodeStr, Dt, "$dst, $src1, $src2", "", - [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> { + (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, + OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { let isCommutable = Commutable; } class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, - (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", - [(set (ResTy QPR:$dst), - (ResTy (IntOp (OpTy DPR:$src1), - (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2), + (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + [(set (ResTy QPR:$Vd), + (ResTy (IntOp (OpTy DPR:$Vn), + (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm), imm:$lane)))))]>; class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, - (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", - [(set (ResTy QPR:$dst), - (ResTy (IntOp (OpTy DPR:$src1), - (OpTy (NEONvduplane (OpTy DPR_8:$src2), + (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + [(set (ResTy QPR:$Vd), + (ResTy (IntOp (OpTy DPR:$Vn), + (OpTy (NEONvduplane (OpTy DPR_8:$Vm), imm:$lane)))))]>; // Wide 3-register operations. 
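
The wide-operation class introduced below pairs a full q-register operand
with a d-register operand that is extended before the operation. A hedged
sketch, with a placeholder VFOOW name and placeholder encoding bits:

// Hypothetical example: a q += extend(d) style widening operation, matching
// (add (v8i16 Qn), (sext (v8i8 Dm))) and printing "vfoow.s8 Qd, Qn, Dm".
def VFOOWv8i16 : N3VW<0, 1, 0b00, 0b0001, 0,
                      "vfoow", "s8", v8i16, v8i8, add, sext, 0>;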
@@ -1539,10 +2231,10 @@ class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD, - OpcodeStr, Dt, "$dst, $src1, $src2", "", - [(set QPR:$dst, (OpNode (TyQ QPR:$src1), - (TyQ (ExtOp (TyD DPR:$src2)))))]> { + (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD, + OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn), + (TyQ (ExtOp (TyD DPR:$Vm)))))]> { let isCommutable = Commutable; } @@ -1551,16 +2243,16 @@ class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst), - (ins DPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", - [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>; + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), + (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "", + [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst), - (ins QPR:$src), IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", - [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), + (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "", + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; // Pairwise long 2-register accumulate intrinsics, // both double- and quad-register. @@ -1570,17 +2262,17 @@ class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VPALiD, - OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst", - [(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>; + (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD, + OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd", + [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>; class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VPALiQ, - OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst", - [(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>; + (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ, + OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd", + [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>; // Shift by immediate, // both double- and quad-register. 
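
A sketch of the shift-by-immediate class defined just below. The VFOOSHL
name and the bits are placeholders; NEONvshl is the target-specific node
that the real VSHL immediate definitions in this file select on:

// Hypothetical example: shift each i8 lane left by an immediate, matching
// (NEONvshl Dm, imm) and printing roughly as "vfooshl.i8 Dd, Dm, #imm".
def VFOOSHLv8i8 : N2VDSh<0, 1, 0b0101, 0, 1, N2RegVShLFrm, IIC_VSHLiD,
                         "vfooshl", "i8", v8i8, NEONvshl>;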
@@ -1588,25 +2280,25 @@ class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, 0, op4, - (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), f, itin, - OpcodeStr, Dt, "$dst, $src, $SIMM", "", - [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>; + (outs DPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), f, itin, + OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", + [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>; class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, 1, op4, - (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), f, itin, - OpcodeStr, Dt, "$dst, $src, $SIMM", "", - [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>; + (outs QPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), f, itin, + OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", + [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>; // Long shift by immediate. class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, op6, op4, - (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), N2RegVShLFrm, - IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src, $SIMM", "", - [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src), + (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm, + IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", + [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>; // Narrow shift by immediate. @@ -1614,42 +2306,42 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, op6, op4, - (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), N2RegVShRFrm, itin, - OpcodeStr, Dt, "$dst, $src, $SIMM", "", - [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src), + (outs DPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, itin, + OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", + [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>; // Shift right by immediate and accumulate, // both double- and quad-register. 
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> - : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD, - OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst", - [(set DPR:$dst, (Ty (add DPR:$src1, - (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>; + : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD, + OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", + [(set DPR:$Vd, (Ty (add DPR:$src1, + (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>; class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> - : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD, - OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst", - [(set QPR:$dst, (Ty (add QPR:$src1, - (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>; + : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd), + (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD, + OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", + [(set QPR:$Vd, (Ty (add QPR:$src1, + (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>; // Shift by immediate and insert, // both double- and quad-register. class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> - : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), f, IIC_VSHLiD, - OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst", - [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>; + : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiD, + OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", + [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>; class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> - : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), f, IIC_VSHLiQ, - OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst", - [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>; + : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd), + (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiQ, + OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", + [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>; // Convert, with fractional bits immediate, // both double- and quad-register. 
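
Finally, a sketch for the fixed-point conversion class that follows. The
VFOOCVT name and bits are placeholders; int_arm_neon_vcvtfp2fxs is the real
float-to-fixed intrinsic, and the immediate carries the fraction-bit count
via the new neon_vcvt_imm32 operand:

// Hypothetical example: convert v2f32 to v2i32 with a fraction-bits
// immediate, printing roughly as "vfoocvt.s32.f32 Dd, Dm, #bits".
def VFOOCVTf2xs : N2VCvtD<0, 1, 0b1111, 0, 1, "vfoocvt", "s32.f32",
                          v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;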
@@ -1657,16 +2349,16 @@ class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm<op24, op23, op11_8, op7, 0, op4, - (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), NVCVTFrm, - IIC_VUNAD, OpcodeStr, Dt, "$dst, $src, $SIMM", "", - [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>; + (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm, + IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", + [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>; class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm<op24, op23, op11_8, op7, 1, op4, - (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), NVCVTFrm, - IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src, $SIMM", "", - [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>; + (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm, + IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>; //===----------------------------------------------------------------------===// // Multiclasses @@ -1678,45 +2370,127 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // S = single int (32 bit) elements // D = double int (64 bit) elements -// Neon 2-register vector operations -- for disassembly only. +// Neon 2-register vector operations and intrinsics. -// First with only element sizes of 8, 16 and 32 bits: +// Neon 2-register comparisons. +// source operand element sizes of 8, 16 and 32 bits: multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, string opc, string Dt, - string asm> { + string asm, SDNode OpNode> { // 64-bit vector types. def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4, - (outs DPR:$dst), (ins DPR:$src), NoItinerary, - opc, !strconcat(Dt, "8"), asm, "", []>; + (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, + opc, !strconcat(Dt, "8"), asm, "", + [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>; def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, - (outs DPR:$dst), (ins DPR:$src), NoItinerary, - opc, !strconcat(Dt, "16"), asm, "", []>; + (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, + opc, !strconcat(Dt, "16"), asm, "", + [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>; def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, - (outs DPR:$dst), (ins DPR:$src), NoItinerary, - opc, !strconcat(Dt, "32"), asm, "", []>; + (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, + opc, !strconcat(Dt, "32"), asm, "", + [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>; def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, - (outs DPR:$dst), (ins DPR:$src), NoItinerary, - opc, "f32", asm, "", []> { + (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, + opc, "f32", asm, "", + [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> { let Inst{10} = 1; // overwrite F = 1 } // 128-bit vector types. 
def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4, - (outs QPR:$dst), (ins QPR:$src), NoItinerary, - opc, !strconcat(Dt, "8"), asm, "", []>; + (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, + opc, !strconcat(Dt, "8"), asm, "", + [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>; def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, - (outs QPR:$dst), (ins QPR:$src), NoItinerary, - opc, !strconcat(Dt, "16"), asm, "", []>; + (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, + opc, !strconcat(Dt, "16"), asm, "", + [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>; def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, - (outs QPR:$dst), (ins QPR:$src), NoItinerary, - opc, !strconcat(Dt, "32"), asm, "", []>; + (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, + opc, !strconcat(Dt, "32"), asm, "", + [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>; def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, - (outs QPR:$dst), (ins QPR:$src), NoItinerary, - opc, "f32", asm, "", []> { + (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, + opc, "f32", asm, "", + [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> { let Inst{10} = 1; // overwrite F = 1 } } + +// Neon 2-register vector intrinsics, +// element sizes of 8, 16 and 32 bits: +multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op4, + InstrItinClass itinD, InstrItinClass itinQ, + string OpcodeStr, string Dt, Intrinsic IntOp> { + // 64-bit vector types. + def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, + itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; + def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, + itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>; + def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, + itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>; + + // 128-bit vector types. 
+ def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, + itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>; + def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, + itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>; + def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, + itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>; +} + + +// Neon Narrowing 2-register vector operations, +// source operand element sizes of 16, 32 and 64 bits: +multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "16"), + v8i8, v8i16, OpNode>; + def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "32"), + v4i16, v4i32, OpNode>; + def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "64"), + v2i32, v2i64, OpNode>; +} + +// Neon Narrowing 2-register vector intrinsics, +// source operand element sizes of 16, 32 and 64 bits: +multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + Intrinsic IntOp> { + def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "16"), + v8i8, v8i16, IntOp>; + def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "32"), + v4i16, v4i32, IntOp>; + def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, + itin, OpcodeStr, !strconcat(Dt, "64"), + v2i32, v2i64, IntOp>; +} + + +// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). +// source operand element sizes of 16, 32 and 64 bits: +multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, + string OpcodeStr, string Dt, SDNode OpNode> { + def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>; + def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; + def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; +} + + // Neon 3-register vector operations. // First with only element sizes of 8, 16 and 32 bits: @@ -1726,7 +2500,7 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, SDNode OpNode, bit Commutable = 0> { // 64-bit vector types. 
- def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16, + def v8i8 : N3VD<op24, op23, 0b00, op11_8, op4, itinD16, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, OpNode, Commutable>; def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16, @@ -1775,54 +2549,6 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, } -// Neon Narrowing 2-register vector operations, -// source operand element sizes of 16, 32 and 64 bits: -multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, - bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode> { - def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, - itin, OpcodeStr, !strconcat(Dt, "16"), - v8i8, v8i16, OpNode>; - def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, - itin, OpcodeStr, !strconcat(Dt, "32"), - v4i16, v4i32, OpNode>; - def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, - itin, OpcodeStr, !strconcat(Dt, "64"), - v2i32, v2i64, OpNode>; -} - -// Neon Narrowing 2-register vector intrinsics, -// source operand element sizes of 16, 32 and 64 bits: -multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, - bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - Intrinsic IntOp> { - def v8i8 : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4, - itin, OpcodeStr, !strconcat(Dt, "16"), - v8i8, v8i16, IntOp>; - def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4, - itin, OpcodeStr, !strconcat(Dt, "32"), - v4i16, v4i32, IntOp>; - def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4, - itin, OpcodeStr, !strconcat(Dt, "64"), - v2i32, v2i64, IntOp>; -} - - -// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). -// source operand element sizes of 16, 32 and 64 bits: -multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, - string OpcodeStr, string Dt, SDNode OpNode> { - def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD, - OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>; - def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, - OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>; - def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, - OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>; -} - - // Neon 3-register vector intrinsics. // First with only element sizes of 16 and 32 bits: @@ -1847,8 +2573,29 @@ multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp, Commutable>; } +multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + Intrinsic IntOp> { + // 64-bit vector types. + def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16, + OpcodeStr, !strconcat(Dt, "16"), + v4i16, v4i16, IntOp>; + def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32, + OpcodeStr, !strconcat(Dt, "32"), + v2i32, v2i32, IntOp>; + + // 128-bit vector types. 
+ def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16, + OpcodeStr, !strconcat(Dt, "16"), + v8i16, v8i16, IntOp>; + def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32, + OpcodeStr, !strconcat(Dt, "32"), + v4i32, v4i32, IntOp>; +} -multiclass N3VIntSL_HS<bits<4> op11_8, +multiclass N3VIntSL_HS<bits<4> op11_8, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, Intrinsic IntOp> { @@ -1877,6 +2624,21 @@ multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp, Commutable>; } +multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + Intrinsic IntOp> + : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, + OpcodeStr, Dt, IntOp> { + def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16, + OpcodeStr, !strconcat(Dt, "8"), + v8i8, v8i8, IntOp>; + def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16, + OpcodeStr, !strconcat(Dt, "8"), + v16i8, v16i8, IntOp>; +} + // ....then also with element size of 64 bits: multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, @@ -1893,6 +2655,20 @@ multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, OpcodeStr, !strconcat(Dt, "64"), v2i64, v2i64, IntOp, Commutable>; } +multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + Intrinsic IntOp> + : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32, + OpcodeStr, Dt, IntOp> { + def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32, + OpcodeStr, !strconcat(Dt, "64"), + v1i64, v1i64, IntOp>; + def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32, + OpcodeStr, !strconcat(Dt, "64"), + v2i64, v2i64, IntOp>; +} // Neon Narrowing 3-register vector intrinsics, // source operand element sizes of 16, 32 and 64 bits: @@ -1920,7 +2696,7 @@ multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode, Commutable>; - def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16, + def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode, Commutable>; def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32, @@ -1944,7 +2720,7 @@ multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode, ExtOp, Commutable>; - def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16, + def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode, ExtOp, Commutable>; def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32, @@ -1959,7 +2735,7 @@ multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> { - def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16, + def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp, Commutable>; def v2i64 
: N3VLInt<op24, op23, 0b10, op11_8, op4, itin32, @@ -1970,7 +2746,7 @@ multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, Intrinsic IntOp> { - def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin, + def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin, OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin, OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; @@ -1995,7 +2771,7 @@ multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp, ExtOp, Commutable>; - def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin, + def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin, OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp, ExtOp, Commutable>; def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin, @@ -2044,7 +2820,7 @@ multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>; } -multiclass N3VMulOpSL_HS<bits<4> op11_8, +multiclass N3VMulOpSL_HS<bits<4> op11_8, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, SDNode ShOp> { @@ -2174,30 +2950,6 @@ multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, } -// Neon 2-register vector intrinsics, -// element sizes of 8, 16 and 32 bits: -multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, - bits<5> op11_7, bit op4, - InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, string Dt, Intrinsic IntOp> { - // 64-bit vector types. - def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, - itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; - def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, - itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>; - def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, - itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>; - - // 128-bit vector types. 
- def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, - itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>; - def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, - itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>; - def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, - itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>; -} - - // Neon Pairwise long 2-register intrinsics, // element sizes of 8, 16 and 32 bits: multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, @@ -2461,9 +3213,9 @@ def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul", "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul", "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; -def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32", +def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", v2f32, v2f32, fmul, 1>; -def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32", +def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", v4f32, v4f32, fmul, 1>; defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; @@ -2491,7 +3243,7 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), // VQDMULH : Vector Saturating Doubling Multiply Returning High Half defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, - IIC_VMULi16Q, IIC_VMULi32Q, + IIC_VMULi16Q, IIC_VMULi32Q, "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, @@ -2555,15 +3307,19 @@ defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", - v2f32, fmul, fadd>; + v2f32, fmul_su, fadd_mlx>, + Requires<[HasNEON, UseFPVMLx]>; def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", - v4f32, fmul, fadd>; + v4f32, fmul_su, fadd_mlx>, + Requires<[HasNEON, UseFPVMLx]>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", - v2f32, fmul, fadd>; + v2f32, fmul_su, fadd_mlx>, + Requires<[HasNEON, UseFPVMLx]>; def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", - v4f32, v2f32, fmul, fadd>; + v4f32, v2f32, fmul_su, fadd_mlx>, + Requires<[HasNEON, UseFPVMLx]>; def : Pat<(v8i16 (add (v8i16 QPR:$src1), (mul (v8i16 QPR:$src2), @@ -2581,14 +3337,15 @@ def : Pat<(v4i32 (add (v4i32 QPR:$src1), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; -def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), - (fmul (v4f32 QPR:$src2), +def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1), + (fmul_su (v4f32 QPR:$src2), (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), (v4f32 (VMLAslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), (v2f32 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>; + (SubReg_i32_lane imm:$lane)))>, + Requires<[HasNEON, UseFPVMLx]>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, @@ -2608,15 +3365,19 @@ defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, 
"vmls", "i", sub>; def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", - v2f32, fmul, fsub>; + v2f32, fmul_su, fsub_mlx>, + Requires<[HasNEON, UseFPVMLx]>; def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", - v4f32, fmul, fsub>; + v4f32, fmul_su, fsub_mlx>, + Requires<[HasNEON, UseFPVMLx]>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", - v2f32, fmul, fsub>; + v2f32, fmul_su, fsub_mlx>, + Requires<[HasNEON, UseFPVMLx]>; def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", - v4f32, v2f32, fmul, fsub>; + v4f32, v2f32, fmul_su, fsub_mlx>, + Requires<[HasNEON, UseFPVMLx]>; def : Pat<(v8i16 (sub (v8i16 QPR:$src1), (mul (v8i16 QPR:$src2), @@ -2634,13 +3395,14 @@ def : Pat<(v4i32 (sub (v4i32 QPR:$src1), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; -def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), - (fmul (v4f32 QPR:$src2), +def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1), + (fmul_su (v4f32 QPR:$src2), (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), (v2f32 (EXTRACT_SUBREG QPR:$src3, (DSubReg_i32_reg imm:$lane))), - (SubReg_i32_lane imm:$lane)))>; + (SubReg_i32_lane imm:$lane)))>, + Requires<[HasNEON, UseFPVMLx]>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, @@ -2703,25 +3465,24 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, NEONvceq, 1>; def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; -// For disassembly only. + defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", - "$dst, $src, #0">; + "$Vd, $Vm, #0", NEONvceqz>; // VCGE : Vector Compare Greater Than or Equal defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; -defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, +defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, NEONvcge, 0>; def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; -// For disassembly only. + defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", - "$dst, $src, #0">; -// For disassembly only. + "$Vd, $Vm, #0", NEONvcgez>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", - "$dst, $src, #0">; + "$Vd, $Vm, #0", NEONvclez>; // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, @@ -2732,12 +3493,11 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>; def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>; -// For disassembly only. + defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", - "$dst, $src, #0">; -// For disassembly only. 
+ "$Vd, $Vm, #0", NEONvcgtz>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", - "$dst, $src, #0">; + "$Vd, $Vm, #0", NEONvcltz>; // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", @@ -2750,7 +3510,7 @@ def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>; // VTST : Vector Test Bits -defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, +defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; // Vector Bitwise Operations. @@ -2779,104 +3539,190 @@ def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", v4i32, v4i32, or, 1>; +def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1, + (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + IIC_VMOVImm, + "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1, + (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + IIC_VMOVImm, + "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} + +def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1, + (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + IIC_VMOVImm, + "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1, + (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + IIC_VMOVImm, + "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} + + // VBIC : Vector Bitwise Bit Clear (AND NOT) -def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD, - "vbic", "$dst, $src1, $src2", "", - [(set DPR:$dst, (v2i32 (and DPR:$src1, - (vnotd DPR:$src2))))]>; -def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ, - "vbic", "$dst, $src1, $src2", "", - [(set QPR:$dst, (v4i32 (and QPR:$src1, - (vnotq QPR:$src2))))]>; +def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), + (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, + "vbic", "$Vd, $Vn, $Vm", "", + [(set DPR:$Vd, (v2i32 (and DPR:$Vn, + (vnotd DPR:$Vm))))]>; +def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), + (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, + "vbic", "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (v4i32 (and QPR:$Vn, + (vnotq QPR:$Vm))))]>; + +def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, + (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + IIC_VMOVImm, + "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1, + (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + IIC_VMOVImm, + "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} + +def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1, + (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + IIC_VMOVImm, + 
"vbic", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1, + (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + IIC_VMOVImm, + "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} // VORN : Vector Bitwise OR NOT -def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD, - "vorn", "$dst, $src1, $src2", "", - [(set DPR:$dst, (v2i32 (or DPR:$src1, - (vnotd DPR:$src2))))]>; -def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ, - "vorn", "$dst, $src1, $src2", "", - [(set QPR:$dst, (v4i32 (or QPR:$src1, - (vnotq QPR:$src2))))]>; +def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd), + (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, + "vorn", "$Vd, $Vn, $Vm", "", + [(set DPR:$Vd, (v2i32 (or DPR:$Vn, + (vnotd DPR:$Vm))))]>; +def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd), + (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, + "vorn", "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (v4i32 (or QPR:$Vn, + (vnotq QPR:$Vm))))]>; // VMVN : Vector Bitwise NOT (Immediate) let isReMaterializable = 1 in { -def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$dst), + +def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm, - "vmvn", "i16", "$dst, $SIMM", "", - [(set DPR:$dst, (v4i16 (NEONvmvnImm timm:$SIMM)))]>; -def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$dst), + "vmvn", "i16", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm, - "vmvn", "i16", "$dst, $SIMM", "", - [(set QPR:$dst, (v8i16 (NEONvmvnImm timm:$SIMM)))]>; + "vmvn", "i16", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} -def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$dst), +def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm, - "vmvn", "i32", "$dst, $SIMM", "", - [(set DPR:$dst, (v2i32 (NEONvmvnImm timm:$SIMM)))]>; -def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$dst), + "vmvn", "i32", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> { + let Inst{11-8} = SIMM{11-8}; +} + +def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm, - "vmvn", "i32", "$dst, $SIMM", "", - [(set QPR:$dst, (v4i32 (NEONvmvnImm timm:$SIMM)))]>; + "vmvn", "i32", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> { + let Inst{11-8} = SIMM{11-8}; +} } // VMVN : Vector Bitwise NOT def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, - (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD, - "vmvn", "$dst, $src", "", - [(set DPR:$dst, (v2i32 (vnotd DPR:$src)))]>; + (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD, + "vmvn", "$Vd, $Vm", "", + [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>; def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, - (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD, - "vmvn", "$dst, $src", "", - [(set QPR:$dst, (v4i32 (vnotq QPR:$src)))]>; + (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD, + "vmvn", "$Vd, $Vm", "", + [(set QPR:$Vd, (v4i32 (vnotq 
QPR:$Vm)))]>; def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>; def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; // VBSL : Vector Bitwise Select -def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR:$src3), +def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VCNTiD, - "vbsl", "$dst, $src2, $src3", "$src1 = $dst", - [(set DPR:$dst, - (v2i32 (or (and DPR:$src2, DPR:$src1), - (and DPR:$src3, (vnotd DPR:$src1)))))]>; -def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, QPR:$src3), + "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", + [(set DPR:$Vd, + (v2i32 (or (and DPR:$Vn, DPR:$src1), + (and DPR:$Vm, (vnotd DPR:$src1)))))]>; +def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), + (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VCNTiQ, - "vbsl", "$dst, $src2, $src3", "$src1 = $dst", - [(set QPR:$dst, - (v4i32 (or (and QPR:$src2, QPR:$src1), - (and QPR:$src3, (vnotq QPR:$src1)))))]>; + "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", + [(set QPR:$Vd, + (v4i32 (or (and QPR:$Vn, QPR:$src1), + (and QPR:$Vm, (vnotq QPR:$src1)))))]>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", +// FIXME: This instruction's encoding MAY NOT BE correct. def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), + (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, - "vbif", "$dst, $src2, $src3", "$src1 = $dst", + "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", [/* For disassembly only; pattern left blank */]>; def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), + (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, - "vbif", "$dst, $src2, $src3", "$src1 = $dst", + "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", [/* For disassembly only; pattern left blank */]>; // VBIT : Vector Bitwise Insert if True // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", +// FIXME: This instruction's encoding MAY NOT BE correct. def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), + (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, - "vbit", "$dst, $src2, $src3", "$src1 = $dst", + "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", [/* For disassembly only; pattern left blank */]>; def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), + (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, - "vbit", "$dst, $src2, $src3", "$src1 = $dst", + "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", [/* For disassembly only; pattern left blank */]>; // VBIT/VBIF are not yet implemented. 
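[Aside on the select family above: VBSL, VBIT and VBIF all compute the same lane-wise expression with the mask carried in a different operand, which is visible in the (or (and ...), (and ..., (vnot ...))) pattern on VBSL. A minimal scalar C sketch of one lane, with illustrative helper names that are not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* One 32-bit lane of the NEON bitwise-select family:
   VBSL: dst = (n & mask) | (m & ~mask)  -- the mask is the tied dst operand
   VBIT: dst = (n & m) | (dst & ~m)      -- insert bits of n where mask m is 1
   VBIF: dst = (dst & m) | (n & ~m)      -- insert bits of n where mask m is 0 */
static uint32_t bsl_lane(uint32_t mask, uint32_t n, uint32_t m) {
    return (n & mask) | (m & ~mask);
}

int main(void) {
    /* low half selected from n, high half from m */
    printf("%08x\n", bsl_lane(0x0000ffffu, 0xaaaaaaaau, 0x55555555u));
    return 0;   /* prints 5555aaaa */
}
]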
The TwoAddress pass will not go looking @@ -2957,8 +3803,8 @@ def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, "vpadd", "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>; -def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, - IIC_VBIND, "vpadd", "f32", +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, + IIC_VPBIND, "vpadd", "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>; // VPADDL : Vector Pairwise Add Long @@ -2986,7 +3832,7 @@ def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; -def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; // VPMIN : Vector Pairwise Minimum @@ -3002,16 +3848,16 @@ def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; -def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin", +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; // Vector Reciprocal and Reciprocal Square Root Estimate and Step. // VRECPE : Vector Reciprocal Estimate -def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, +def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, IIC_VUNAD, "vrecpe", "u32", v2i32, v2i32, int_arm_neon_vrecpe>; -def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, +def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, IIC_VUNAQ, "vrecpe", "u32", v4i32, v4i32, int_arm_neon_vrecpe>; def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, @@ -3039,7 +3885,7 @@ def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, IIC_VUNAD, "vrsqrte", "f32", v2f32, v2f32, int_arm_neon_vrsqrte>; -def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, +def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, IIC_VUNAQ, "vrsqrte", "f32", v4f32, v4f32, int_arm_neon_vrsqrte>; @@ -3054,12 +3900,12 @@ def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, // Vector Shifts. 
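[Note on the register-form shifts defined below: NEON VSHL/VRSHL/VQSHL with a register operand take a per-lane signed shift count, and a negative count shifts right. That is why these map to the int_arm_neon_vshift* intrinsics rather than the generic shl/srl nodes. A scalar sketch of one in-range s32 lane; out-of-range counts behave differently in hardware and are not modeled here:

#include <stdint.h>
#include <stdio.h>

/* One s32 lane of VSHL (register form); the count is the signed low byte of
   the corresponding lane of the second operand.  Only |shift| < 32 modeled. */
static int32_t vshl_s32_lane(int32_t x, int8_t shift) {
    if (shift >= 0)
        return (int32_t)((uint32_t)x << shift);
    return x >> -shift;   /* arithmetic right shift for the "s" variant,
                             assuming the usual signed-shift behavior */
}

int main(void) {
    printf("%d %d\n", vshl_s32_lane(-8, 2), vshl_s32_lane(-8, -2));
    return 0;   /* prints -32 -2 */
}
]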
// VSHL : Vector Shift -defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm, +defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, - "vshl", "s", int_arm_neon_vshifts, 0>; -defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm, + "vshl", "s", int_arm_neon_vshifts>; +defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, - "vshl", "u", int_arm_neon_vshiftu, 0>; + "vshl", "u", int_arm_neon_vshiftu>; // VSHL : Vector Shift Left (Immediate) defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl, N2RegVShLFrm>; @@ -3093,12 +3939,12 @@ defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>; // VRSHL : Vector Rounding Shift -defm VRSHLs : N3VInt_QHSD<0, 0, 0b0101, 0, N3RegVShFrm, +defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, - "vrshl", "s", int_arm_neon_vrshifts, 0>; -defm VRSHLu : N3VInt_QHSD<1, 0, 0b0101, 0, N3RegVShFrm, + "vrshl", "s", int_arm_neon_vrshifts>; +defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, - "vrshl", "u", int_arm_neon_vrshiftu, 0>; + "vrshl", "u", int_arm_neon_vrshiftu>; // VRSHR : Vector Rounding Shift Right defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs, N2RegVShRFrm>; @@ -3110,12 +3956,12 @@ defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", NEONvrshrn>; // VQSHL : Vector Saturating Shift -defm VQSHLs : N3VInt_QHSD<0, 0, 0b0100, 1, N3RegVShFrm, +defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, - "vqshl", "s", int_arm_neon_vqshifts, 0>; -defm VQSHLu : N3VInt_QHSD<1, 0, 0b0100, 1, N3RegVShFrm, + "vqshl", "s", int_arm_neon_vqshifts>; +defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, - "vqshl", "u", int_arm_neon_vqshiftu, 0>; + "vqshl", "u", int_arm_neon_vqshiftu>; // VQSHL : Vector Saturating Shift Left (Immediate) defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls, N2RegVShLFrm>; @@ -3136,12 +3982,12 @@ defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift -defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, N3RegVShFrm, +defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, - "vqrshl", "s", int_arm_neon_vqrshifts, 0>; -defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, N3RegVShFrm, + "vqrshl", "s", int_arm_neon_vqrshifts>; +defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, - "vqrshl", "u", int_arm_neon_vqrshiftu, 0>; + "vqrshl", "u", int_arm_neon_vqrshiftu>; // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s", @@ -3168,7 +4014,7 @@ defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>; // Vector Absolute and Saturating Absolute. 
// VABS : Vector Absolute Value -defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, +defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", int_arm_neon_vabs>; def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, @@ -3179,7 +4025,7 @@ def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, v4f32, v4f32, int_arm_neon_vabs>; // VQABS : Vector Saturating Absolute Value -defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, +defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", int_arm_neon_vqabs>; @@ -3191,13 +4037,13 @@ def vnegq : PatFrag<(ops node:$in), (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>; class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src), - IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", - [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>; + : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm), + IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "", + [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>; class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src), - IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", - [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>; + : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm), + IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "", + [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>; // VNEG : Vector Negate (integer) def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>; @@ -3209,13 +4055,13 @@ def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>; // VNEG : Vector Negate (floating-point) def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, - (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, - "vneg", "f32", "$dst, $src", "", - [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>; + (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD, + "vneg", "f32", "$Vd, $Vm", "", + [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>; def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, - (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ, - "vneg", "f32", "$dst, $src", "", - [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>; + (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ, + "vneg", "f32", "$Vd, $Vm", "", + [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>; def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>; def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>; @@ -3225,22 +4071,22 @@ def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>; def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>; // VQNEG : Vector Saturating Negate -defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, +defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s", int_arm_neon_vqneg>; // Vector Bit Counting Operations. 
// VCLS : Vector Count Leading Sign Bits -defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, +defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s", int_arm_neon_vcls>; // VCLZ : Vector Count Leading Zeros -defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, +defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i", int_arm_neon_vclz>; // VCNT : Vector Count One Bits -def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, +def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiD, "vcnt", "8", v8i8, v8i8, int_arm_neon_vcnt>; def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, @@ -3249,98 +4095,126 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, // Vector Swap -- for disassembly only. def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, - (outs DPR:$dst), (ins DPR:$src), NoItinerary, - "vswp", "$dst, $src", "", []>; + (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, + "vswp", "$Vd, $Vm", "", []>; def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, - (outs QPR:$dst), (ins QPR:$src), NoItinerary, - "vswp", "$dst, $src", "", []>; + (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, + "vswp", "$Vd, $Vm", "", []>; // Vector Move Operations. // VMOV : Vector Move (Register) let neverHasSideEffects = 1 in { -def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), - N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; -def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), - N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; +def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$Vm), + N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> { + let Vn{4-0} = Vm{4-0}; +} +def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$Vm), + N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> { + let Vn{4-0} = Vm{4-0}; +} // Pseudo vector move instructions for QQ and QQQQ registers. This should // be expanded after register allocation is completed. 
def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src), - NoItinerary, "${:comment} vmov\t$dst, $src", []>; + NoItinerary, []>; def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), - NoItinerary, "${:comment} vmov\t$dst, $src", []>; + NoItinerary, []>; } // neverHasSideEffects // VMOV : Vector Move (Immediate) let isReMaterializable = 1 in { -def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), +def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm, - "vmov", "i8", "$dst, $SIMM", "", - [(set DPR:$dst, (v8i8 (NEONvmovImm timm:$SIMM)))]>; -def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), + "vmov", "i8", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>; +def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm, - "vmov", "i8", "$dst, $SIMM", "", - [(set QPR:$dst, (v16i8 (NEONvmovImm timm:$SIMM)))]>; + "vmov", "i8", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>; -def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst), +def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm, - "vmov", "i16", "$dst, $SIMM", "", - [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]>; -def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst), + "vmov", "i16", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm, - "vmov", "i16", "$dst, $SIMM", "", - [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]>; + "vmov", "i16", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} -def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$dst), +def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm, - "vmov", "i32", "$dst, $SIMM", "", - [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]>; -def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$dst), + "vmov", "i32", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> { + let Inst{11-8} = SIMM{11-8}; +} + +def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm, - "vmov", "i32", "$dst, $SIMM", "", - [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]>; + "vmov", "i32", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> { + let Inst{11-8} = SIMM{11-8}; +} -def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), +def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm, - "vmov", "i64", "$dst, $SIMM", "", - [(set DPR:$dst, (v1i64 (NEONvmovImm timm:$SIMM)))]>; -def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), + "vmov", "i64", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>; +def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd), (ins nModImm:$SIMM), IIC_VMOVImm, - "vmov", "i64", "$dst, $SIMM", "", - [(set QPR:$dst, (v2i64 (NEONvmovImm timm:$SIMM)))]>; + "vmov", "i64", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>; } // isReMaterializable // VMOV : Vector Get Lane (move scalar to ARM core register) def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?}, - (outs GPR:$dst), (ins 
DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]", - [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src), - imm:$lane))]>; + (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), + IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]", + [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{2}; + let Inst{6-5} = lane{1-0}; +} def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1}, - (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]", - [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src), - imm:$lane))]>; + (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), + IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]", + [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{1}; + let Inst{6} = lane{0}; +} def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?}, - (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]", - [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src), - imm:$lane))]>; + (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), + IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]", + [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{2}; + let Inst{6-5} = lane{1-0}; +} def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1}, - (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]", - [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src), - imm:$lane))]>; + (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), + IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]", + [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{1}; + let Inst{6} = lane{0}; +} def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, - (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]", - [(set GPR:$dst, (extractelt (v2i32 DPR:$src), - imm:$lane))]>; + (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), + IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]", + [(set GPR:$R, (extractelt (v2i32 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{0}; +} // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane), (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src, @@ -3376,37 +4250,45 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), // VMOV : Vector Set Lane (move ARM core register to scalar) -let Constraints = "$src1 = $dst" in { -def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$dst), - (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2", - [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1), - GPR:$src2, imm:$lane))]>; -def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$dst), - (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2", - [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1), - GPR:$src2, imm:$lane))]>; -def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$dst), - (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2", - [(set DPR:$dst, (insertelt (v2i32 DPR:$src1), - GPR:$src2, imm:$lane))]>; +let Constraints = "$src1 = $V" in { +def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V), + (ins DPR:$src1, GPR:$R, nohash_imm:$lane), + IIC_VMOVISL, "vmov", "8", "$V[$lane], $R", + [(set DPR:$V, (vector_insert (v8i8 DPR:$src1), + GPR:$R, imm:$lane))]> { + let Inst{21} = lane{2}; + let Inst{6-5} = lane{1-0}; +} +def VSETLNi16 : 
NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V), + (ins DPR:$src1, GPR:$R, nohash_imm:$lane), + IIC_VMOVISL, "vmov", "16", "$V[$lane], $R", + [(set DPR:$V, (vector_insert (v4i16 DPR:$src1), + GPR:$R, imm:$lane))]> { + let Inst{21} = lane{1}; + let Inst{6} = lane{0}; +} +def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), + (ins DPR:$src1, GPR:$R, nohash_imm:$lane), + IIC_VMOVISL, "vmov", "32", "$V[$lane], $R", + [(set DPR:$V, (insertelt (v2i32 DPR:$src1), + GPR:$R, imm:$lane))]> { + let Inst{21} = lane{0}; +} } def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), - (v16i8 (INSERT_SUBREG QPR:$src1, + (v16i8 (INSERT_SUBREG QPR:$src1, (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, (DSubReg_i8_reg imm:$lane))), GPR:$src2, (SubReg_i8_lane imm:$lane))), (DSubReg_i8_reg imm:$lane)))>; def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), - (v8i16 (INSERT_SUBREG QPR:$src1, + (v8i16 (INSERT_SUBREG QPR:$src1, (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, (DSubReg_i16_reg imm:$lane))), GPR:$src2, (SubReg_i16_lane imm:$lane))), (DSubReg_i16_reg imm:$lane)))>; def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), - (v4i32 (INSERT_SUBREG QPR:$src1, + (v4i32 (INSERT_SUBREG QPR:$src1, (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, (DSubReg_i32_reg imm:$lane))), GPR:$src2, (SubReg_i32_lane imm:$lane))), @@ -3454,13 +4336,13 @@ def : Pat<(v4i32 (scalar_to_vector GPR:$src)), // VDUP : Vector Duplicate (from ARM core register to all elements) class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> - : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vdup", Dt, "$dst, $src", - [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>; + : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R), + IIC_VMOVIS, "vdup", Dt, "$V, $R", + [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>; class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty> - : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vdup", Dt, "$dst, $src", - [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>; + : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R), + IIC_VMOVIS, "vdup", Dt, "$V, $R", + [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>; def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; @@ -3469,40 +4351,56 @@ def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; -def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vdup", "32", "$dst, $src", - [(set DPR:$dst, (v2f32 (NEONvdup - (f32 (bitconvert GPR:$src)))))]>; -def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vdup", "32", "$dst, $src", - [(set QPR:$dst, (v4f32 (NEONvdup - (f32 (bitconvert GPR:$src)))))]>; +def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$V), (ins GPR:$R), + IIC_VMOVIS, "vdup", "32", "$V, $R", + [(set DPR:$V, (v2f32 (NEONvdup + (f32 (bitconvert GPR:$R)))))]>; +def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$V), (ins GPR:$R), + IIC_VMOVIS, "vdup", "32", "$V, $R", + [(set QPR:$V, (v4f32 (NEONvdup + (f32 (bitconvert GPR:$R)))))]>; // VDUP : Vector Duplicate Lane (from scalar to all elements) class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt, ValueType Ty> - : NVDupLane<op19_16, 0, (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVD, OpcodeStr, Dt, "$dst, 
$src[$lane]", - [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>; + : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane), + IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm[$lane]", + [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>; class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy> - : NVDupLane<op19_16, 1, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]", - [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), + : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, nohash_imm:$lane), + IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm[$lane]", + [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm), imm:$lane)))]>; // Inst{19-16} is partially specified depending on the element size. -def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8>; -def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16>; -def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32>; -def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32>; -def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8>; -def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16>; -def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32>; -def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32>; +def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8> { + let Inst{19-17} = lane{2-0}; +} +def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> { + let Inst{19-18} = lane{1-0}; +} +def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> { + let Inst{19} = lane{0}; +} +def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32> { + let Inst{19} = lane{0}; +} +def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> { + let Inst{19-17} = lane{2-0}; +} +def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> { + let Inst{19-18} = lane{1-0}; +} +def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> { + let Inst{19} = lane{0}; +} +def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32> { + let Inst{19} = lane{0}; +} def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, @@ -3521,18 +4419,13 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; -def VDUPfdf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0, - (outs DPR:$dst), (ins SPR:$src), - IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "", +def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "", [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>; - -def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, - (outs QPR:$dst), (ins SPR:$src), - IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "", +def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "", [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; // VMOVN : Vector Narrowing Move -defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, +defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN, "vmovn", "i", trunc>; // VQMOVN : Vector Saturating Narrowing Move defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, @@ -3585,20 +4478,30 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; +// VCVT : Vector Convert Between Half-Precision and Single-Precision. 
+def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, + IIC_VUNAQ, "vcvt", "f16.f32", + v4i16, v4f32, int_arm_neon_vcvtfp2hf>, + Requires<[HasNEON, HasFP16]>; +def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0, + IIC_VUNAQ, "vcvt", "f32.f16", + v4f32, v4i16, int_arm_neon_vcvthf2fp>, + Requires<[HasNEON, HasFP16]>; + // Vector Reverse. // VREV64 : Vector Reverse elements within 64-bit doublewords class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst), - (ins DPR:$src), IIC_VMOVD, - OpcodeStr, Dt, "$dst, $src", "", - [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>; + : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd), + (ins DPR:$Vm), IIC_VMOVD, + OpcodeStr, Dt, "$Vd, $Vm", "", + [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>; class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst), - (ins QPR:$src), IIC_VMOVD, - OpcodeStr, Dt, "$dst, $src", "", - [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>; + : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd), + (ins QPR:$Vm), IIC_VMOVQ, + OpcodeStr, Dt, "$Vd, $Vm", "", + [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>; def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; @@ -3613,15 +4516,15 @@ def VREV64qf : VREV64Q<0b10, "vrev64", "32", v4f32>; // VREV32 : Vector Reverse elements within 32-bit words class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst), - (ins DPR:$src), IIC_VMOVD, - OpcodeStr, Dt, "$dst, $src", "", - [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>; + : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd), + (ins DPR:$Vm), IIC_VMOVD, + OpcodeStr, Dt, "$Vd, $Vm", "", + [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>; class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst), - (ins QPR:$src), IIC_VMOVD, - OpcodeStr, Dt, "$dst, $src", "", - [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>; + : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd), + (ins QPR:$Vm), IIC_VMOVQ, + OpcodeStr, Dt, "$Vd, $Vm", "", + [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>; def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>; def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>; @@ -3632,46 +4535,91 @@ def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>; // VREV16 : Vector Reverse elements within 16-bit halfwords class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst), - (ins DPR:$src), IIC_VMOVD, - OpcodeStr, Dt, "$dst, $src", "", - [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>; + : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd), + (ins DPR:$Vm), IIC_VMOVD, + OpcodeStr, Dt, "$Vd, $Vm", "", + [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>; class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> - : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst), - (ins QPR:$src), IIC_VMOVD, - OpcodeStr, Dt, "$dst, $src", "", - [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>; + : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd), + (ins QPR:$Vm), IIC_VMOVQ, + OpcodeStr, Dt, "$Vd, $Vm", "", + [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>; def VREV16d8 : VREV16D<0b00, 
"vrev16", "8", v8i8>; def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>; // Other Vector Shuffles. +// Aligned extractions: really just dropping registers + +class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT> + : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))), + (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>; + +def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>; + +def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>; + +def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>; + +def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>; + +def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>; + + // VEXT : Vector Extract class VEXTd<string OpcodeStr, string Dt, ValueType Ty> - : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst), - (ins DPR:$lhs, DPR:$rhs, i32imm:$index), NVExtFrm, - IIC_VEXTD, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", - [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs), - (Ty DPR:$rhs), imm:$index)))]>; + : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd), + (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm, + IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", + [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), + (Ty DPR:$Vm), imm:$index)))]> { + bits<4> index; + let Inst{11-8} = index{3-0}; +} class VEXTq<string OpcodeStr, string Dt, ValueType Ty> - : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst), - (ins QPR:$lhs, QPR:$rhs, i32imm:$index), NVExtFrm, - IIC_VEXTQ, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", - [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs), - (Ty QPR:$rhs), imm:$index)))]>; - -def VEXTd8 : VEXTd<"vext", "8", v8i8>; -def VEXTd16 : VEXTd<"vext", "16", v4i16>; -def VEXTd32 : VEXTd<"vext", "32", v2i32>; -def VEXTdf : VEXTd<"vext", "32", v2f32>; - -def VEXTq8 : VEXTq<"vext", "8", v16i8>; -def VEXTq16 : VEXTq<"vext", "16", v8i16>; -def VEXTq32 : VEXTq<"vext", "32", v4i32>; -def VEXTqf : VEXTq<"vext", "32", v4f32>; + : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd), + (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm, + IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", + [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn), + (Ty QPR:$Vm), imm:$index)))]> { + bits<4> index; + let Inst{11-8} = index{3-0}; +} + +def VEXTd8 : VEXTd<"vext", "8", v8i8> { + let Inst{11-8} = index{3-0}; +} +def VEXTd16 : VEXTd<"vext", "16", v4i16> { + let Inst{11-9} = index{2-0}; + let Inst{8} = 0b0; +} +def VEXTd32 : VEXTd<"vext", "32", v2i32> { + let Inst{11-10} = index{1-0}; + let Inst{9-8} = 0b00; +} +def VEXTdf : VEXTd<"vext", "32", v2f32> { + let Inst{11} = index{0}; + let Inst{10-8} = 0b000; +} + +def VEXTq8 : VEXTq<"vext", "8", v16i8> { + let Inst{11-8} = index{3-0}; +} +def VEXTq16 : VEXTq<"vext", "16", v8i16> { + let Inst{11-9} = index{2-0}; + let Inst{8} = 0b0; +} +def VEXTq32 : VEXTq<"vext", "32", v4i32> { + let Inst{11-10} = index{1-0}; + let Inst{9-8} = 0b00; +} +def VEXTqf : VEXTq<"vext", "32", v4f32> { + let Inst{11} = index{0}; + let Inst{10-8} = 0b000; +} // VTRN : Vector Transpose @@ -3707,160 +4655,120 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">; // VTBL : Vector Table Lookup def VTBL1 - : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTB1, - "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>; + : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd), + (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1, + "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "", + [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBL2 - 
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>; + : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), + (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2, + "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>; def VTBL3 - : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", []>; + : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), + (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3, + "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>; def VTBL4 - : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), + : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd), + (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTB4, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>; + "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>; } // hasExtraSrcRegAllocReq = 1 +def VTBL2Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>; +def VTBL3Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>; +def VTBL4Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>; + // VTBX : Vector Table Extension def VTBX1 - : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst), - (ins DPR:$orig, DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTBX1, - "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1 - DPR:$orig, DPR:$tbl1, DPR:$src)))]>; + : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd), + (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1, + "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd", + [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1 + DPR:$orig, DPR:$Vn, DPR:$Vm)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBX2 - : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst), - (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2, - "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", []>; + : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), + (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2, + "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>; def VTBX3 - : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst), - (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), + : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), + (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTBX3, - "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", - "$orig = $dst", []>; + "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", + "$orig = $Vd", []>; def VTBX4 - : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, - DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4, - "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", - "$orig = $dst", []>; + : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn, + DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4, + "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", + "$orig = $Vd", []>; } // hasExtraSrcRegAllocReq = 1 +def VTBX2Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src), + IIC_VTBX2, "$orig = $dst", []>; +def VTBX3Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), + IIC_VTBX3, "$orig = $dst", []>; +def VTBX4Pseudo + : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), + IIC_VTBX4, 
"$orig = $dst", []>; + //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// -class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst> - : NEONFPPat<(ResTy (OpNode SPR:$a)), - (EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), - SPR:$a, ssub_0))), - ssub_0)>; +class N2VSPat<SDNode OpNode, NeonI Inst> + : NEONFPPat<(f32 (OpNode SPR:$a)), + (EXTRACT_SUBREG + (v2f32 (COPY_TO_REGCLASS (Inst + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>; class N3VSPat<SDNode OpNode, NeonI Inst> : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), - (EXTRACT_SUBREG (v2f32 - (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, ssub_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, ssub_0))), - ssub_0)>; + (EXTRACT_SUBREG + (v2f32 (COPY_TO_REGCLASS (Inst + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$a, ssub_0), + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst> : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), - (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$acc, ssub_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, ssub_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, ssub_0)), - ssub_0)>; - -// These need separate instructions because they must use DPR_VFP2 register -// class which have SPR sub-registers. - -// Vector Add Operations used for single-precision FP -let neverHasSideEffects = 1 in -def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>; -def : N3VSPat<fadd, VADDfd_sfp>; - -// Vector Sub Operations used for single-precision FP -let neverHasSideEffects = 1 in -def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>; -def : N3VSPat<fsub, VSUBfd_sfp>; - -// Vector Multiply Operations used for single-precision FP -let neverHasSideEffects = 1 in -def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>; -def : N3VSPat<fmul, VMULfd_sfp>; - -// Vector Multiply-Accumulate/Subtract used for single-precision FP -// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so -// we want to avoid them for now. e.g., alternating vmla/vadd instructions. 
- -//let neverHasSideEffects = 1 in -//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32", -// v2f32, fmul, fadd>; -//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>; - -//let neverHasSideEffects = 1 in -//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32", -// v2f32, fmul, fsub>; -//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>; - -// Vector Absolute used for single-precision FP -let neverHasSideEffects = 1 in -def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, - "vabs", "f32", "$dst, $src", "", []>; -def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>; - -// Vector Negate used for single-precision FP -let neverHasSideEffects = 1 in -def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, - "vneg", "f32", "$dst, $src", "", []>; -def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>; - -// Vector Maximum used for single-precision FP -let neverHasSideEffects = 1 in -def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), - (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND, - "vmax", "f32", "$dst, $src1, $src2", "", []>; -def : N3VSPat<NEONfmax, VMAXfd_sfp>; - -// Vector Minimum used for single-precision FP -let neverHasSideEffects = 1 in -def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), - (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND, - "vmin", "f32", "$dst, $src1, $src2", "", []>; -def : N3VSPat<NEONfmin, VMINfd_sfp>; - -// Vector Convert between single-precision FP and integer -let neverHasSideEffects = 1 in -def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", - v2i32, v2f32, fp_to_sint>; -def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>; - -let neverHasSideEffects = 1 in -def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", - v2i32, v2f32, fp_to_uint>; -def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>; - -let neverHasSideEffects = 1 in -def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", - v2f32, v2i32, sint_to_fp>; -def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>; - -let neverHasSideEffects = 1 in -def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", - v2f32, v2i32, uint_to_fp>; -def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>; + (EXTRACT_SUBREG + (v2f32 (COPY_TO_REGCLASS (Inst + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$acc, ssub_0), + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$a, ssub_0), + (INSERT_SUBREG + (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)), + SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>; + +def : N3VSPat<fadd, VADDfd>; +def : N3VSPat<fsub, VSUBfd>; +def : N3VSPat<fmul, VMULfd>; +def : N3VSMulOpPat<fmul, fadd, VMLAfd>, + Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; +def : N3VSMulOpPat<fmul, fsub, VMLSfd>, + Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; +def : N2VSPat<fabs, VABSfd>; +def : N2VSPat<fneg, VNEGfd>; +def : N3VSPat<NEONfmax, VMAXfd>; +def : N3VSPat<NEONfmin, VMINfd>; +def : N2VSPat<arm_ftosi, VCVTf2sd>; +def : N2VSPat<arm_ftoui, VCVTf2ud>; +def : N2VSPat<arm_sitof, VCVTs2fd>; +def : N2VSPat<arm_uitof, VCVTu2fd>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index a13ff12..826ef46 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ 
b/lib/Target/ARM/ARMInstrThumb.td @@ -1,4 +1,4 @@ -//===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===// +//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -16,7 +16,7 @@ // def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def imm_neg_XFORM : SDNodeXForm<imm, [{ @@ -26,7 +26,6 @@ def imm_comp_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32); }]>; - /// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7]. def imm0_7 : PatLeaf<(i32 imm), [{ return (uint32_t)N->getZExtValue() < 8; @@ -50,9 +49,9 @@ def imm8_255_neg : PatLeaf<(i32 imm), [{ return Val >= 8 && Val < 256; }], imm_neg_XFORM>; -// Break imm's up into two pieces: an immediate + a left shift. -// This uses thumb_immshifted to match and thumb_immshifted_val and -// thumb_immshifted_shamt to get the val/shift pieces. +// Break imm's up into two pieces: an immediate + a left shift. This uses +// thumb_immshifted to match and thumb_immshifted_val and thumb_immshifted_shamt +// to get the val/shift pieces. def thumb_immshifted : PatLeaf<(imm), [{ return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue()); }]>; @@ -67,6 +66,11 @@ def thumb_immshifted_shamt : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(V, MVT::i32); }]>; +// ADR instruction labels. +def t_adrlabel : Operand<i32> { + let EncoderMethod = "getThumbAdrLabelOpValue"; +} + // Scaled 4 immediate. def t_imm_s4 : Operand<i32> { let PrintMethod = "printThumbS4ImmOperand"; @@ -74,47 +78,114 @@ def t_imm_s4 : Operand<i32> { // Define Thumb specific addressing modes. 
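[The operand definitions that follow replace the old t_addrmode_s1/s2/s4 with separate register-register and register-immediate forms. The effective address each one describes is simple; a scalar sketch with hypothetical helper names:

#include <stdint.h>
#include <stdio.h>

/* Effective addresses for the Thumb-1 operand kinds defined below. */
static uint32_t ea_rr(uint32_t rn, uint32_t rm) {          /* t_addrmode_rr / rrs1/2/4 */
    return rn + rm;
}
static uint32_t ea_is(uint32_t rn, uint32_t imm5, int s) { /* t_addrmode_is1/2/4:      */
    return rn + (imm5 << s);                               /* imm5 scaled by 1, 2 or 4 */
}
static uint32_t ea_sp(uint32_t sp, uint32_t imm8) {        /* t_addrmode_sp            */
    return sp + (imm8 << 2);
}

int main(void) {
    printf("%#x\n", ea_is(0x1000, 3, 2));   /* ldr r0, [r1, #12] -> 0x100c */
    return 0;
}
]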
+def t_brtarget : Operand<OtherVT> { + let EncoderMethod = "getThumbBRTargetOpValue"; +} + +def t_bcctarget : Operand<i32> { + let EncoderMethod = "getThumbBCCTargetOpValue"; +} + +def t_cbtarget : Operand<i32> { + let EncoderMethod = "getThumbCBTargetOpValue"; +} + +def t_bltarget : Operand<i32> { + let EncoderMethod = "getThumbBLTargetOpValue"; +} + +def t_blxtarget : Operand<i32> { + let EncoderMethod = "getThumbBLXTargetOpValue"; +} + +def MemModeRegThumbAsmOperand : AsmOperandClass { + let Name = "MemModeRegThumb"; + let SuperClasses = []; +} + +def MemModeImmThumbAsmOperand : AsmOperandClass { + let Name = "MemModeImmThumb"; + let SuperClasses = []; +} + // t_addrmode_rr := reg + reg // def t_addrmode_rr : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> { + let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let PrintMethod = "printThumbAddrModeRROperand"; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); } -// t_addrmode_s4 := reg + reg -// reg + imm5 * 4 +// t_addrmode_rrs := reg + reg // -def t_addrmode_s4 : Operand<i32>, - ComplexPattern<i32, 3, "SelectThumbAddrModeS4", []> { - let PrintMethod = "printThumbAddrModeS4Operand"; - let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg); +def t_addrmode_rrs1 : Operand<i32>, + ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> { + let EncoderMethod = "getThumbAddrModeRegRegOpValue"; + let PrintMethod = "printThumbAddrModeRROperand"; + let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); + let ParserMatchClass = MemModeRegThumbAsmOperand; +} +def t_addrmode_rrs2 : Operand<i32>, + ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> { + let EncoderMethod = "getThumbAddrModeRegRegOpValue"; + let PrintMethod = "printThumbAddrModeRROperand"; + let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); + let ParserMatchClass = MemModeRegThumbAsmOperand; +} +def t_addrmode_rrs4 : Operand<i32>, + ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> { + let EncoderMethod = "getThumbAddrModeRegRegOpValue"; + let PrintMethod = "printThumbAddrModeRROperand"; + let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); + let ParserMatchClass = MemModeRegThumbAsmOperand; +} + +// t_addrmode_is4 := reg + imm5 * 4 +// +def t_addrmode_is4 : Operand<i32>, + ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> { + let EncoderMethod = "getAddrModeISOpValue"; + let PrintMethod = "printThumbAddrModeImm5S4Operand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm); + let ParserMatchClass = MemModeImmThumbAsmOperand; } -// t_addrmode_s2 := reg + reg -// reg + imm5 * 2 +// t_addrmode_is2 := reg + imm5 * 2 // -def t_addrmode_s2 : Operand<i32>, - ComplexPattern<i32, 3, "SelectThumbAddrModeS2", []> { - let PrintMethod = "printThumbAddrModeS2Operand"; - let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg); +def t_addrmode_is2 : Operand<i32>, + ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> { + let EncoderMethod = "getAddrModeISOpValue"; + let PrintMethod = "printThumbAddrModeImm5S2Operand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm); + let ParserMatchClass = MemModeImmThumbAsmOperand; } -// t_addrmode_s1 := reg + reg -// reg + imm5 +// t_addrmode_is1 := reg + imm5 // -def t_addrmode_s1 : Operand<i32>, - ComplexPattern<i32, 3, "SelectThumbAddrModeS1", []> { - let PrintMethod = "printThumbAddrModeS1Operand"; - let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg); +def t_addrmode_is1 : Operand<i32>, + ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> { + let 
EncoderMethod = "getAddrModeISOpValue"; + let PrintMethod = "printThumbAddrModeImm5S1Operand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm); + let ParserMatchClass = MemModeImmThumbAsmOperand; } // t_addrmode_sp := sp + imm8 * 4 // def t_addrmode_sp : Operand<i32>, ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> { + let EncoderMethod = "getAddrModeThumbSPOpValue"; let PrintMethod = "printThumbAddrModeSPOperand"; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); + let ParserMatchClass = MemModeImmThumbAsmOperand; +} + +// t_addrmode_pc := <label> => pc + imm8 * 4 +// +def t_addrmode_pc : Operand<i32> { + let EncoderMethod = "getAddrModePCOpValue"; + let ParserMatchClass = MemModeImmThumbAsmOperand; } //===----------------------------------------------------------------------===// @@ -126,132 +197,162 @@ def t_addrmode_sp : Operand<i32>, // these will always be in pairs, and asserts if it finds otherwise. Better way? let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def tADJCALLSTACKUP : -PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary, - "${:comment} tADJCALLSTACKUP $amt1", - [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb1Only]>; + PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary, + [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, + Requires<[IsThumb, IsThumb1Only]>; def tADJCALLSTACKDOWN : -PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, - "${:comment} tADJCALLSTACKDOWN $amt", - [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>; + PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, + [(ARMcallseq_start imm:$amt)]>, + Requires<[IsThumb, IsThumb1Only]>; +} + +// T1Disassembly - A simple class to make encoding some disassembly patterns +// easier and less verbose. +class T1Disassembly<bits<2> op1, bits<8> op2> + : T1Encoding<0b101111> { + let Inst{9-8} = op1; + let Inst{7-0} = op2; } def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", [/* For disassembly only; pattern left blank */]>, - T1Encoding<0b101111> { - let Inst{9-8} = 0b11; - let Inst{7-0} = 0b00000000; -} + T1Disassembly<0b11, 0x00>; // A8.6.110 def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", [/* For disassembly only; pattern left blank */]>, - T1Encoding<0b101111> { - let Inst{9-8} = 0b11; - let Inst{7-0} = 0b00010000; -} + T1Disassembly<0b11, 0x10>; // A8.6.410 def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", [/* For disassembly only; pattern left blank */]>, - T1Encoding<0b101111> { - let Inst{9-8} = 0b11; - let Inst{7-0} = 0b00100000; -} + T1Disassembly<0b11, 0x20>; // A8.6.408 def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", [/* For disassembly only; pattern left blank */]>, - T1Encoding<0b101111> { - let Inst{9-8} = 0b11; - let Inst{7-0} = 0b00110000; -} + T1Disassembly<0b11, 0x30>; // A8.6.409 def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", [/* For disassembly only; pattern left blank */]>, - T1Encoding<0b101111> { - let Inst{9-8} = 0b11; - let Inst{7-0} = 0b01000000; -} + T1Disassembly<0b11, 0x40>; // A8.6.157 + +// The i32imm operand $val can be used by a debugger to store more information +// about the breakpoint. 
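[As the comment above notes, the BKPT immediate is a free-form tag for the debugger. From C one might emit it with inline assembly; illustrative only, not part of the patch:

/* Traps into an attached debugger with an 8-bit tag; the core ignores it. */
void debug_trap(void) {
#if defined(__arm__) || defined(__thumb__)
    __asm__ volatile("bkpt #0x42");
#endif
    /* no-op on other hosts so the file still compiles */
}
]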
+def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val", + [/* For disassembly only; pattern left blank */]>, + T1Disassembly<0b10, {?,?,?,?,?,?,?,?}> { + // A8.6.22 + bits<8> val; + let Inst{7-0} = val; +} def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe", [/* For disassembly only; pattern left blank */]>, T1Encoding<0b101101> { + // A8.6.156 let Inst{9-5} = 0b10010; - let Inst{3} = 1; + let Inst{4} = 1; + let Inst{3} = 1; // Big-Endian + let Inst{2-0} = 0b000; } def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle", [/* For disassembly only; pattern left blank */]>, T1Encoding<0b101101> { + // A8.6.156 let Inst{9-5} = 0b10010; - let Inst{3} = 0; + let Inst{4} = 1; + let Inst{3} = 0; // Little-Endian + let Inst{2-0} = 0b000; } -// The i32imm operand $val can be used by a debugger to store more information -// about the breakpoint. -def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val", +// Change Processor State is a system instruction -- for disassembly only. +def tCPS : T1I<(outs), (ins imod_op:$imod, iflags_op:$iflags), + NoItinerary, "cps$imod $iflags", [/* For disassembly only; pattern left blank */]>, - T1Encoding<0b101111> { - let Inst{9-8} = 0b10; + T1Misc<0b0110011> { + // A8.6.38 & B6.1.1 + bit imod; + bits<3> iflags; + + let Inst{4} = imod; + let Inst{3} = 0; + let Inst{2-0} = iflags; } -// Change Processor State is a system instruction -- for disassembly only. -// The singleton $opt operand contains the following information: -// opt{4-0} = mode ==> don't care -// opt{5} = changemode ==> 0 (false for 16-bit Thumb instr) -// opt{8-6} = AIF from Inst{2-0} -// opt{10-9} = 1:imod from Inst{4} with 0b10 as enable and 0b11 as disable -// -// The opt{4-0} and opt{5} sub-fields are to accommodate 32-bit Thumb and ARM -// CPS which has more options. -def tCPS : T1I<(outs), (ins cps_opt:$opt), NoItinerary, "cps$opt", - [/* For disassembly only; pattern left blank */]>, - T1Misc<0b0110011>; - // For both thumb1 and thumb2. -let isNotDuplicable = 1 in -def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, - "\n$cp:\n\tadd\t$dst, pc", - [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>, +let isNotDuplicable = 1, isCodeGenOnly = 1 in +def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "", + [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>, T1Special<{0,0,?,?}> { - let Inst{6-3} = 0b1111; // A8.6.6 Rm = pc + // A8.6.6 + bits<3> dst; + let Inst{6-3} = 0b1111; // Rm = pc + let Inst{2-0} = dst; } -// PC relative add. +// PC relative add (ADR). def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi, - "add\t$dst, pc, $rhs", []>, - T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 + "add\t$dst, pc, $rhs", []>, + T1Encoding<{1,0,1,0,0,?}> { + // A6.2 & A8.6.10 + bits<3> dst; + bits<8> rhs; + let Inst{10-8} = dst; + let Inst{7-0} = rhs; +} -// ADD rd, sp, #imm8 +// ADD <Rd>, sp, #<imm8> // This is rematerializable, which is particularly useful for taking the // address of locals. 
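[A quick illustration of why marking tADDrSPi rematerializable matters: a local's address can be recomputed from sp in one instruction at each use instead of being kept live (or spilled) across calls. For example:

/* Each use of buf's address below can be re-derived as "add rX, sp, #imm"
   rather than held in a register across the two calls. */
static int consume(volatile char *p) { return p[0]; }

int demo(void) {
    char buf[16] = {1};
    return consume(buf) + consume(buf + 8);
}
]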
-let isReMaterializable = 1 in { +let isReMaterializable = 1 in def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi, - "add\t$dst, $sp, $rhs", []>, - T1Encoding<{1,0,1,0,1,?}>; // A6.2 & A8.6.8 + "add\t$dst, $sp, $rhs", []>, + T1Encoding<{1,0,1,0,1,?}> { + // A6.2 & A8.6.8 + bits<3> dst; + bits<8> rhs; + let Inst{10-8} = dst; + let Inst{7-0} = rhs; } -// ADD sp, sp, #imm7 +// ADD sp, sp, #<imm7> def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, "add\t$dst, $rhs", []>, - T1Misc<{0,0,0,0,0,?,?}>; // A6.2.5 & A8.6.8 + T1Misc<{0,0,0,0,0,?,?}> { + // A6.2.5 & A8.6.8 + bits<7> rhs; + let Inst{6-0} = rhs; +} -// SUB sp, sp, #imm7 +// SUB sp, sp, #<imm7> +// FIXME: The encoding and the ASM string don't match up. def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, "sub\t$dst, $rhs", []>, - T1Misc<{0,0,0,0,1,?,?}>; // A6.2.5 & A8.6.215 + T1Misc<{0,0,0,0,1,?,?}> { + // A6.2.5 & A8.6.214 + bits<7> rhs; + let Inst{6-0} = rhs; +} -// ADD rm, sp +// ADD <Rm>, sp def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, "add\t$dst, $rhs", []>, T1Special<{0,0,?,?}> { - let Inst{6-3} = 0b1101; // A8.6.9 Encoding T1 + // A8.6.9 Encoding T1 + bits<4> dst; + let Inst{7} = dst{3}; + let Inst{6-3} = 0b1101; + let Inst{2-0} = dst{2-0}; } -// ADD sp, rm +// ADD sp, <Rm> def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, "add\t$dst, $rhs", []>, T1Special<{0,0,?,?}> { // A8.6.9 Encoding T2 + bits<4> dst; let Inst{7} = 1; + let Inst{6-3} = dst; let Inst{2-0} = 0b101; } @@ -260,21 +361,37 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, // let isReturn = 1, isTerminator = 1, isBarrier = 1 in { - def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr", [(ARMretflag)]>, - T1Special<{1,1,0,?}> { // A6.2.3 & A8.6.25 + def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr", + [(ARMretflag)]>, + T1Special<{1,1,0,?}> { + // A6.2.3 & A8.6.25 let Inst{6-3} = 0b1110; // Rm = lr + let Inst{2-0} = 0b000; } + // Alternative return instruction used by vararg functions. - def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target",[]>, - T1Special<{1,1,0,?}>; // A6.2.3 & A8.6.25 + def tBX_RET_vararg : TI<(outs), (ins tGPR:$Rm), + IIC_Br, "bx\t$Rm", + []>, + T1Special<{1,1,0,?}> { + // A6.2.3 & A8.6.25 + bits<4> Rm; + let Inst{6-3} = Rm; + let Inst{2-0} = 0b000; + } } // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { - def tBRIND : TI<(outs), (ins GPR:$dst), IIC_Br, "mov\tpc, $dst", - [(brind GPR:$dst)]>, - T1Special<{1,0,1,?}> { - // <Rd> = Inst{7:2-0} = pc + def tBRIND : TI<(outs), (ins GPR:$Rm), + IIC_Br, + "mov\tpc, $Rm", + [(brind GPR:$Rm)]>, + T1Special<{1,0,?,?}> { + // A8.6.97 + bits<4> Rm; + let Inst{7} = 1; // <Rd> = Inst{7:2-0} = pc + let Inst{6-3} = Rm; let Inst{2-0} = 0b111; } } @@ -282,28 +399,52 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // FIXME: remove when we have a way to marking a MI with these properties. 
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in -def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br, - "pop${p}\t$dsts", []>, - T1Misc<{1,1,0,?,?,?,?}>; +def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), + IIC_iPop_Br, + "pop${p}\t$regs", []>, + T1Misc<{1,1,0,?,?,?,?}> { + // A8.6.121 + bits<16> regs; + let Inst{8} = regs{15}; // registers = P:'0000000':register_list + let Inst{7-0} = regs{7-0}; +} +// All calls clobber the non-callee saved registers. SP is marked as a use to +// prevent stack-pointer assignments that appear immediately before calls from +// potentially appearing dead. let isCall = 1, + // On non-Darwin platforms R9 is callee-saved. Defs = [R0, R1, R2, R3, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D16, D17, D18, D19, D20, D21, D22, D23, - D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { + D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR], + Uses = [SP] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, - "bl\t${func:call}", + (outs), (ins t_bltarget:$func, variable_ops), IIC_Br, + "bl\t$func", [(ARMtcall tglobaladdr:$func)]>, - Requires<[IsThumb, IsNotDarwin]>; + Requires<[IsThumb, IsNotDarwin]> { + bits<21> func; + let Inst{25-16} = func{20-11}; + let Inst{13} = 1; + let Inst{11} = 1; + let Inst{10-0} = func{10-0}; + } // ARMv5T and above, also used for Thumb2 def tBLXi : TIx2<0b11110, 0b11, 0, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, - "blx\t${func:call}", + (outs), (ins t_blxtarget:$func, variable_ops), IIC_Br, + "blx\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T, IsNotDarwin]>; + Requires<[IsThumb, HasV5T, IsNotDarwin]> { + bits<21> func; + let Inst{25-16} = func{20-11}; + let Inst{13} = 1; + let Inst{11} = 1; + let Inst{10-1} = func{10-1}; + let Inst{0} = 0; // func{0} is assumed zero + } // Also used for Thumb2 def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, @@ -313,642 +454,1002 @@ let isCall = 1, T1Special<{1,1,1,?}>; // A6.2.3 & A8.6.24; // ARMv4T + // FIXME: Should be a pseudo. + let isCodeGenOnly = 1 in def tBX : TIx2<{?,?,?,?,?}, {?,?}, ?, (outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb1Only, IsNotDarwin]>; + Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>; } -// On Darwin R9 is call-clobbered. let isCall = 1, + // On Darwin R9 is call-clobbered. + // R7 is marked as a use to prevent frame-pointer assignments from being + // moved above / below calls. 
Defs = [R0, R1, R2, R3, R9, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D16, D17, D18, D19, D20, D21, D22, D23, - D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { + D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR], + Uses = [R7, SP] in { // Also used for Thumb2 def tBLr9 : TIx2<0b11110, 0b11, 1, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, - "bl\t${func:call}", + (outs), (ins pred:$p, t_bltarget:$func, variable_ops), + IIC_Br, "bl${p}\t$func", [(ARMtcall tglobaladdr:$func)]>, - Requires<[IsThumb, IsDarwin]>; + Requires<[IsThumb, IsDarwin]> { + bits<21> func; + let Inst{25-16} = func{20-11}; + let Inst{13} = 1; + let Inst{11} = 1; + let Inst{10-0} = func{10-0}; + } // ARMv5T and above, also used for Thumb2 def tBLXi_r9 : TIx2<0b11110, 0b11, 0, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, - "blx\t${func:call}", + (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), + IIC_Br, "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T, IsDarwin]>; + Requires<[IsThumb, HasV5T, IsDarwin]> { + bits<21> func; + let Inst{25-16} = func{20-11}; + let Inst{13} = 1; + let Inst{11} = 1; + let Inst{10-1} = func{10-1}; + let Inst{0} = 0; // func{0} is assumed zero + } // Also used for Thumb2 - def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, - "blx\t$func", + def tBLXr_r9 : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br, + "blx${p}\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T, IsDarwin]>, - T1Special<{1,1,1,?}>; // A6.2.3 & A8.6.24 + T1Special<{1,1,1,?}> { + // A6.2.3 & A8.6.24 + bits<4> func; + let Inst{6-3} = func; + let Inst{2-0} = 0b000; + } // ARMv4T + let isCodeGenOnly = 1 in + // FIXME: Should be a pseudo. def tBXr9 : TIx2<{?,?,?,?,?}, {?,?}, ?, (outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb1Only, IsDarwin]>; + Requires<[IsThumb, IsThumb1Only, IsDarwin]>; } -let isBranch = 1, isTerminator = 1 in { - let isBarrier = 1 in { - let isPredicable = 1 in - def tB : T1I<(outs), (ins brtarget:$target), IIC_Br, - "b\t$target", [(br bb:$target)]>, - T1Encoding<{1,1,1,0,0,?}>; +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { + let isPredicable = 1 in + def tB : T1I<(outs), (ins t_brtarget:$target), IIC_Br, + "b\t$target", [(br bb:$target)]>, + T1Encoding<{1,1,1,0,0,?}> { + bits<11> target; + let Inst{10-0} = target; + } // Far jump + // Just a pseudo for a tBL instruction. Needed to let regalloc know about + // the clobber of LR. let Defs = [LR] in - def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br, - "bl\t$target\t${:comment} far jump",[]>; - - def tBR_JTr : T1JTI<(outs), - (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "mov\tpc, $target\n\t.align\t2$jt", - [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>, - Encoding16 { - let Inst{15-7} = 0b010001101; - let Inst{2-0} = 0b111; - } + def tBfar : tPseudoInst<(outs), (ins t_bltarget:$target), + Size4Bytes, IIC_Br, []>; + + def tBR_JTr : tPseudoInst<(outs), + (ins tGPR:$target, i32imm:$jt, i32imm:$id), + SizeSpecial, IIC_Br, + [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> { + list<Predicate> Predicates = [IsThumb, IsThumb1Only]; } } // FIXME: should be able to write a pattern for ARMBrcond, but can't use // a two-value operand where a dag node expects two operands. 
:( let isBranch = 1, isTerminator = 1 in - def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), IIC_Br, - "b$cc\t$target", + def tBcc : T1I<(outs), (ins t_bcctarget:$target, pred:$p), IIC_Br, + "b${p}\t$target", [/*(ARMbrcond bb:$target, imm:$cc)*/]>, - T1Encoding<{1,1,0,1,?,?}>; + T1Encoding<{1,1,0,1,?,?}> { + bits<4> p; + bits<8> target; + let Inst{11-8} = p; + let Inst{7-0} = target; +} // Compare and branch on zero / non-zero let isBranch = 1, isTerminator = 1 in { - def tCBZ : T1I<(outs), (ins tGPR:$cmp, brtarget:$target), IIC_Br, - "cbz\t$cmp, $target", []>, - T1Misc<{0,0,?,1,?,?,?}>; + def tCBZ : T1I<(outs), (ins tGPR:$Rn, t_cbtarget:$target), IIC_Br, + "cbz\t$Rn, $target", []>, + T1Misc<{0,0,?,1,?,?,?}> { + // A8.6.27 + bits<6> target; + bits<3> Rn; + let Inst{9} = target{5}; + let Inst{7-3} = target{4-0}; + let Inst{2-0} = Rn; + } - def tCBNZ : T1I<(outs), (ins tGPR:$cmp, brtarget:$target), IIC_Br, + def tCBNZ : T1I<(outs), (ins tGPR:$cmp, t_cbtarget:$target), IIC_Br, "cbnz\t$cmp, $target", []>, - T1Misc<{1,0,?,1,?,?,?}>; + T1Misc<{1,0,?,1,?,?,?}> { + // A8.6.27 + bits<6> target; + bits<3> Rn; + let Inst{9} = target{5}; + let Inst{7-3} = target{4-0}; + let Inst{2-0} = Rn; + } } // A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only // A8.6.16 B: Encoding T1 // If Inst{11-8} == 0b1111 then SEE SVC -let isCall = 1 in { -def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>, - Encoding16 { +let isCall = 1, Uses = [SP] in +def tSVC : T1pI<(outs), (ins i32imm:$imm), IIC_Br, + "svc", "\t$imm", []>, Encoding16 { + bits<8> imm; let Inst{15-12} = 0b1101; - let Inst{11-8} = 0b1111; -} + let Inst{11-8} = 0b1111; + let Inst{7-0} = imm; } -// A8.6.16 B: Encoding T1 -// If Inst{11-8} == 0b1110 then UNDEFINED -// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to -// binutils +// The assembler uses 0xDEFE for a trap instruction. let isBarrier = 1, isTerminator = 1 in def tTRAP : TI<(outs), (ins), IIC_Br, - ".short 0xdefe ${:comment} trap", [(trap)]>, Encoding16 { - let Inst{15-12} = 0b1101; - let Inst{11-8} = 0b1110; + "trap", [(trap)]>, Encoding16 { + let Inst = 0xdefe; } //===----------------------------------------------------------------------===// // Load Store Instructions. 
// +// Loads: reg/reg and reg/imm5 let canFoldAsLoad = 1, isReMaterializable = 1 in -def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, - "ldr", "\t$dst, $addr", - [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>, - T1LdSt<0b100>; -def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, - "ldr", "\t$dst, $addr", - []>, - T1LdSt4Imm<{1,?,?}>; - -def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr, - "ldrb", "\t$dst, $addr", - [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>, - T1LdSt<0b110>; -def tLDRBi: T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr, - "ldrb", "\t$dst, $addr", - []>, - T1LdSt1Imm<{1,?,?}>; - -def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr, - "ldrh", "\t$dst, $addr", - [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>, - T1LdSt<0b101>; -def tLDRHi: T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr, - "ldrh", "\t$dst, $addr", - []>, - T1LdSt2Imm<{1,?,?}>; +multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc, + Operand AddrMode_r, Operand AddrMode_i, + AddrMode am, InstrItinClass itin_r, + InstrItinClass itin_i, string asm, + PatFrag opnode> { + def r : // reg/reg + T1pILdStEncode<reg_opc, + (outs tGPR:$Rt), (ins AddrMode_r:$addr), + am, itin_r, asm, "\t$Rt, $addr", + [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>; + def i : // reg/imm5 + T1pILdStEncodeImm<imm_opc, 1 /* Load */, + (outs tGPR:$Rt), (ins AddrMode_i:$addr), + am, itin_i, asm, "\t$Rt, $addr", + [(set tGPR:$Rt, (opnode AddrMode_i:$addr))]>; +} +// Stores: reg/reg and reg/imm5 +multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc, + Operand AddrMode_r, Operand AddrMode_i, + AddrMode am, InstrItinClass itin_r, + InstrItinClass itin_i, string asm, + PatFrag opnode> { + def r : // reg/reg + T1pILdStEncode<reg_opc, + (outs), (ins tGPR:$Rt, AddrMode_r:$addr), + am, itin_r, asm, "\t$Rt, $addr", + [(opnode tGPR:$Rt, AddrMode_r:$addr)]>; + def i : // reg/imm5 + T1pILdStEncodeImm<imm_opc, 0 /* Store */, + (outs), (ins tGPR:$Rt, AddrMode_i:$addr), + am, itin_i, asm, "\t$Rt, $addr", + [(opnode tGPR:$Rt, AddrMode_i:$addr)]>; +} + +// A8.6.57 & A8.6.60 +defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4, + t_addrmode_is4, AddrModeT1_4, + IIC_iLoad_r, IIC_iLoad_i, "ldr", + UnOpFrag<(load node:$Src)>>; + +// A8.6.64 & A8.6.61 +defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1, + t_addrmode_is1, AddrModeT1_1, + IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb", + UnOpFrag<(zextloadi8 node:$Src)>>; + +// A8.6.76 & A8.6.73 +defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2, + t_addrmode_is2, AddrModeT1_2, + IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh", + UnOpFrag<(zextloadi16 node:$Src)>>; let AddedComplexity = 10 in -def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr, +def tLDRSB : // A8.6.80 + T1pILdStEncode<0b011, (outs tGPR:$dst), (ins t_addrmode_rr:$addr), + AddrModeT1_1, IIC_iLoad_bh_r, "ldrsb", "\t$dst, $addr", - [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>, - T1LdSt<0b011>; + [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>; let AddedComplexity = 10 in -def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr, +def tLDRSH : // A8.6.84 + T1pILdStEncode<0b111, (outs tGPR:$dst), (ins t_addrmode_rr:$addr), + AddrModeT1_2, IIC_iLoad_bh_r, "ldrsh", "\t$dst, $addr", - [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>, - T1LdSt<0b111>; + [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>; let canFoldAsLoad = 1 in 
-def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, - "ldr", "\t$dst, $addr", - [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>, - T1LdStSP<{1,?,?}>; +def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", + [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>, + T1LdStSP<{1,?,?}> { + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} // Special instruction for restore. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in -def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, - "ldr", "\t$dst, $addr", []>, - T1LdStSP<{1,?,?}>; +// FIXME: Pseudo for tLDRspi +def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoad_i, + "ldr", "\t$dst, $addr", []>, + T1LdStSP<{1,?,?}> { + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} // Load tconstpool // FIXME: Use ldr.n to work around a Darwin assembler bug. let canFoldAsLoad = 1, isReMaterializable = 1 in -def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, - "ldr", ".n\t$dst, $addr", - [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>, - T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59 - -// Special LDR for loads from non-pc-relative constpools. -let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, - isReMaterializable = 1 in -def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, - "ldr", "\t$dst, $addr", []>, - T1LdStSP<{1,?,?}>; - -def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer, - "str", "\t$src, $addr", - [(store tGPR:$src, t_addrmode_s4:$addr)]>, - T1LdSt<0b000>; -def tSTRi: T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer, - "str", "\t$src, $addr", - []>, - T1LdSt4Imm<{0,?,?}>; - -def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer, - "strb", "\t$src, $addr", - [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>, - T1LdSt<0b010>; -def tSTRBi: T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer, - "strb", "\t$src, $addr", - []>, - T1LdSt1Imm<{0,?,?}>; - -def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer, - "strh", "\t$src, $addr", - [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>, - T1LdSt<0b001>; -def tSTRHi: T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer, - "strh", "\t$src, $addr", - []>, - T1LdSt2Imm<{0,?,?}>; - -def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, - "str", "\t$src, $addr", - [(store tGPR:$src, t_addrmode_sp:$addr)]>, - T1LdStSP<{0,?,?}>; - -let mayStore = 1, neverHasSideEffects = 1 in { -// Special instruction for spill. It cannot clobber condition register -// when it's expanded by eliminateCallFramePseudoInstr(). 
-def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, +def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, + "ldr", ".n\t$Rt, $addr", + [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, + T1Encoding<{0,1,0,0,1,?}> { + // A6.2 & A8.6.59 + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} + +// A8.6.194 & A8.6.192 +defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4, + t_addrmode_is4, AddrModeT1_4, + IIC_iStore_r, IIC_iStore_i, "str", + BinOpFrag<(store node:$LHS, node:$RHS)>>; + +// A8.6.197 & A8.6.195 +defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1, + t_addrmode_is1, AddrModeT1_1, + IIC_iStore_bh_r, IIC_iStore_bh_i, "strb", + BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; + +// A8.6.207 & A8.6.205 +defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2, + t_addrmode_is2, AddrModeT1_2, + IIC_iStore_bh_r, IIC_iStore_bh_i, "strh", + BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; + + +def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, + "str", "\t$Rt, $addr", + [(store tGPR:$Rt, t_addrmode_sp:$addr)]>, + T1LdStSP<{0,?,?}> { + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} + +let mayStore = 1, neverHasSideEffects = 1 in +// Special instruction for spill. It cannot clobber condition register when it's +// expanded by eliminateCallFramePseudoInstr(). +// FIXME: Pseudo for tSTRspi +def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore_i, "str", "\t$src, $addr", []>, - T1LdStSP<{0,?,?}>; + T1LdStSP<{0,?,?}> { + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; } //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // -// These requires base address to be written back or one of the loaded regs. -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { -def tLDM : T1I<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), - IIC_iLoadm, - "ldm${addr:submode}${p}\t$addr, $dsts", []>, - T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53 - -def tLDM_UPD : T1It<(outs tGPR:$wb), - (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), - IIC_iLoadm, - "ldm${addr:submode}${p}\t$addr!, $dsts", - "$addr.addr = $wb", []>, - T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53 -} // mayLoad, neverHasSideEffects = 1, hasExtraDefRegAllocReq - -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in -def tSTM_UPD : T1It<(outs tGPR:$wb), - (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), - IIC_iStorem, - "stm${addr:submode}${p}\t$addr!, $srcs", - "$addr.addr = $wb", []>, - T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189 +multiclass thumb_ldst_mult<string asm, InstrItinClass itin, + InstrItinClass itin_upd, bits<6> T1Enc, + bit L_bit> { + def IA : + T1I<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin, !strconcat(asm, "ia${p}\t$Rn, $regs"), []>, + T1Encoding<T1Enc> { + bits<3> Rn; + bits<8> regs; + let Inst{10-8} = Rn; + let Inst{7-0} = regs; + } + def IA_UPD : + T1It<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin_upd, !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []>, + T1Encoding<T1Enc> { + bits<3> Rn; + bits<8> regs; + let Inst{10-8} = Rn; + let Inst{7-0} = regs; + } +} + +// These require base address to be written back or one of the loaded regs. 
+let neverHasSideEffects = 1 in { + +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in +defm tLDM : thumb_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, + {1,1,0,0,1,?}, 1>; + +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +defm tSTM : thumb_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, + {1,1,0,0,0,?}, 0>; + +} // neverHasSideEffects let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in -def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br, - "pop${p}\t$dsts", []>, - T1Misc<{1,1,0,?,?,?,?}>; +def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), + IIC_iPop, + "pop${p}\t$regs", []>, + T1Misc<{1,1,0,?,?,?,?}> { + bits<16> regs; + let Inst{8} = regs{15}; + let Inst{7-0} = regs{7-0}; +} let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in -def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops), IIC_Br, - "push${p}\t$srcs", []>, - T1Misc<{0,1,0,?,?,?,?}>; +def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), + IIC_iStore_m, + "push${p}\t$regs", []>, + T1Misc<{0,1,0,?,?,?,?}> { + bits<16> regs; + let Inst{8} = regs{14}; + let Inst{7-0} = regs{7-0}; +} //===----------------------------------------------------------------------===// // Arithmetic Instructions. // +// Helper classes for encoding T1pI patterns: +class T1pIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T1pI<oops, iops, itin, opc, asm, pattern>, + T1DataProcessing<opA> { + bits<3> Rm; + bits<3> Rn; + let Inst{5-3} = Rm; + let Inst{2-0} = Rn; +} +class T1pIMiscEncode<bits<7> opA, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T1pI<oops, iops, itin, opc, asm, pattern>, + T1Misc<opA> { + bits<3> Rm; + bits<3> Rd; + let Inst{5-3} = Rm; + let Inst{2-0} = Rd; +} + +// Helper classes for encoding T1sI patterns: +class T1sIDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T1sI<oops, iops, itin, opc, asm, pattern>, + T1DataProcessing<opA> { + bits<3> Rd; + bits<3> Rn; + let Inst{5-3} = Rn; + let Inst{2-0} = Rd; +} +class T1sIGenEncode<bits<5> opA, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T1sI<oops, iops, itin, opc, asm, pattern>, + T1General<opA> { + bits<3> Rm; + bits<3> Rn; + bits<3> Rd; + let Inst{8-6} = Rm; + let Inst{5-3} = Rn; + let Inst{2-0} = Rd; +} +class T1sIGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T1sI<oops, iops, itin, opc, asm, pattern>, + T1General<opA> { + bits<3> Rd; + bits<3> Rm; + let Inst{5-3} = Rm; + let Inst{2-0} = Rd; +} + +// Helper classes for encoding T1sIt patterns: +class T1sItDPEncode<bits<4> opA, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T1sIt<oops, iops, itin, opc, asm, pattern>, + T1DataProcessing<opA> { + bits<3> Rdn; + bits<3> Rm; + let Inst{5-3} = Rm; + let Inst{2-0} = Rdn; +} +class T1sItGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T1sIt<oops, iops, itin, opc, asm, pattern>, + T1General<opA> { + bits<3> Rdn; + bits<8> imm8; + let Inst{10-8} = Rdn; + let Inst{7-0} = imm8; +} + // Add with carry register let isCommutable = 1, Uses = [CPSR] in -def tADC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "adc", "\t$dst, $rhs", - [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>, - 
T1DataProcessing<0b0101>; +def tADC : // A8.6.2 + T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr, + "adc", "\t$Rdn, $Rm", + [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>; // Add immediate -def tADDi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, - "add", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>, - T1General<0b01110>; +def tADDi3 : // A8.6.4 T1 + T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3), IIC_iALUi, + "add", "\t$Rd, $Rm, $imm3", + [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> { + bits<3> imm3; + let Inst{8-6} = imm3; +} -def tADDi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, - "add", "\t$dst, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>, - T1General<{1,1,0,?,?}>; +def tADDi8 : // A8.6.4 T2 + T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8), + IIC_iALUi, + "add", "\t$Rdn, $imm8", + [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>; // Add register let isCommutable = 1 in -def tADDrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "add", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>, - T1General<0b01100>; +def tADDrr : // A8.6.6 T1 + T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iALUr, + "add", "\t$Rd, $Rn, $Rm", + [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>; let neverHasSideEffects = 1 in -def tADDhirr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - "add", "\t$dst, $rhs", []>, - T1Special<{0,0,?,?}>; +def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr, + "add", "\t$Rdn, $Rm", []>, + T1Special<{0,0,?,?}> { + // A8.6.6 T2 + bits<4> Rdn; + bits<4> Rm; + let Inst{7} = Rdn{3}; + let Inst{6-3} = Rm; + let Inst{2-0} = Rdn{2-0}; +} -// And register +// AND register let isCommutable = 1 in -def tAND : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "and", "\t$dst, $rhs", - [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>, - T1DataProcessing<0b0000>; +def tAND : // A8.6.12 + T1sItDPEncode<0b0000, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iBITr, + "and", "\t$Rdn, $Rm", + [(set tGPR:$Rdn, (and tGPR:$Rn, tGPR:$Rm))]>; // ASR immediate -def tASRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, - "asr", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>, - T1General<{0,1,0,?,?}>; +def tASRri : // A8.6.14 + T1sIGenEncodeImm<{0,1,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5), + IIC_iMOVsi, + "asr", "\t$Rd, $Rm, $imm5", + [(set tGPR:$Rd, (sra tGPR:$Rm, (i32 imm:$imm5)))]> { + bits<5> imm5; + let Inst{10-6} = imm5; +} // ASR register -def tASRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, - "asr", "\t$dst, $rhs", - [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>, - T1DataProcessing<0b0100>; +def tASRrr : // A8.6.15 + T1sItDPEncode<0b0100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iMOVsr, + "asr", "\t$Rdn, $Rm", + [(set tGPR:$Rdn, (sra tGPR:$Rn, tGPR:$Rm))]>; // BIC register -def tBIC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "bic", "\t$dst, $rhs", - [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>, - T1DataProcessing<0b1110>; +def tBIC : // A8.6.20 + T1sItDPEncode<0b1110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iBITr, + "bic", "\t$Rdn, $Rm", + [(set tGPR:$Rdn, (and tGPR:$Rn, (not tGPR:$Rm)))]>; // CMN register -let Defs = [CPSR] in { +let isCompare = 1, Defs = [CPSR] in { //FIXME: 
Disable CMN, as CCodes are backwards from compare expectations // Compare-to-zero still works out, just not the relationals -//def tCMN : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, -// "cmn", "\t$lhs, $rhs", -// [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>, -// T1DataProcessing<0b1011>; -def tCMNz : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, - "cmn", "\t$lhs, $rhs", - [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>, - T1DataProcessing<0b1011>; -} +//def tCMN : // A8.6.33 +// T1pIDPEncode<0b1011, (outs), (ins tGPR:$lhs, tGPR:$rhs), +// IIC_iCMPr, +// "cmn", "\t$lhs, $rhs", +// [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>; + +def tCMNz : // A8.6.33 + T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iCMPr, + "cmn", "\t$Rn, $Rm", + [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>; + +} // isCompare = 1, Defs = [CPSR] // CMP immediate -let Defs = [CPSR] in { -def tCMPi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi, - "cmp", "\t$lhs, $rhs", - [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>, - T1General<{1,0,1,?,?}>; -def tCMPzi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi, - "cmp", "\t$lhs, $rhs", - [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>, - T1General<{1,0,1,?,?}>; +let isCompare = 1, Defs = [CPSR] in { +def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, i32imm:$imm8), IIC_iCMPi, + "cmp", "\t$Rn, $imm8", + [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>, + T1General<{1,0,1,?,?}> { + // A8.6.35 + bits<3> Rn; + bits<8> imm8; + let Inst{10-8} = Rn; + let Inst{7-0} = imm8; } // CMP register -let Defs = [CPSR] in { -def tCMPr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, - "cmp", "\t$lhs, $rhs", - [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>, - T1DataProcessing<0b1010>; -def tCMPzr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, - "cmp", "\t$lhs, $rhs", - [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>, - T1DataProcessing<0b1010>; - -def tCMPhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr, - "cmp", "\t$lhs, $rhs", []>, - T1Special<{0,1,?,?}>; -def tCMPzhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr, - "cmp", "\t$lhs, $rhs", []>, - T1Special<{0,1,?,?}>; +def tCMPr : // A8.6.36 T1 + T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iCMPr, + "cmp", "\t$Rn, $Rm", + [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>; + +def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr, + "cmp", "\t$Rn, $Rm", []>, + T1Special<{0,1,?,?}> { + // A8.6.36 T2 + bits<4> Rm; + bits<4> Rn; + let Inst{7} = Rn{3}; + let Inst{6-3} = Rm; + let Inst{2-0} = Rn{2-0}; } +} // isCompare = 1, Defs = [CPSR] // XOR register let isCommutable = 1 in -def tEOR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "eor", "\t$dst, $rhs", - [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>, - T1DataProcessing<0b0001>; +def tEOR : // A8.6.45 + T1sItDPEncode<0b0001, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iBITr, + "eor", "\t$Rdn, $Rm", + [(set tGPR:$Rdn, (xor tGPR:$Rn, tGPR:$Rm))]>; // LSL immediate -def tLSLri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, - "lsl", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>, - T1General<{0,0,0,?,?}>; +def tLSLri : // A8.6.88 + T1sIGenEncodeImm<{0,0,0,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5), + IIC_iMOVsi, + "lsl", "\t$Rd, $Rm, $imm5", + [(set tGPR:$Rd, (shl tGPR:$Rm, (i32 imm:$imm5)))]> { + bits<5> imm5; + let Inst{10-6} = imm5; +} // LSL register -def tLSLrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, - "lsl", "\t$dst, $rhs", - [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>, - T1DataProcessing<0b0010>; +def tLSLrr 
: // A8.6.89 + T1sItDPEncode<0b0010, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iMOVsr, + "lsl", "\t$Rdn, $Rm", + [(set tGPR:$Rdn, (shl tGPR:$Rn, tGPR:$Rm))]>; // LSR immediate -def tLSRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, - "lsr", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>, - T1General<{0,0,1,?,?}>; +def tLSRri : // A8.6.90 + T1sIGenEncodeImm<{0,0,1,?,?}, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm5), + IIC_iMOVsi, + "lsr", "\t$Rd, $Rm, $imm5", + [(set tGPR:$Rd, (srl tGPR:$Rm, (i32 imm:$imm5)))]> { + bits<5> imm5; + let Inst{10-6} = imm5; +} // LSR register -def tLSRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, - "lsr", "\t$dst, $rhs", - [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>, - T1DataProcessing<0b0011>; - -// move register -def tMOVi8 : T1sI<(outs tGPR:$dst), (ins i32imm:$src), IIC_iMOVi, - "mov", "\t$dst, $src", - [(set tGPR:$dst, imm0_255:$src)]>, - T1General<{1,0,0,?,?}>; +def tLSRrr : // A8.6.91 + T1sItDPEncode<0b0011, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iMOVsr, + "lsr", "\t$Rdn, $Rm", + [(set tGPR:$Rdn, (srl tGPR:$Rn, tGPR:$Rm))]>; + +// Move register +let isMoveImm = 1 in +def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins i32imm:$imm8), IIC_iMOVi, + "mov", "\t$Rd, $imm8", + [(set tGPR:$Rd, imm0_255:$imm8)]>, + T1General<{1,0,0,?,?}> { + // A8.6.96 + bits<3> Rd; + bits<8> imm8; + let Inst{10-8} = Rd; + let Inst{7-0} = imm8; +} // TODO: A7-73: MOV(2) - mov setting flag. - let neverHasSideEffects = 1 in { // FIXME: Make this predicable. -def tMOVr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, - "mov\t$dst, $src", []>, - T1Special<0b1000>; +def tMOVr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, + "mov\t$Rd, $Rm", []>, + T1Special<0b1000> { + // A8.6.97 + bits<4> Rd; + bits<4> Rm; + // Bits {7-6} are encoded by the T1Special value. + let Inst{5-3} = Rm{2-0}; + let Inst{2-0} = Rd{2-0}; +} let Defs = [CPSR] in -def tMOVSr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, - "movs\t$dst, $src", []>, Encoding16 { +def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, + "movs\t$Rd, $Rm", []>, Encoding16 { + // A8.6.97 + bits<3> Rd; + bits<3> Rm; let Inst{15-6} = 0b0000000000; + let Inst{5-3} = Rm; + let Inst{2-0} = Rd; } // FIXME: Make these predicable. -def tMOVgpr2tgpr : T1I<(outs tGPR:$dst), (ins GPR:$src), IIC_iMOVr, - "mov\t$dst, $src", []>, - T1Special<{1,0,0,?}>; -def tMOVtgpr2gpr : T1I<(outs GPR:$dst), (ins tGPR:$src), IIC_iMOVr, - "mov\t$dst, $src", []>, - T1Special<{1,0,?,0}>; -def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, - "mov\t$dst, $src", []>, - T1Special<{1,0,?,?}>; +def tMOVgpr2tgpr : T1I<(outs tGPR:$Rd), (ins GPR:$Rm), IIC_iMOVr, + "mov\t$Rd, $Rm", []>, + T1Special<{1,0,0,?}> { + // A8.6.97 + bits<4> Rd; + bits<4> Rm; + // Bit {7} is encoded by the T1Special value. + let Inst{6-3} = Rm; + let Inst{2-0} = Rd{2-0}; +} +def tMOVtgpr2gpr : T1I<(outs GPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, + "mov\t$Rd, $Rm", []>, + T1Special<{1,0,?,0}> { + // A8.6.97 + bits<4> Rd; + bits<4> Rm; + // Bit {6} is encoded by the T1Special value. 
+ let Inst{7} = Rd{3}; + let Inst{5-3} = Rm{2-0}; + let Inst{2-0} = Rd{2-0}; +} +def tMOVgpr2gpr : T1I<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr, + "mov\t$Rd, $Rm", []>, + T1Special<{1,0,?,?}> { + // A8.6.97 + bits<4> Rd; + bits<4> Rm; + let Inst{7} = Rd{3}; + let Inst{6-3} = Rm; + let Inst{2-0} = Rd{2-0}; +} } // neverHasSideEffects -// multiply register +// Multiply register let isCommutable = 1 in -def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32, - "mul", "\t$dst, $rhs, $dst", /* A8.6.105 MUL Encoding T1 */ - [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>, - T1DataProcessing<0b1101>; - -// move inverse register -def tMVN : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, - "mvn", "\t$dst, $src", - [(set tGPR:$dst, (not tGPR:$src))]>, - T1DataProcessing<0b1111>; - -// bitwise or register +def tMUL : // A8.6.105 T1 + T1sItDPEncode<0b1101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iMUL32, + "mul", "\t$Rdn, $Rm, $Rdn", + [(set tGPR:$Rdn, (mul tGPR:$Rn, tGPR:$Rm))]>; + +// Move inverse register +def tMVN : // A8.6.107 + T1sIDPEncode<0b1111, (outs tGPR:$Rd), (ins tGPR:$Rn), IIC_iMVNr, + "mvn", "\t$Rd, $Rn", + [(set tGPR:$Rd, (not tGPR:$Rn))]>; + +// Bitwise or register let isCommutable = 1 in -def tORR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "orr", "\t$dst, $rhs", - [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>, - T1DataProcessing<0b1100>; - -// swaps -def tREV : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "rev", "\t$dst, $src", - [(set tGPR:$dst, (bswap tGPR:$src))]>, - Requires<[IsThumb1Only, HasV6]>, - T1Misc<{1,0,1,0,0,0,?}>; - -def tREV16 : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "rev16", "\t$dst, $src", - [(set tGPR:$dst, - (or (and (srl tGPR:$src, (i32 8)), 0xFF), - (or (and (shl tGPR:$src, (i32 8)), 0xFF00), - (or (and (srl tGPR:$src, (i32 8)), 0xFF0000), - (and (shl tGPR:$src, (i32 8)), 0xFF000000)))))]>, - Requires<[IsThumb1Only, HasV6]>, - T1Misc<{1,0,1,0,0,1,?}>; - -def tREVSH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "revsh", "\t$dst, $src", - [(set tGPR:$dst, - (sext_inreg - (or (srl (and tGPR:$src, 0xFF00), (i32 8)), - (shl tGPR:$src, (i32 8))), i16))]>, - Requires<[IsThumb1Only, HasV6]>, - T1Misc<{1,0,1,0,1,1,?}>; - -// rotate right register -def tROR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, - "ror", "\t$dst, $rhs", - [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>, - T1DataProcessing<0b0111>; - -// negate register -def tRSB : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iALUi, - "rsb", "\t$dst, $src, #0", - [(set tGPR:$dst, (ineg tGPR:$src))]>, - T1DataProcessing<0b1001>; +def tORR : // A8.6.114 + T1sItDPEncode<0b1100, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iBITr, + "orr", "\t$Rdn, $Rm", + [(set tGPR:$Rdn, (or tGPR:$Rn, tGPR:$Rm))]>; + +// Swaps +def tREV : // A8.6.134 + T1pIMiscEncode<{1,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), + IIC_iUNAr, + "rev", "\t$Rd, $Rm", + [(set tGPR:$Rd, (bswap tGPR:$Rm))]>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; + +def tREV16 : // A8.6.135 + T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), + IIC_iUNAr, + "rev16", "\t$Rd, $Rm", + [(set tGPR:$Rd, + (or (and (srl tGPR:$Rm, (i32 8)), 0xFF), + (or (and (shl tGPR:$Rm, (i32 8)), 0xFF00), + (or (and (srl tGPR:$Rm, (i32 8)), 0xFF0000), + (and (shl tGPR:$Rm, (i32 8)), 0xFF000000)))))]>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; + +def tREVSH : // A8.6.136 + T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), + IIC_iUNAr, + "revsh", 
"\t$Rd, $Rm", + [(set tGPR:$Rd, + (sext_inreg + (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)), + (shl tGPR:$Rm, (i32 8))), i16))]>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; + +// Rotate right register +def tROR : // A8.6.139 + T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iMOVsr, + "ror", "\t$Rdn, $Rm", + [(set tGPR:$Rdn, (rotr tGPR:$Rn, tGPR:$Rm))]>; + +// Negate register +def tRSB : // A8.6.141 + T1sIDPEncode<0b1001, (outs tGPR:$Rd), (ins tGPR:$Rn), + IIC_iALUi, + "rsb", "\t$Rd, $Rn, #0", + [(set tGPR:$Rd, (ineg tGPR:$Rn))]>; // Subtract with carry register let Uses = [CPSR] in -def tSBC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "sbc", "\t$dst, $rhs", - [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>, - T1DataProcessing<0b0110>; +def tSBC : // A8.6.151 + T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iALUr, + "sbc", "\t$Rdn, $Rm", + [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>; // Subtract immediate -def tSUBi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, - "sub", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>, - T1General<0b01111>; - -def tSUBi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, - "sub", "\t$dst, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>, - T1General<{1,1,1,?,?}>; - -// subtract register -def tSUBrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "sub", "\t$dst, $lhs, $rhs", - [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>, - T1General<0b01101>; +def tSUBi3 : // A8.6.210 T1 + T1sIGenEncodeImm<0b01111, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3), + IIC_iALUi, + "sub", "\t$Rd, $Rm, $imm3", + [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7_neg:$imm3))]> { + bits<3> imm3; + let Inst{8-6} = imm3; +} -// TODO: A7-96: STMIA - store multiple. +def tSUBi8 : // A8.6.210 T2 + T1sItGenEncodeImm<{1,1,1,?,?}, (outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$imm8), + IIC_iALUi, + "sub", "\t$Rdn, $imm8", + [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>; + +// Subtract register +def tSUBrr : // A8.6.212 + T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), + IIC_iALUr, + "sub", "\t$Rd, $Rn, $Rm", + [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>; -// sign-extend byte -def tSXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "sxtb", "\t$dst, $src", - [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>, - Requires<[IsThumb1Only, HasV6]>, - T1Misc<{0,0,1,0,0,1,?}>; - -// sign-extend short -def tSXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "sxth", "\t$dst, $src", - [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>, - Requires<[IsThumb1Only, HasV6]>, - T1Misc<{0,0,1,0,0,0,?}>; - -// test -let isCommutable = 1, Defs = [CPSR] in -def tTST : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, - "tst", "\t$lhs, $rhs", - [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>, - T1DataProcessing<0b1000>; - -// zero-extend byte -def tUXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "uxtb", "\t$dst, $src", - [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>, - Requires<[IsThumb1Only, HasV6]>, - T1Misc<{0,0,1,0,1,1,?}>; - -// zero-extend short -def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "uxth", "\t$dst, $src", - [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>, - Requires<[IsThumb1Only, HasV6]>, - T1Misc<{0,0,1,0,1,0,?}>; +// TODO: A7-96: STMIA - store multiple. 
+// Sign-extend byte +def tSXTB : // A8.6.222 + T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), + IIC_iUNAr, + "sxtb", "\t$Rd, $Rm", + [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i8))]>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; + +// Sign-extend short +def tSXTH : // A8.6.224 + T1pIMiscEncode<{0,0,1,0,0,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), + IIC_iUNAr, + "sxth", "\t$Rd, $Rm", + [(set tGPR:$Rd, (sext_inreg tGPR:$Rm, i16))]>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; + +// Test +let isCompare = 1, isCommutable = 1, Defs = [CPSR] in +def tTST : // A8.6.230 + T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr, + "tst", "\t$Rn, $Rm", + [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>; + +// Zero-extend byte +def tUXTB : // A8.6.262 + T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), + IIC_iUNAr, + "uxtb", "\t$Rd, $Rm", + [(set tGPR:$Rd, (and tGPR:$Rm, 0xFF))]>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; + +// Zero-extend short +def tUXTH : // A8.6.264 + T1pIMiscEncode<{0,0,1,0,1,0,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), + IIC_iUNAr, + "uxth", "\t$Rd, $Rm", + [(set tGPR:$Rd, (and tGPR:$Rm, 0xFFFF))]>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; // Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation. // Expanded after instruction selection into a branch sequence. let usesCustomInserter = 1 in // Expanded after instruction selection. def tMOVCCr_pseudo : PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc), - NoItinerary, "${:comment} tMOVCCr $cc", + NoItinerary, [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>; // 16-bit movcc in IT blocks for Thumb2. let neverHasSideEffects = 1 in { -def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, - "mov", "\t$dst, $rhs", []>, - T1Special<{1,0,?,?}>; +def tMOVCCr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iCMOVr, + "mov", "\t$Rdn, $Rm", []>, + T1Special<{1,0,?,?}> { + bits<4> Rdn; + bits<4> Rm; + let Inst{7} = Rdn{3}; + let Inst{6-3} = Rm; + let Inst{2-0} = Rdn{2-0}; +} + +let isMoveImm = 1 in +def tMOVCCi : T1pIt<(outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$Rm), IIC_iCMOVi, + "mov", "\t$Rdn, $Rm", []>, + T1General<{1,0,0,?,?}> { + bits<3> Rdn; + bits<8> Rm; + let Inst{10-8} = Rdn; + let Inst{7-0} = Rm; +} -def tMOVCCi : T1pIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMOVi, - "mov", "\t$dst, $rhs", []>, - T1General<{1,0,0,?,?}>; } // neverHasSideEffects // tLEApcrel - Load a pc-relative address into a register without offending the // assembler. 
-let neverHasSideEffects = 1 in { -let isReMaterializable = 1 in -def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, - "adr$p\t$dst, #$label", []>, - T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 -} // neverHasSideEffects -def tLEApcrelJT : T1I<(outs tGPR:$dst), - (ins i32imm:$label, nohash_imm:$id, pred:$p), - IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>, - T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 +def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p), + IIC_iALUi, "adr{$p}\t$Rd, #$addr", []>, + T1Encoding<{1,0,1,0,0,?}> { + bits<3> Rd; + bits<8> addr; + let Inst{10-8} = Rd; + let Inst{7-0} = addr; +} + +let neverHasSideEffects = 1, isReMaterializable = 1 in +def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p), + Size2Bytes, IIC_iALUi, []>; + +def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), + (ins i32imm:$label, nohash_imm:$id, pred:$p), + Size2Bytes, IIC_iALUi, []>; + +//===----------------------------------------------------------------------===// +// Move between coprocessor and ARM core register -- for disassembly only +// + +class tMovRCopro<string opc, bit direction> + : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), + [/* For disassembly only; pattern left blank */]> { + let Inst{27-24} = 0b1110; + let Inst{20} = direction; + let Inst{4} = 1; + + bits<4> Rt; + bits<4> cop; + bits<3> opc1; + bits<3> opc2; + bits<4> CRm; + bits<4> CRn; + + let Inst{15-12} = Rt; + let Inst{11-8} = cop; + let Inst{23-21} = opc1; + let Inst{7-5} = opc2; + let Inst{3-0} = CRm; + let Inst{19-16} = CRn; +} + +def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>; +def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>; + +class tMovRRCopro<string opc, bit direction> + : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), + !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), + [/* For disassembly only; pattern left blank */]> { + let Inst{27-24} = 0b1100; + let Inst{23-21} = 0b010; + let Inst{20} = direction; + + bits<4> Rt; + bits<4> Rt2; + bits<4> cop; + bits<4> opc1; + bits<4> CRm; + + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + let Inst{11-8} = cop; + let Inst{7-4} = opc1; + let Inst{3-0} = CRm; +} + +def tMCRR : tMovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */>; +def tMRRC : tMovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; + +//===----------------------------------------------------------------------===// +// Other Coprocessor Instructions. For disassembly only. +// +def tCDP : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-24} = 0b1110; + + bits<4> opc1; + bits<4> CRn; + bits<4> CRd; + bits<4> cop; + bits<3> opc2; + bits<4> CRm; + + let Inst{3-0} = CRm; + let Inst{4} = 0; + let Inst{7-5} = opc2; + let Inst{11-8} = cop; + let Inst{15-12} = CRd; + let Inst{19-16} = CRn; + let Inst{23-20} = opc1; +} //===----------------------------------------------------------------------===// // TLS Instructions // // __aeabi_read_tp preserves the registers r1-r3. 
-let isCall = 1,
-  Defs = [R0, LR] in {
-  def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
-                     "bl\t__aeabi_read_tp",
-                     [(set R0, ARMthread_pointer)]>;
+let isCall = 1, Defs = [R0, LR], Uses = [SP] in
+def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
+                   "bl\t__aeabi_read_tp",
+                   [(set R0, ARMthread_pointer)]> {
+  // Encoding is 0xf7fffffe.
+  let Inst = 0xf7fffffe;
 }

+//===----------------------------------------------------------------------===//
 // SJLJ Exception handling intrinsics
-// eh_sjlj_setjmp() is an instruction sequence to store the return
-// address and save #0 in R0 for the non-longjmp case.
-// Since by its nature we may be coming from some other function to get
-// here, and we're using the stack frame for the containing function to
-// save/restore registers, we can't keep anything live in regs across
-// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
-// when we get here from a longjmp(). We force everthing out of registers
-// except for our own input by listing the relevant registers in Defs. By
-// doing so, we also cause the prologue/epilogue code to actively preserve
-// all of the callee-saved resgisters, which is exactly what we want.
-// $val is a scratch register for our use.
-let Defs =
-  [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7, R12 ], hasSideEffects = 1,
-  isBarrier = 1 in {
-  def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
-                              AddrModeNone, SizeSpecial, NoItinerary,
-                              "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
-                              "adds\t$val, #7\n\t"
-                              "str\t$val, [$src, #4]\n\t"
-                              "movs\tr0, #0\n\t"
-                              "b\t1f\n\t"
-                              "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
-                              "1:", "",
-                              [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
-}
+//
+
+// eh_sjlj_setjmp() is an instruction sequence to store the return address and
+// save #0 in R0 for the non-longjmp case. Since by its nature we may be coming
+// from some other function to get here, and we're using the stack frame for the
+// containing function to save/restore registers, we can't keep anything live in
+// regs across the eh_sjlj_setjmp(), else it will almost certainly have been
+// tromped upon when we get here from a longjmp(). We force everything out of
+// registers except for our own input by listing the relevant registers in
+// Defs. By doing so, we also cause the prologue/epilogue code to actively
+// preserve all of the callee-saved registers, which is exactly what we want.
+// $val is a scratch register for our use.
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ], + hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in +def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val), + AddrModeNone, SizeSpecial, NoItinerary, "","", + [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>; // FIXME: Non-Darwin version(s) -let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, - Defs = [ R7, LR, SP ] in { +let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1, + Defs = [ R7, LR, SP ] in def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), - AddrModeNone, SizeSpecial, IndexModeNone, - Pseudo, NoItinerary, - "ldr\t$scratch, [$src, #8]\n\t" - "mov\tsp, $scratch\n\t" - "ldr\t$scratch, [$src, #4]\n\t" - "ldr\tr7, [$src]\n\t" - "bx\t$scratch", "", - [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, - Requires<[IsThumb, IsDarwin]>; -} + AddrModeNone, SizeSpecial, IndexModeNone, + Pseudo, NoItinerary, "", "", + [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, + Requires<[IsThumb, IsDarwin]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns // +// Comparisons +def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8), + (tCMPi8 tGPR:$Rn, imm0_255:$imm8)>; +def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm), + (tCMPr tGPR:$Rn, tGPR:$Rm)>; + // Add with carry def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs), (tADDi3 tGPR:$lhs, imm0_7:$rhs)>; @@ -991,27 +1492,42 @@ def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>, Requires<[IsThumb, HasV5T, IsDarwin]>; // zextload i1 -> zextload i8 -def : T1Pat<(zextloadi1 t_addrmode_s1:$addr), - (tLDRB t_addrmode_s1:$addr)>; +def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr), + (tLDRBr t_addrmode_rrs1:$addr)>; +def : T1Pat<(zextloadi1 t_addrmode_is1:$addr), + (tLDRBi t_addrmode_is1:$addr)>; // extload -> zextload -def : T1Pat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; -def : T1Pat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; -def : T1Pat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>; +def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>; +def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; +def : T1Pat<(extloadi8 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>; +def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; +def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>; +def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>; // If it's impossible to use [r,r] address mode for sextload, select to // ldr{b|h} + sxt{b|h} instead. 
-def : T1Pat<(sextloadi8 t_addrmode_s1:$addr), - (tSXTB (tLDRB t_addrmode_s1:$addr))>, - Requires<[IsThumb1Only, HasV6]>; -def : T1Pat<(sextloadi16 t_addrmode_s2:$addr), - (tSXTH (tLDRH t_addrmode_s2:$addr))>, - Requires<[IsThumb1Only, HasV6]>; - -def : T1Pat<(sextloadi8 t_addrmode_s1:$addr), - (tASRri (tLSLri (tLDRB t_addrmode_s1:$addr), 24), 24)>; -def : T1Pat<(sextloadi16 t_addrmode_s1:$addr), - (tASRri (tLSLri (tLDRH t_addrmode_s1:$addr), 16), 16)>; +def : T1Pat<(sextloadi8 t_addrmode_is1:$addr), + (tSXTB (tLDRBi t_addrmode_is1:$addr))>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; +def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr), + (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; +def : T1Pat<(sextloadi16 t_addrmode_is2:$addr), + (tSXTH (tLDRHi t_addrmode_is2:$addr))>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; +def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr), + (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; + +def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr), + (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>; +def : T1Pat<(sextloadi8 t_addrmode_is1:$addr), + (tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>; +def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr), + (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>; +def : T1Pat<(sextloadi16 t_addrmode_is2:$addr), + (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>; // Large immediate handling. @@ -1028,8 +1544,7 @@ def : T1Pat<(i32 imm0_255_comp:$src), // scheduling. let isReMaterializable = 1 in def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, - "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + NoItinerary, [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, - Requires<[IsThumb1Only]>; + Requires<[IsThumb, IsThumb1Only]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 6ba0a44..0e01be5 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -21,16 +21,12 @@ def it_mask : Operand<i32> { let PrintMethod = "printThumbITMask"; } -// Table branch address -def tb_addrmode : Operand<i32> { - let PrintMethod = "printTBAddrMode"; -} - // Shifted operands. No register controlled shifts for Thumb2. // Note: We do not support rrx shifted operands yet. def t2_so_reg : Operand<i32>, // reg imm ComplexPattern<i32, 2, "SelectT2ShifterOperandReg", [shl,srl,sra,rotr]> { + let EncoderMethod = "getT2SORegOpValue"; let PrintMethod = "printT2SOOperand"; let MIOperandInfo = (ops rGPR, i32imm); } @@ -47,11 +43,10 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ // t2_so_imm - Match a 32-bit immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit -// immediate splatted into multiple bytes of the word. t2_so_imm values are -// represented in the imm field in the same 12-bit form that they are encoded -// into t2_so_imm instructions: the 8-bit immediate is the least significant -// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11]. -def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]>; +// immediate splatted into multiple bytes of the word. +def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]> { + let EncoderMethod = "getT2SOImmOpValue"; +} // t2_so_imm_not - Match an immediate that is a complement // of a t2_so_imm. 
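// A small C++ sketch (not LLVM code) of the value forms the t2_so_imm
// description above allows: an 8-bit immediate splatted into the
// 0x00XY00XY / 0xXY00XY00 / 0xXYXYXYXY byte lanes, or 0x80|imm7 rotated right
// by a 5-bit amount. It models the architectural ThumbExpandImm() decode
// (ignoring the UNPREDICTABLE imm8 == 0 splat cases); the in-tree predicate
// ARM_AM::getT2SOImmVal performs the inverse search. The function name and
// layout are illustrative only.
#include <cassert>
#include <cstdint>

static uint32_t thumb_expand_imm_model(uint16_t imm12) {
  uint32_t imm8 = imm12 & 0xFFu;
  if (((imm12 >> 10) & 0x3u) == 0) {   // imm12<11:10> == 00: splat forms
    switch ((imm12 >> 8) & 0x3u) {
    case 0:  return imm8;                                            // 0x000000XY
    case 1:  return (imm8 << 16) | imm8;                             // 0x00XY00XY
    case 2:  return (imm8 << 24) | (imm8 << 8);                      // 0xXY00XY00
    default: return (imm8 << 24) | (imm8 << 16) | (imm8 << 8) | imm8;// 0xXYXYXYXY
    }
  }
  // Rotated form: the 8-bit value 1bcdefgh rotated right by imm12<11:7>.
  uint32_t unrotated = 0x80u | (imm12 & 0x7Fu);
  uint32_t rot = (imm12 >> 7) & 0x1Fu;  // always >= 8 on this path
  return (unrotated >> rot) | (unrotated << (32 - rot));
}

int main() {
  assert(thumb_expand_imm_model(0x0FF) == 0x000000FFu);
  assert(thumb_expand_imm_model(0x1FF) == 0x00FF00FFu);
  assert(thumb_expand_imm_model(0x3FF) == 0xFFFFFFFFu);
  assert(thumb_expand_imm_model(0x4FF) == 0x7F800000u);
  return 0;
}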
@@ -63,7 +58,7 @@ def t2_so_imm_not : Operand<i32>, // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())) != -1; + return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1; }], t2_so_imm_neg_XFORM>; // Break t2_so_imm's up into two pieces. This handles immediates with up to 16 @@ -128,27 +123,41 @@ def imm0_255_not : PatLeaf<(i32 imm), [{ // t2addrmode_imm12 := reg + imm12 def t2addrmode_imm12 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> { - let PrintMethod = "printT2AddrModeImm12Operand"; + let PrintMethod = "printAddrModeImm12Operand"; + let EncoderMethod = "getAddrModeImm12OpValue"; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); + let ParserMatchClass = MemMode5AsmOperand; } +// ADR instruction labels. +def t2adrlabel : Operand<i32> { + let EncoderMethod = "getT2AdrLabelOpValue"; +} + + // t2addrmode_imm8 := reg +/- imm8 def t2addrmode_imm8 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let PrintMethod = "printT2AddrModeImm8Operand"; + let EncoderMethod = "getT2AddrModeImm8OpValue"; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); + let ParserMatchClass = MemMode5AsmOperand; } def t2am_imm8_offset : Operand<i32>, - ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset", []>{ + ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset", + [], [SDNPWantRoot]> { let PrintMethod = "printT2AddrModeImm8OffsetOperand"; + let EncoderMethod = "getT2AddrModeImm8OffsetOpValue"; + let ParserMatchClass = MemMode5AsmOperand; } // t2addrmode_imm8s4 := reg +/- (imm8 << 2) -def t2addrmode_imm8s4 : Operand<i32>, - ComplexPattern<i32, 2, "SelectT2AddrModeImm8s4", []> { +def t2addrmode_imm8s4 : Operand<i32> { let PrintMethod = "printT2AddrModeImm8s4Operand"; + let EncoderMethod = "getT2AddrModeImm8s4OpValue"; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); + let ParserMatchClass = MemMode5AsmOperand; } def t2am_imm8s4_offset : Operand<i32> { @@ -159,7 +168,9 @@ def t2am_imm8s4_offset : Operand<i32> { def t2addrmode_so_reg : Operand<i32>, ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> { let PrintMethod = "printT2AddrModeSoRegOperand"; + let EncoderMethod = "getT2AddrModeSORegOpValue"; let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm); + let ParserMatchClass = MemMode5AsmOperand; } @@ -167,45 +178,294 @@ def t2addrmode_so_reg : Operand<i32>, // Multiclass helpers... 
// + +class T2OneRegImm<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<12> imm; + + let Inst{11-8} = Rd; + let Inst{26} = imm{11}; + let Inst{14-12} = imm{10-8}; + let Inst{7-0} = imm{7-0}; +} + + +class T2sOneRegImm<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2sI<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + + let Inst{11-8} = Rd; + let Inst{26} = imm{11}; + let Inst{14-12} = imm{10-8}; + let Inst{7-0} = imm{7-0}; +} + +class T2OneRegCmpImm<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> Rn; + bits<12> imm; + + let Inst{19-16} = Rn; + let Inst{26} = imm{11}; + let Inst{14-12} = imm{10-8}; + let Inst{7-0} = imm{7-0}; +} + + +class T2OneRegShiftedReg<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<12> ShiftedRm; + + let Inst{11-8} = Rd; + let Inst{3-0} = ShiftedRm{3-0}; + let Inst{5-4} = ShiftedRm{6-5}; + let Inst{14-12} = ShiftedRm{11-9}; + let Inst{7-6} = ShiftedRm{8-7}; +} + +class T2sOneRegShiftedReg<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2sI<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<12> ShiftedRm; + + let Inst{11-8} = Rd; + let Inst{3-0} = ShiftedRm{3-0}; + let Inst{5-4} = ShiftedRm{6-5}; + let Inst{14-12} = ShiftedRm{11-9}; + let Inst{7-6} = ShiftedRm{8-7}; +} + +class T2OneRegCmpShiftedReg<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> Rn; + bits<12> ShiftedRm; + + let Inst{19-16} = Rn; + let Inst{3-0} = ShiftedRm{3-0}; + let Inst{5-4} = ShiftedRm{6-5}; + let Inst{14-12} = ShiftedRm{11-9}; + let Inst{7-6} = ShiftedRm{8-7}; +} + +class T2TwoReg<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<4> Rm; + + let Inst{11-8} = Rd; + let Inst{3-0} = Rm; +} + +class T2sTwoReg<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2sI<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<4> Rm; + + let Inst{11-8} = Rd; + let Inst{3-0} = Rm; +} + +class T2TwoRegCmp<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> Rn; + bits<4> Rm; + + let Inst{19-16} = Rn; + let Inst{3-0} = Rm; +} + + +class T2TwoRegImm<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + + let Inst{11-8} = Rd; + let Inst{19-16} = Rn; + let Inst{26} = imm{11}; + let Inst{14-12} = imm{10-8}; + let Inst{7-0} = imm{7-0}; +} + +class T2sTwoRegImm<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2sI<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + + let Inst{11-8} = Rd; + let Inst{19-16} = Rn; + let Inst{26} = imm{11}; + let Inst{14-12} = imm{10-8}; + let Inst{7-0} = imm{7-0}; +} + +class T2TwoRegShiftImm<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> 
Rd; + bits<4> Rm; + bits<5> imm; + + let Inst{11-8} = Rd; + let Inst{3-0} = Rm; + let Inst{14-12} = imm{4-2}; + let Inst{7-6} = imm{1-0}; +} + +class T2sTwoRegShiftImm<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2sI<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<4> Rm; + bits<5> imm; + + let Inst{11-8} = Rd; + let Inst{3-0} = Rm; + let Inst{14-12} = imm{4-2}; + let Inst{7-6} = imm{1-0}; +} + +class T2ThreeReg<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + + let Inst{11-8} = Rd; + let Inst{19-16} = Rn; + let Inst{3-0} = Rm; +} + +class T2sThreeReg<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2sI<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + + let Inst{11-8} = Rd; + let Inst{19-16} = Rn; + let Inst{3-0} = Rm; +} + +class T2TwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<4> Rn; + bits<12> ShiftedRm; + + let Inst{11-8} = Rd; + let Inst{19-16} = Rn; + let Inst{3-0} = ShiftedRm{3-0}; + let Inst{5-4} = ShiftedRm{6-5}; + let Inst{14-12} = ShiftedRm{11-9}; + let Inst{7-6} = ShiftedRm{8-7}; +} + +class T2sTwoRegShiftedReg<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2sI<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<4> Rn; + bits<12> ShiftedRm; + + let Inst{11-8} = Rd; + let Inst{19-16} = Rn; + let Inst{3-0} = ShiftedRm{3-0}; + let Inst{5-4} = ShiftedRm{6-5}; + let Inst{14-12} = ShiftedRm{11-9}; + let Inst{7-6} = ShiftedRm{8-7}; +} + +class T2FourReg<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + bits<4> Ra; + + let Inst{19-16} = Rn; + let Inst{15-12} = Ra; + let Inst{11-8} = Rd; + let Inst{3-0} = Rm; +} + +class T2MulLong<bits<3> opc22_20, bits<4> opc7_4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> RdLo; + bits<4> RdHi; + bits<4> Rn; + bits<4> Rm; + + let Inst{31-23} = 0b111110111; + let Inst{22-20} = opc22_20; + let Inst{19-16} = Rn; + let Inst{15-12} = RdLo; + let Inst{11-8} = RdHi; + let Inst{7-4} = opc7_4; + let Inst{3-0} = Rm; +} + + /// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a /// unary operation that produces a value. These are predicable and can be /// changed to modify CPSR. -multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, - bit Cheap = 0, bit ReMat = 0> { +multiclass T2I_un_irs<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Cheap = 0, bit ReMat = 0> { // shifted imm - def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, - opc, "\t$dst, $src", - [(set rGPR:$dst, (opnode t2_so_imm:$src))]> { + def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii, + opc, "\t$Rd, $imm", + [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> { let isAsCheapAsAMove = Cheap; let isReMaterializable = ReMat; let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; - let Inst{20} = ?; // The S bit. 
let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; } // register - def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVr, - opc, ".w\t$dst, $src", - [(set rGPR:$dst, (opnode rGPR:$src))]> { + def r : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), iir, + opc, ".w\t$Rd, $Rm", + [(set rGPR:$Rd, (opnode rGPR:$Rm))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; - let Inst{20} = ?; // The S bit. let Inst{19-16} = 0b1111; // Rn let Inst{14-12} = 0b000; // imm3 let Inst{7-6} = 0b00; // imm2 let Inst{5-4} = 0b00; // type } // shifted register - def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, - opc, ".w\t$dst, $src", - [(set rGPR:$dst, (opnode t2_so_reg:$src))]> { + def s : T2sOneRegShiftedReg<(outs rGPR:$Rd), (ins t2_so_reg:$ShiftedRm), iis, + opc, ".w\t$Rd, $ShiftedRm", + [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; - let Inst{20} = ?; // The S bit. let Inst{19-16} = 0b1111; // Rn } } @@ -213,94 +473,97 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a /// binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. -multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, - bit Commutable = 0, string wide = ""> { +multiclass T2I_bin_irs<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Commutable = 0, string wide = ""> { // shifted imm - def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, - opc, "\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]> { + def ri : T2sTwoRegImm< + (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii, + opc, "\t$Rd, $Rn, $imm", + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; - let Inst{20} = ?; // The S bit. let Inst{15} = 0; } // register - def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr, - opc, !strconcat(wide, "\t$dst, $lhs, $rhs"), - [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> { + def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), iir, + opc, !strconcat(wide, "\t$Rd, $Rn, $Rm"), + [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; - let Inst{20} = ?; // The S bit. let Inst{14-12} = 0b000; // imm3 let Inst{7-6} = 0b00; // imm2 let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, - opc, !strconcat(wide, "\t$dst, $lhs, $rhs"), - [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]> { + def rs : T2sTwoRegShiftedReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), iis, + opc, !strconcat(wide, "\t$Rd, $Rn, $ShiftedRm"), + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; - let Inst{20} = ?; // The S bit. } } /// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need // the ".w" prefix to indicate that they are wide. 
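// As a sketch of how these helpers are meant to be used (hypothetical
// mnemonic "foo"; the real instantiations appear further down the file):
//
//   defm t2FOO : T2I_bin_irs<0b0000, "foo", IIC_iALUi, IIC_iALUr, IIC_iALUsi,
//                            BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
//
// expands to t2FOOri, t2FOOrr and t2FOOrs, one def per operand form, with the
// new T2sTwoRegImm/T2sThreeReg/T2sTwoRegShiftedReg bases supplying the operand
// encoding that each def previously spelled out by hand.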
-multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode, - bit Commutable = 0> : - T2I_bin_irs<opcod, opc, opnode, Commutable, ".w">; +multiclass T2I_bin_w_irs<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Commutable = 0> : + T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w">; /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are /// reversed. The 'rr' form is only defined for the disassembler; for codegen /// it is equivalent to the T2I_bin_irs counterpart. multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm - def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, - opc, ".w\t$dst, $rhs, $lhs", - [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> { + def ri : T2sTwoRegImm< + (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, + opc, ".w\t$Rd, $Rn, $imm", + [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; - let Inst{20} = ?; // The S bit. let Inst{15} = 0; } // register - def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, rGPR:$lhs), IIC_iALUr, - opc, "\t$dst, $rhs, $lhs", + def rr : T2sThreeReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr, + opc, "\t$Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; - let Inst{20} = ?; // The S bit. let Inst{14-12} = 0b000; // imm3 let Inst{7-6} = 0b00; // imm2 let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, - opc, "\t$dst, $rhs, $lhs", - [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> { + def rs : T2sTwoRegShiftedReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), + IIC_iALUsir, opc, "\t$Rd, $Rn, $ShiftedRm", + [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; - let Inst{20} = ?; // The S bit. } } /// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the /// instruction modifies the CPSR register. 
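// These "s" variants now carry isCodeGenOnly in addition to Defs = [CPSR];
// isCodeGenOnly keeps a def out of the assembler/disassembler matching
// tables, presumably because the printed syntax of these forms duplicates the
// predicable definitions and only isel needs to select them directly. A
// sketch of an instantiation (hypothetical mnemonic and opcode, shown only as
// an illustration):
//
//   defm t2FOOS : T2I_bin_s_irs<0b1000, "foo", IIC_iALUi, IIC_iALUr,
//                     IIC_iALUsi, BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;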
-let Defs = [CPSR] in { -multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, - bit Commutable = 0> { +let isCodeGenOnly = 1, Defs = [CPSR] in { +multiclass T2I_bin_s_irs<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, - !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { + def ri : T2TwoRegImm< + (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii, + !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm", + [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -308,9 +571,10 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def rr : T2I<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr, - !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> { + def rr : T2ThreeReg< + (outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir, + !strconcat(opc, "s"), ".w\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -321,9 +585,10 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, - !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { + def rs : T2TwoRegShiftedReg< + (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis, + !strconcat(opc, "s"), ".w\t$Rd, $Rn, $ShiftedRm", + [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -340,51 +605,58 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { - def ri : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, - opc, ".w\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> { + def ri : T2sTwoRegImm< + (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), IIC_iALUi, + opc, ".w\t$Rd, $Rn, $imm", + [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_imm:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24} = 1; let Inst{23-21} = op23_21; - let Inst{20} = 0; // The S bit. let Inst{15} = 0; } } // 12-bit imm - def ri12 : T2I<(outs rGPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi, - !strconcat(opc, "w"), "\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> { + def ri12 : T2I< + (outs rGPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi, + !strconcat(opc, "w"), "\t$Rd, $Rn, $imm", + [(set rGPR:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; let Inst{31-27} = 0b11110; - let Inst{25} = 1; - let Inst{24} = 0; + let Inst{26} = imm{11}; + let Inst{25-24} = 0b10; let Inst{23-21} = op23_21; let Inst{20} = 0; // The S bit. 
+ let Inst{19-16} = Rn; let Inst{15} = 0; + let Inst{14-12} = imm{10-8}; + let Inst{11-8} = Rd; + let Inst{7-0} = imm{7-0}; } // register - def rr : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr, - opc, ".w\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> { + def rr : T2sThreeReg<(outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), IIC_iALUr, + opc, ".w\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (opnode GPR:$Rn, rGPR:$Rm))]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24} = 1; let Inst{23-21} = op23_21; - let Inst{20} = 0; // The S bit. let Inst{14-12} = 0b000; // imm3 let Inst{7-6} = 0b00; // imm2 let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, - opc, ".w\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> { + def rs : T2sTwoRegShiftedReg< + (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), + IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", + [(set rGPR:$Rd, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24} = 1; let Inst{23-21} = op23_21; - let Inst{20} = 0; // The S bit. } } @@ -395,50 +667,49 @@ let Uses = [CPSR] in { multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, - opc, "\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>, + def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), + IIC_iALUi, opc, "\t$Rd, $Rn, $imm", + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; - let Inst{20} = 0; // The S bit. let Inst{15} = 0; } // register - def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr, - opc, ".w\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>, + def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr, + opc, ".w\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; - let Inst{20} = 0; // The S bit. let Inst{14-12} = 0b000; // imm3 let Inst{7-6} = 0b00; // imm2 let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, - opc, ".w\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>, + def rs : T2sTwoRegShiftedReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), + IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; - let Inst{20} = 0; // The S bit. 
} } // Carry setting variants -let Defs = [CPSR] in { +let isCodeGenOnly = 1, Defs = [CPSR] in { multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, - opc, "\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>, + def ri : T2sTwoRegImm< + (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, + opc, "\t$Rd, $Rn, $imm", + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -447,9 +718,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr, - opc, ".w\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>, + def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr, + opc, ".w\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; @@ -461,9 +732,10 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, - opc, ".w\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>, + def rs : T2sTwoRegShiftedReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), + IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>, Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -476,12 +748,13 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, /// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register /// version is not needed since this is only for codegen. -let Defs = [CPSR] in { +let isCodeGenOnly = 1, Defs = [CPSR] in { multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm - def ri : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, - !strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs", - [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> { + def ri : T2TwoRegImm< + (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, + !strconcat(opc, "s"), ".w\t$Rd, $Rn, $imm", + [(set rGPR:$Rd, (opnode t2_so_imm:$imm, rGPR:$Rn))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -489,9 +762,10 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { let Inst{15} = 0; } // shifted register - def rs : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, - !strconcat(opc, "s"), "\t$dst, $rhs, $lhs", - [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> { + def rs : T2TwoRegShiftedReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), + IIC_iALUsi, !strconcat(opc, "s"), "\t$Rd, $Rn, $ShiftedRm", + [(set rGPR:$Rd, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -504,18 +778,20 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { // rotate operation that produces a value. 
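// The shift/rotate helper below is instantiated along these lines elsewhere
// in the file, with opcod 0b00/0b01/0b10/0b11 selecting lsl/lsr/asr/ror in
// the encoding (illustrative, abbreviated):
//
//   defm t2LSL : T2I_sh_ir<0b00, "lsl", BinOpFrag<(shl node:$LHS, node:$RHS)>>;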
multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> { // 5-bit imm - def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, - opc, ".w\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode rGPR:$lhs, imm1_31:$rhs))]> { + def ri : T2sTwoRegShiftImm< + (outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$imm), IIC_iMOVsi, + opc, ".w\t$Rd, $Rm, $imm", + [(set rGPR:$Rd, (opnode rGPR:$Rm, imm1_31:$imm))]> { let Inst{31-27} = 0b11101; let Inst{26-21} = 0b010010; let Inst{19-16} = 0b1111; // Rn let Inst{5-4} = opcod; } // register - def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iMOVsr, - opc, ".w\t$dst, $lhs, $rhs", - [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> { + def rr : T2sThreeReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMOVsr, + opc, ".w\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-21} = opcod; @@ -528,11 +804,14 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> { /// patterns. Similar to T2I_bin_irs except the instruction does not produce /// a explicit result, only implicitly set CPSR. let isCompare = 1, Defs = [CPSR] in { -multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> { +multiclass T2I_cmp_irs<bits<4> opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode> { // shifted imm - def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi, - opc, ".w\t$lhs, $rhs", - [(opnode GPR:$lhs, t2_so_imm:$rhs)]> { + def ri : T2OneRegCmpImm< + (outs), (ins GPR:$Rn, t2_so_imm:$imm), iii, + opc, ".w\t$Rn, $imm", + [(opnode GPR:$Rn, t2_so_imm:$imm)]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -541,7 +820,8 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> { let Inst{11-8} = 0b1111; // Rd } // register - def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), IIC_iCMPr, + def rr : T2TwoRegCmp< + (outs), (ins GPR:$lhs, rGPR:$rhs), iir, opc, ".w\t$lhs, $rhs", [(opnode GPR:$lhs, rGPR:$rhs)]> { let Inst{31-27} = 0b11101; @@ -554,9 +834,10 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> { let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iCMPsi, - opc, ".w\t$lhs, $rhs", - [(opnode GPR:$lhs, t2_so_reg:$rhs)]> { + def rs : T2OneRegCmpShiftedReg< + (outs), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis, + opc, ".w\t$Rn, $ShiftedRm", + [(opnode GPR:$Rn, t2_so_reg:$ShiftedRm)]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -567,20 +848,29 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> { } /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns. 
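// The load multiclass below is where the new operand-encoding scheme is most
// visible: the i12 form takes its whole address as one bits<17> field laid
// out as { Rn(4) : U(1) : imm12(12) }. As a worked example (illustrative, not
// from this patch), "ldr.w r3, [r5, #8]" would carry
//   addr{16-13} = 0b0101 (r5)   -> Inst{19-16}
//   addr{12}    = 1     (add)   -> Inst{23}
//   addr{11-0}  = 8             -> Inst{11-0}
// and the i8 form plays the same game with a bits<13> { Rn : U : imm8 } field.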
-multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> { - def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), IIC_iLoadi, - opc, ".w\t$dst, $addr", - [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]> { +multiclass T2I_ld<bit signed, bits<2> opcod, string opc, + InstrItinClass iii, InstrItinClass iis, PatFrag opnode> { + def i12 : T2Ii12<(outs GPR:$Rt), (ins t2addrmode_imm12:$addr), iii, + opc, ".w\t$Rt, $addr", + [(set GPR:$Rt, (opnode t2addrmode_imm12:$addr))]> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; let Inst{23} = 1; let Inst{22-21} = opcod; let Inst{20} = 1; // load + + bits<4> Rt; + let Inst{15-12} = Rt; + + bits<17> addr; + let Inst{19-16} = addr{16-13}; // Rn + let Inst{23} = addr{12}; // U + let Inst{11-0} = addr{11-0}; // imm } - def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, - opc, "\t$dst, $addr", - [(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]> { + def i8 : T2Ii8 <(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), iii, + opc, "\t$Rt, $addr", + [(set GPR:$Rt, (opnode t2addrmode_imm8:$addr))]> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; @@ -591,10 +881,18 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> { // Offset: index==TRUE, wback==FALSE let Inst{10} = 1; // The P bit. let Inst{8} = 0; // The W bit. + + bits<4> Rt; + let Inst{15-12} = Rt; + + bits<13> addr; + let Inst{19-16} = addr{12-9}; // Rn + let Inst{9} = addr{8}; // U + let Inst{7-0} = addr{7-0}; // imm } - def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), IIC_iLoadr, - opc, ".w\t$dst, $addr", - [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]> { + def s : T2Iso <(outs GPR:$Rt), (ins t2addrmode_so_reg:$addr), iis, + opc, ".w\t$Rt, $addr", + [(set GPR:$Rt, (opnode t2addrmode_so_reg:$addr))]> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; @@ -602,10 +900,20 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> { let Inst{22-21} = opcod; let Inst{20} = 1; // load let Inst{11-6} = 0b000000; + + bits<4> Rt; + let Inst{15-12} = Rt; + + bits<10> addr; + let Inst{19-16} = addr{9-6}; // Rn + let Inst{3-0} = addr{5-2}; // Rm + let Inst{5-4} = addr{1-0}; // imm } - def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), IIC_iLoadi, - opc, ".w\t$dst, $addr", - [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]> { + + // FIXME: Is the pci variant actually needed? + def pci : T2Ipc <(outs GPR:$Rt), (ins i32imm:$addr), iii, + opc, ".w\t$Rt, $addr", + [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { let isReMaterializable = 1; let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -614,22 +922,35 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> { let Inst{22-21} = opcod; let Inst{20} = 1; // load let Inst{19-16} = 0b1111; // Rn + bits<4> Rt; + bits<12> addr; + let Inst{15-12} = Rt{3-0}; + let Inst{11-0} = addr{11-0}; } } /// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns. 
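// T2I_st below mirrors T2I_ld above: identical addr field slicing for the
// i12, i8 and register forms, with Inst{20} = 0 marking the store direction
// instead of 1 for load. Its instantiations (t2STR, t2STRB, t2STRH) follow
// later in this patch.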
-multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> { - def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), IIC_iStorei, - opc, ".w\t$src, $addr", - [(opnode GPR:$src, t2addrmode_imm12:$addr)]> { +multiclass T2I_st<bits<2> opcod, string opc, + InstrItinClass iii, InstrItinClass iis, PatFrag opnode> { + def i12 : T2Ii12<(outs), (ins GPR:$Rt, t2addrmode_imm12:$addr), iii, + opc, ".w\t$Rt, $addr", + [(opnode GPR:$Rt, t2addrmode_imm12:$addr)]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0001; let Inst{22-21} = opcod; let Inst{20} = 0; // !load + + bits<4> Rt; + let Inst{15-12} = Rt; + + bits<17> addr; + let Inst{19-16} = addr{16-13}; // Rn + let Inst{23} = addr{12}; // U + let Inst{11-0} = addr{11-0}; // imm } - def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), IIC_iStorei, - opc, "\t$src, $addr", - [(opnode GPR:$src, t2addrmode_imm8:$addr)]> { + def i8 : T2Ii8 <(outs), (ins GPR:$Rt, t2addrmode_imm8:$addr), iii, + opc, "\t$Rt, $addr", + [(opnode GPR:$Rt, t2addrmode_imm8:$addr)]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0000; let Inst{22-21} = opcod; @@ -638,24 +959,40 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> { // Offset: index==TRUE, wback==FALSE let Inst{10} = 1; // The P bit. let Inst{8} = 0; // The W bit. + + bits<4> Rt; + let Inst{15-12} = Rt; + + bits<13> addr; + let Inst{19-16} = addr{12-9}; // Rn + let Inst{9} = addr{8}; // U + let Inst{7-0} = addr{7-0}; // imm } - def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), IIC_iStorer, - opc, ".w\t$src, $addr", - [(opnode GPR:$src, t2addrmode_so_reg:$addr)]> { + def s : T2Iso <(outs), (ins GPR:$Rt, t2addrmode_so_reg:$addr), iis, + opc, ".w\t$Rt, $addr", + [(opnode GPR:$Rt, t2addrmode_so_reg:$addr)]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0000; let Inst{22-21} = opcod; let Inst{20} = 0; // !load let Inst{11-6} = 0b000000; + + bits<4> Rt; + let Inst{15-12} = Rt; + + bits<10> addr; + let Inst{19-16} = addr{9-6}; // Rn + let Inst{3-0} = addr{5-2}; // Rm + let Inst{5-4} = addr{1-0}; // imm } } -/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a +/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. 
-multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { - def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, - opc, ".w\t$dst, $src", - [(set rGPR:$dst, (opnode rGPR:$src))]> { +multiclass T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> { + def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr, + opc, ".w\t$Rd, $Rm", + [(set rGPR:$Rd, (opnode rGPR:$Rm))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -664,25 +1001,27 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi, - opc, ".w\t$dst, $src, ror $rot", - [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]> { + def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr, + opc, ".w\t$Rd, $Rm, ror $rot", + [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; let Inst{19-16} = 0b1111; // Rn let Inst{15-12} = 0b1111; let Inst{7} = 1; - let Inst{5-4} = {?,?}; // rotate + + bits<2> rot; + let Inst{5-4} = rot{1-0}; // rotate } } // UXTB16 - Requres T2ExtractPack, does not need the .w qualifier. -multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> { - def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, - opc, "\t$dst, $src", - [(set rGPR:$dst, (opnode rGPR:$src))]>, - Requires<[HasT2ExtractPack]> { +multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> { + def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr, + opc, "\t$Rd, $Rm", + [(set rGPR:$Rd, (opnode rGPR:$Rm))]>, + Requires<[HasT2ExtractPack, IsThumb2]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -691,25 +1030,27 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi, - opc, "\t$dst, $src, ror $rot", - [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]>, - Requires<[HasT2ExtractPack]> { + def r_rot : T2TwoReg<(outs rGPR:$dst), (ins rGPR:$Rm, rot_imm:$rot), + IIC_iEXTr, opc, "\t$dst, $Rm, ror $rot", + [(set rGPR:$dst, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack, IsThumb2]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; let Inst{19-16} = 0b1111; // Rn let Inst{15-12} = 0b1111; let Inst{7} = 1; - let Inst{5-4} = {?,?}; // rotate + + bits<2> rot; + let Inst{5-4} = rot{1-0}; // rotate } } // SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern // supported yet. 
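// All of these extend helpers share an r_rot form whose rot_imm operand now
// feeds real encoding bits (Inst{5-4}) instead of the old {?,?} wildcards.
// In assembly the rotation selects which byte or halfword gets extended,
// e.g. (illustrative, not from this patch): "sxtb r0, r1, ror #16"
// sign-extends byte 2 of r1 into r0.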
-multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> { - def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, - opc, "\t$dst, $src", []> { +multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> { + def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr, + opc, "\t$Rd, $Rm", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -718,25 +1059,27 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi, - opc, "\t$dst, $src, ror $rot", []> { + def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$rot), IIC_iEXTr, + opc, "\t$Rd, $Rm, ror $rot", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; let Inst{19-16} = 0b1111; // Rn let Inst{15-12} = 0b1111; let Inst{7} = 1; - let Inst{5-4} = {?,?}; // rotate + + bits<2> rot; + let Inst{5-4} = rot{1-0}; // rotate } } -/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a +/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. -multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { - def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr, - opc, "\t$dst, $LHS, $RHS", - [(set rGPR:$dst, (opnode rGPR:$LHS, rGPR:$RHS))]>, - Requires<[HasT2ExtractPack]> { +multiclass T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> { + def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr, + opc, "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, + Requires<[HasT2ExtractPack, IsThumb2]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -744,25 +1087,28 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot), - IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", - [(set rGPR:$dst, (opnode rGPR:$LHS, - (rotr rGPR:$RHS, rot_imm:$rot)))]>, - Requires<[HasT2ExtractPack]> { + def rr_rot : T2ThreeReg<(outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot), + IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", + [(set rGPR:$Rd, (opnode rGPR:$Rn, + (rotr rGPR:$Rm, rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack, IsThumb2]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; let Inst{15-12} = 0b1111; let Inst{7} = 1; - let Inst{5-4} = {?,?}; // rotate + + bits<2> rot; + let Inst{5-4} = rot{1-0}; // rotate } } // DO variant - disassembly only, no pattern -multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { - def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr, - opc, "\t$dst, $LHS, $RHS", []> { +multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> { + def rr : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iEXTAr, + opc, "\t$Rd, $Rn, $Rm", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -770,14 +1116,16 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { let Inst{7} = 1; let Inst{5-4} = 0b00; // rotate } - def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot), - IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> { + def rr_rot : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot), + IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", []> { let Inst{31-27} = 0b11111; 
let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; let Inst{15-12} = 0b1111; let Inst{7} = 1; - let Inst{5-4} = {?,?}; // rotate + + bits<2> rot; + let Inst{5-4} = rot{1-0}; // rotate } } @@ -789,24 +1137,23 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { // Miscellaneous Instructions. // +class T2PCOneRegImm<dag oops, dag iops, InstrItinClass itin, + string asm, list<dag> pattern> + : T2XI<oops, iops, itin, asm, pattern> { + bits<4> Rd; + bits<12> label; + + let Inst{11-8} = Rd; + let Inst{26} = label{11}; + let Inst{14-12} = label{10-8}; + let Inst{7-0} = label{7-0}; +} + // LEApcrel - Load a pc-relative address into a register without offending the // assembler. -let neverHasSideEffects = 1 in { -let isReMaterializable = 1 in -def t2LEApcrel : T2XI<(outs rGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, - "adr${p}.w\t$dst, #$label", []> { - let Inst{31-27} = 0b11110; - let Inst{25-24} = 0b10; - // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) - let Inst{22} = 0; - let Inst{20} = 0; - let Inst{19-16} = 0b1111; // Rn - let Inst{15} = 0; -} -} // neverHasSideEffects -def t2LEApcrelJT : T2XI<(outs rGPR:$dst), - (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, - "adr${p}.w\t$dst, #${label}_${id}", []> { +def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), + (ins t2adrlabel:$addr, pred:$p), + IIC_iALUi, "adr{$p}.w\t$Rd, #$addr", []> { let Inst{31-27} = 0b11110; let Inst{25-24} = 0b10; // Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE) @@ -814,76 +1161,88 @@ def t2LEApcrelJT : T2XI<(outs rGPR:$dst), let Inst{20} = 0; let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; -} + bits<4> Rd; + bits<13> addr; + let Inst{11-8} = Rd; + let Inst{23} = addr{12}; + let Inst{21} = addr{12}; + let Inst{26} = addr{11}; + let Inst{14-12} = addr{10-8}; + let Inst{7-0} = addr{7-0}; +} + +let neverHasSideEffects = 1, isReMaterializable = 1 in +def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p), + Size4Bytes, IIC_iALUi, []>; +def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd), + (ins i32imm:$label, nohash_imm:$id, pred:$p), + Size4Bytes, IIC_iALUi, + []>; + + +// FIXME: None of these add/sub SP special instructions should be necessary +// at all for thumb2 since they use the same encodings as the generic +// add/sub instructions. In thumb1 we need them since they have dedicated +// encodings. At the least, they should be pseudo instructions. // ADD r, sp, {so_imm|i12} -def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - IIC_iALUi, "add", ".w\t$dst, $sp, $imm", []> { +let isCodeGenOnly = 1 in { +def t2ADDrSPi : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), + IIC_iALUi, "add", ".w\t$Rd, $Rn, $imm", []> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = 0b1000; - let Inst{20} = ?; // The S bit. - let Inst{19-16} = 0b1101; // Rn = sp let Inst{15} = 0; } -def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - IIC_iALUi, "addw", "\t$dst, $sp, $imm", []> { +def t2ADDrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), + IIC_iALUi, "addw", "\t$Rd, $Rn, $imm", []> { let Inst{31-27} = 0b11110; - let Inst{25} = 1; - let Inst{24-21} = 0b0000; - let Inst{20} = 0; // The S bit. 
- let Inst{19-16} = 0b1101; // Rn = sp + let Inst{25-20} = 0b100000; let Inst{15} = 0; } // ADD r, sp, so_reg -def t2ADDrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), - IIC_iALUsi, "add", ".w\t$dst, $sp, $rhs", []> { +def t2ADDrSPs : T2sTwoRegShiftedReg< + (outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), + IIC_iALUsi, "add", ".w\t$Rd, $Rn, $ShiftedRm", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b1000; - let Inst{20} = ?; // The S bit. - let Inst{19-16} = 0b1101; // Rn = sp let Inst{15} = 0; } // SUB r, sp, {so_imm|i12} -def t2SUBrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - IIC_iALUi, "sub", ".w\t$dst, $sp, $imm", []> { +def t2SUBrSPi : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), + IIC_iALUi, "sub", ".w\t$Rd, $Rn, $imm", []> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = 0b1101; - let Inst{20} = ?; // The S bit. - let Inst{19-16} = 0b1101; // Rn = sp let Inst{15} = 0; } -def t2SUBrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - IIC_iALUi, "subw", "\t$dst, $sp, $imm", []> { +def t2SUBrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), + IIC_iALUi, "subw", "\t$Rd, $Rn, $imm", []> { let Inst{31-27} = 0b11110; - let Inst{25} = 1; - let Inst{24-21} = 0b0101; - let Inst{20} = 0; // The S bit. - let Inst{19-16} = 0b1101; // Rn = sp + let Inst{25-20} = 0b101010; let Inst{15} = 0; } // SUB r, sp, so_reg -def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), +def t2SUBrSPs : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$imm), IIC_iALUsi, - "sub", "\t$dst, $sp, $rhs", []> { + "sub", "\t$Rd, $Rn, $imm", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b1101; - let Inst{20} = ?; // The S bit. 
let Inst{19-16} = 0b1101; // Rn = sp let Inst{15} = 0; } +} // end isCodeGenOnly = 1 // Signed and unsigned division on v7-M -def t2SDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, - "sdiv", "\t$dst, $a, $b", - [(set rGPR:$dst, (sdiv rGPR:$a, rGPR:$b))]>, - Requires<[HasDivide]> { +def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, + "sdiv", "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, + Requires<[HasDivide, IsThumb2]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; let Inst{20} = 0b1; @@ -891,10 +1250,10 @@ def t2SDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, let Inst{7-4} = 0b1111; } -def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, - "udiv", "\t$dst, $a, $b", - [(set rGPR:$dst, (udiv rGPR:$a, rGPR:$b))]>, - Requires<[HasDivide]> { +def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi, + "udiv", "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, + Requires<[HasDivide, IsThumb2]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; let Inst{20} = 0b1; @@ -908,26 +1267,26 @@ def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi, // Load let canFoldAsLoad = 1, isReMaterializable = 1 in -defm t2LDR : T2I_ld<0, 0b10, "ldr", UnOpFrag<(load node:$Src)>>; +defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, + UnOpFrag<(load node:$Src)>>; // Loads with zero extension -defm t2LDRH : T2I_ld<0, 0b01, "ldrh", UnOpFrag<(zextloadi16 node:$Src)>>; -defm t2LDRB : T2I_ld<0, 0b00, "ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; +defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, + UnOpFrag<(zextloadi16 node:$Src)>>; +defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, + UnOpFrag<(zextloadi8 node:$Src)>>; // Loads with sign extension -defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>; -defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; +defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, + UnOpFrag<(sextloadi16 node:$Src)>>; +defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, + UnOpFrag<(sextloadi8 node:$Src)>>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword -def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2), +def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_imm8s4:$addr), - IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>; -def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2), - (ins i32imm:$addr), IIC_iLoadi, - "ldrd", "\t$dst1, $addr", []> { - let Inst{19-16} = 0b1111; // Rn -} + IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", []>; } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // zextload i1 -> zextload i8 @@ -976,70 +1335,71 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), // not via pattern. 
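// Each defm above fans out through T2I_ld into four defs; t2LDR, for example,
// becomes t2LDRi12, t2LDRi8, t2LDRs and t2LDRpci, and isel picks among them
// by which addressing-mode ComplexPattern matches. The byte/halfword and
// sign-extending variants get the same suffixes.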
// Indexed loads + let mayLoad = 1, neverHasSideEffects = 1 in { -def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb), +def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, IIC_iLoadiu, - "ldr", "\t$dst, $addr!", "$addr.base = $base_wb", + AddrModeT2_i8, IndexModePre, IIC_iLoad_iu, + "ldr", "\t$Rt, $addr!", "$addr.base = $Rn", []>; -def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iLoadiu, - "ldr", "\t$dst, [$base], $offset", "$base = $base_wb", +def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn), + (ins GPR:$base, t2am_imm8_offset:$addr), + AddrModeT2_i8, IndexModePost, IIC_iLoad_iu, + "ldr", "\t$Rt, [$Rn], $addr", "$base = $Rn", []>; -def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$dst, GPR:$base_wb), +def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, IIC_iLoadiu, - "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", + AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, + "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn", []>; -def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iLoadiu, - "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", +def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn), + (ins GPR:$base, t2am_imm8_offset:$addr), + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, + "ldrb", "\t$Rt, [$Rn], $addr", "$base = $Rn", []>; -def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$dst, GPR:$base_wb), +def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, IIC_iLoadiu, - "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", + AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, + "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn", []>; -def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iLoadiu, - "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", +def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn), + (ins GPR:$base, t2am_imm8_offset:$addr), + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, + "ldrh", "\t$Rt, [$Rn], $addr", "$base = $Rn", []>; -def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$dst, GPR:$base_wb), +def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, IIC_iLoadiu, - "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", + AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, + "ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn", []>; -def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iLoadiu, - "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", +def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn), + (ins GPR:$base, t2am_imm8_offset:$addr), + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, + "ldrsb", "\t$Rt, [$Rn], $addr", "$base = $Rn", []>; -def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$dst, GPR:$base_wb), +def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, IIC_iLoadiu, - "ldrsh", "\t$dst, $addr!", "$addr.base 
= $base_wb", + AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu, + "ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn", []>; -def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iLoadiu, - "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", +def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn), + (ins GPR:$base, t2am_imm8_offset:$addr), + AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, + "ldrsh", "\t$dst, [$Rn], $addr", "$base = $Rn", []>; -} // mayLoad = 1, neverHasSideEffects = 1 +} // mayLoad = 1, neverHasSideEffects = 1 // LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are // for disassembly only. // Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 -class T2IldT<bit signed, bits<2> type, string opc> - : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, - "\t$dst, $addr", []> { +class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii> + : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, + "\t$Rt, $addr", []> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = signed; @@ -1048,74 +1408,83 @@ class T2IldT<bit signed, bits<2> type, string opc> let Inst{20} = 1; // load let Inst{11} = 1; let Inst{10-8} = 0b110; // PUW. + + bits<4> Rt; + bits<13> addr; + let Inst{15-12} = Rt; + let Inst{19-16} = addr{12-9}; + let Inst{7-0} = addr{7-0}; } -def t2LDRT : T2IldT<0, 0b10, "ldrt">; -def t2LDRBT : T2IldT<0, 0b00, "ldrbt">; -def t2LDRHT : T2IldT<0, 0b01, "ldrht">; -def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt">; -def t2LDRSHT : T2IldT<1, 0b01, "ldrsht">; +def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>; +def t2LDRBT : T2IldT<0, 0b00, "ldrbt", IIC_iLoad_bh_i>; +def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>; +def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>; +def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>; // Store -defm t2STR :T2I_st<0b10,"str", BinOpFrag<(store node:$LHS, node:$RHS)>>; -defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; -defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; +defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, + BinOpFrag<(store node:$LHS, node:$RHS)>>; +defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_si, + BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; +defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, + BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), - (ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr), - IIC_iStorer, "strd", "\t$src1, $addr", []>; + (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr), + IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>; // Indexed stores def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb), - (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePre, IIC_iStoreiu, - "str", "\t$src, [$base, $offset]!", "$base = $base_wb", + (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), + AddrModeT2_i8, IndexModePre, IIC_iStore_iu, + "str", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb", [(set GPR:$base_wb, - (pre_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb), - (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, 
IIC_iStoreiu, - "str", "\t$src, [$base], $offset", "$base = $base_wb", + (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), + AddrModeT2_i8, IndexModePost, IIC_iStore_iu, + "str", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb", [(set GPR:$base_wb, - (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb), - (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePre, IIC_iStoreiu, - "strh", "\t$src, [$base, $offset]!", "$base = $base_wb", + (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), + AddrModeT2_i8, IndexModePre, IIC_iStore_iu, + "strh", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb", [(set GPR:$base_wb, - (pre_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + (pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb), - (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iStoreiu, - "strh", "\t$src, [$base], $offset", "$base = $base_wb", + (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), + AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, + "strh", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb", [(set GPR:$base_wb, - (post_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb), - (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePre, IIC_iStoreiu, - "strb", "\t$src, [$base, $offset]!", "$base = $base_wb", + (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), + AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, + "strb", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb", [(set GPR:$base_wb, - (pre_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), - (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, IIC_iStoreiu, - "strb", "\t$src, [$base], $offset", "$base = $base_wb", + (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), + AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, + "strb", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb", [(set GPR:$base_wb, - (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; // STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly // only. 
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 -class T2IstT<bits<2> type, string opc> - : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), IIC_iStorei, opc, - "\t$src, $addr", []> { +class T2IstT<bits<2> type, string opc, InstrItinClass ii> + : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, + "\t$Rt, $addr", []> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; let Inst{24} = 0; // not signed @@ -1124,51 +1493,62 @@ class T2IstT<bits<2> type, string opc> let Inst{20} = 0; // store let Inst{11} = 1; let Inst{10-8} = 0b110; // PUW + + bits<4> Rt; + bits<13> addr; + let Inst{15-12} = Rt; + let Inst{19-16} = addr{12-9}; + let Inst{7-0} = addr{7-0}; } -def t2STRT : T2IstT<0b10, "strt">; -def t2STRBT : T2IstT<0b00, "strbt">; -def t2STRHT : T2IstT<0b01, "strht">; +def t2STRT : T2IstT<0b10, "strt", IIC_iStore_i>; +def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>; +def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; // ldrd / strd pre / post variants // For disassembly only. -def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$base, t2am_imm8s4_offset:$imm), NoItinerary, - "ldrd", "\t$dst1, $dst2, [$base, $imm]!", []>; +def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs GPR:$Rt, GPR:$Rt2), + (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>; -def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$dst1, GPR:$dst2), - (ins GPR:$base, t2am_imm8s4_offset:$imm), NoItinerary, - "ldrd", "\t$dst1, $dst2, [$base], $imm", []>; +def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$Rt, GPR:$Rt2), + (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>; def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs), - (ins GPR:$src1, GPR:$src2, GPR:$base, t2am_imm8s4_offset:$imm), - NoItinerary, "strd", "\t$src1, $src2, [$base, $imm]!", []>; + (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), + IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>; def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs), - (ins GPR:$src1, GPR:$src2, GPR:$base, t2am_imm8s4_offset:$imm), - NoItinerary, "strd", "\t$src1, $src2, [$base], $imm", []>; + (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), + IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>; // T2Ipl (Preload Data/Instruction) signals the memory system of possible future // data/instruction access. These are for disassembly only. -// -// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0. -// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc. -multiclass T2Ipl<bit instr, bit write, string opc> { +// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0), +// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1). 
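// Reading the new patterns below: ARMPreload carries (addr, write, instr),
// so with the parameter order T2Ipl<write, instr, ...> the defms further down
// give, for example (illustrative): "pld [r1, #32]" -> t2PLDi12 with
// write = 0 and instr = 0, while pldw sets write and pli sets instr.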
+multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { - def i12 : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc, - "\t[$base, $imm]", []> { + def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc, + "\t$addr", + [(ARMPreload t2addrmode_imm12:$addr, (i32 write), (i32 instr))]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; - let Inst{23} = 1; // U = 1 let Inst{22} = 0; let Inst{21} = write; let Inst{20} = 1; let Inst{15-12} = 0b1111; + + bits<17> addr; + let Inst{19-16} = addr{16-13}; // Rn + let Inst{23} = addr{12}; // U + let Inst{11-0} = addr{11-0}; // imm12 } - def i8 : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoadi, opc, - "\t[$base, $imm]", []> { + def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc, + "\t$addr", + [(ARMPreload t2addrmode_imm8:$addr, (i32 write), (i32 instr))]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; let Inst{23} = 0; // U = 0 @@ -1177,22 +1557,15 @@ multiclass T2Ipl<bit instr, bit write, string opc> { let Inst{20} = 1; let Inst{15-12} = 0b1111; let Inst{11-8} = 0b1100; - } - def pci : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoadi, opc, - "\t[pc, $imm]", []> { - let Inst{31-25} = 0b1111100; - let Inst{24} = instr; - let Inst{23} = ?; // add = (U == 1) - let Inst{22} = 0; - let Inst{21} = write; - let Inst{20} = 1; - let Inst{19-16} = 0b1111; // Rn = 0b1111 - let Inst{15-12} = 0b1111; + bits<13> addr; + let Inst{19-16} = addr{12-9}; // Rn + let Inst{7-0} = addr{7-0}; // imm8 } - def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoadi, opc, - "\t[$base, $a]", []> { + def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc, + "\t$addr", + [(ARMPreload t2addrmode_so_reg:$addr, (i32 write), (i32 instr))]> { let Inst{31-25} = 0b1111100; let Inst{24} = instr; let Inst{23} = 0; // add = TRUE for T1 @@ -1201,133 +1574,174 @@ multiclass T2Ipl<bit instr, bit write, string opc> { let Inst{20} = 1; let Inst{15-12} = 0b1111; let Inst{11-6} = 0000000; - let Inst{5-4} = 0b00; // no shift is applied - } - def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoadi, opc, - "\t[$base, $a, lsl $shamt]", []> { - let Inst{31-25} = 0b1111100; - let Inst{24} = instr; - let Inst{23} = 0; // add = TRUE for T1 - let Inst{22} = 0; - let Inst{21} = write; - let Inst{20} = 1; - let Inst{15-12} = 0b1111; - let Inst{11-6} = 0000000; + bits<10> addr; + let Inst{19-16} = addr{9-6}; // Rn + let Inst{3-0} = addr{5-2}; // Rm + let Inst{5-4} = addr{1-0}; // imm2 } } -defm t2PLD : T2Ipl<0, 0, "pld">; -defm t2PLDW : T2Ipl<0, 1, "pldw">; -defm t2PLI : T2Ipl<1, 0, "pli">; +defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>; +defm t2PLDW : T2Ipl<1, 0, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>; +defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>; //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { -def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, - reglist:$dsts, variable_ops), IIC_iLoadm, - "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b00; - let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' - let Inst{22} = 0; - let Inst{21} = 0; // The W bit. 
- let Inst{20} = 1; // Load -} +multiclass thumb2_ldst_mult<string asm, InstrItinClass itin, + InstrItinClass itin_upd, bit L_bit> { + def IA : + T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin, !strconcat(asm, "ia${p}.w\t$Rn, $regs"), []> { + bits<4> Rn; + bits<16> regs; -def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, - reglist:$dsts, variable_ops), IIC_iLoadm, - "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts", - "$addr.addr = $wb", []> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b00; - let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' - let Inst{22} = 0; - let Inst{21} = 1; // The W bit. - let Inst{20} = 1; // Load -} -} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = 0b01; // Increment After + let Inst{22} = 0; + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + let Inst{19-16} = Rn; + let Inst{15-0} = regs; + } + def IA_UPD : + T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin_upd, !strconcat(asm, "ia${p}.w\t$Rn!, $regs"), "$Rn = $wb", []> { + bits<4> Rn; + bits<16> regs; -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { -def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, - reglist:$srcs, variable_ops), IIC_iStorem, - "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b00; - let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' - let Inst{22} = 0; - let Inst{21} = 0; // The W bit. - let Inst{20} = 0; // Store -} + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = 0b01; // Increment After + let Inst{22} = 0; + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + let Inst{19-16} = Rn; + let Inst{15-0} = regs; + } + def DB : + T2XI<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin, !strconcat(asm, "db${p}.w\t$Rn, $regs"), []> { + bits<4> Rn; + bits<16> regs; -def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, - reglist:$srcs, variable_ops), - IIC_iStorem, - "stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs", - "$addr.addr = $wb", []> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b00; - let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' - let Inst{22} = 0; - let Inst{21} = 1; // The W bit. - let Inst{20} = 0; // Store + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = 0; + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + let Inst{19-16} = Rn; + let Inst{15-0} = regs; + } + def DB_UPD : + T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), + itin_upd, !strconcat(asm, "db${p}.w\t$Rn, $regs"), "$Rn = $wb", []> { + bits<4> Rn; + bits<16> regs; + + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b00; + let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = 0; + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + let Inst{19-16} = Rn; + let Inst{15-0} = regs; + } } -} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq + +let neverHasSideEffects = 1 in { + +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in +defm t2LDM : thumb2_ldst_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>; + +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +defm t2STM : thumb2_ldst_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>; + +} // neverHasSideEffects + //===----------------------------------------------------------------------===// // Move Instructions. 
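Before the move instructions: the thumb2_ldst_mult multiclass above stamps out all four ldm/stm forms from one bit layout. A compact C restatement of that layout (illustrative only, not LLVM's encoder):

    #include <stdint.h>

    /* ia: Increment After (0b01) vs Decrement Before (0b10);
     * wback = W bit, load = L bit, reglist fills Inst{15-0}. */
    static uint32_t encode_t2_ldstm(int ia, int wback, int load,
                                    unsigned Rn, uint16_t reglist) {
        uint32_t insn = 0;
        insn |= 0x1Du << 27;              /* Inst{31-27} = 0b11101      */
                                          /* Inst{26-25} = 0b00         */
        insn |= (ia ? 0x1u : 0x2u) << 23; /* Inst{24-23}: IA=01, DB=10  */
                                          /* Inst{22} = 0               */
        insn |= (wback ? 1u : 0u) << 21;  /* Inst{21} = writeback       */
        insn |= (load ? 1u : 0u) << 20;   /* Inst{20} = L bit           */
        insn |= (Rn & 0xFu) << 16;        /* Inst{19-16} = Rn           */
        insn |= reglist;                  /* Inst{15-0} = register list */
        return insn;
    }

For example, encode_t2_ldstm(1, 1, 1, 0, 0x0030) gives 0xE8B00030, i.e. ldmia.w r0!, {r4, r5}.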
// let neverHasSideEffects = 1 in -def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, - "mov", ".w\t$dst, $src", []> { +def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr, + "mov", ".w\t$Rd, $Rm", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; - let Inst{20} = ?; // The S bit. let Inst{19-16} = 0b1111; // Rn let Inst{14-12} = 0b000; let Inst{7-4} = 0b0000; } // AddedComplexity to ensure isel tries t2MOVi before t2MOVi16. -let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in -def t2MOVi : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, - "mov", ".w\t$dst, $src", - [(set rGPR:$dst, t2_so_imm:$src)]> { +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, + AddedComplexity = 1 in +def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi, + "mov", ".w\t$Rd, $imm", + [(set rGPR:$Rd, t2_so_imm:$imm)]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = 0b0010; - let Inst{20} = ?; // The S bit. let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; } -let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def t2MOVi16 : T2I<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi, - "movw", "\t$dst, $src", - [(set rGPR:$dst, imm0_65535:$src)]> { +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in +def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi, + "movw", "\t$Rd, $imm", + [(set rGPR:$Rd, imm0_65535:$imm)]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0010; let Inst{20} = 0; // The S bit. let Inst{15} = 0; + + bits<4> Rd; + bits<16> imm; + + let Inst{11-8} = Rd; + let Inst{19-16} = imm{15-12}; + let Inst{26} = imm{11}; + let Inst{14-12} = imm{10-8}; + let Inst{7-0} = imm{7-0}; } -let Constraints = "$src = $dst" in -def t2MOVTi16 : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$imm), IIC_iMOVi, - "movt", "\t$dst, $imm", - [(set rGPR:$dst, +def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), + (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; + +let Constraints = "$src = $Rd" in { +def t2MOVTi16 : T2I<(outs rGPR:$Rd), + (ins rGPR:$src, i32imm_hilo16:$imm), IIC_iMOVi, + "movt", "\t$Rd, $imm", + [(set rGPR:$Rd, (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-21} = 0b0110; let Inst{20} = 0; // The S bit. 
let Inst{15} = 0; + + bits<4> Rd; + bits<16> imm; + + let Inst{11-8} = Rd; + let Inst{19-16} = imm{15-12}; + let Inst{26} = imm{11}; + let Inst{14-12} = imm{10-8}; + let Inst{7-0} = imm{7-0}; } +def t2MOVTi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), + (ins rGPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; +} // Constraints + def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>; //===----------------------------------------------------------------------===// @@ -1336,28 +1750,28 @@ def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>; // Sign extenders -defm t2SXTB : T2I_unary_rrot<0b100, "sxtb", +defm t2SXTB : T2I_ext_rrot<0b100, "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; -defm t2SXTH : T2I_unary_rrot<0b000, "sxth", +defm t2SXTH : T2I_ext_rrot<0b000, "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; -defm t2SXTB16 : T2I_unary_rrot_sxtb16<0b010, "sxtb16">; +defm t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">; -defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab", +defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; -defm t2SXTAH : T2I_bin_rrot<0b000, "sxtah", +defm t2SXTAH : T2I_exta_rrot<0b000, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; -defm t2SXTAB16 : T2I_bin_rrot_DO<0b010, "sxtab16">; +defm t2SXTAB16 : T2I_exta_rrot_DO<0b010, "sxtab16">; // TODO: SXT(A){B|H}16 - done for disassembly only // Zero extenders let AddedComplexity = 16 in { -defm t2UXTB : T2I_unary_rrot<0b101, "uxtb", +defm t2UXTB : T2I_ext_rrot<0b101, "uxtb", UnOpFrag<(and node:$Src, 0x000000FF)>>; -defm t2UXTH : T2I_unary_rrot<0b001, "uxth", +defm t2UXTH : T2I_ext_rrot<0b001, "uxth", UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16", +defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; // FIXME: This pattern incorrectly assumes the shl operator is a rotate. @@ -1365,15 +1779,17 @@ defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16", // instead so we can include a check for masking back in the upper // eight bits of the source into the lower eight bits of the result. //def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF), -// (t2UXTB16r_rot rGPR:$Src, 24)>, Requires<[HasT2ExtractPack]>; +// (t2UXTB16r_rot rGPR:$Src, 24)>, +// Requires<[HasT2ExtractPack, IsThumb2]>; def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot rGPR:$Src, 8)>, Requires<[HasT2ExtractPack]>; + (t2UXTB16r_rot rGPR:$Src, 8)>, + Requires<[HasT2ExtractPack, IsThumb2]>; -defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab", +defm t2UXTAB : T2I_exta_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; -defm t2UXTAH : T2I_bin_rrot<0b001, "uxtah", +defm t2UXTAH : T2I_exta_rrot<0b001, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; -defm t2UXTAB16 : T2I_bin_rrot_DO<0b011, "uxtab16">; +defm t2UXTAB16 : T2I_exta_rrot_DO<0b011, "uxtab16">; } //===----------------------------------------------------------------------===// @@ -1387,8 +1803,10 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub", // ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants. 
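The imm16 scattering used by t2MOVi16/t2MOVTi16 above (and by t2MOVCCi16 later in this file) places imm4 in Inst{19-16}, i in Inst{26}, imm3 in Inst{14-12}, and imm8 in Inst{7-0}. A C sketch with the movw fixed bits folded in (helper names are illustrative, not LLVM API):

    #include <stdint.h>

    static uint32_t scatter_imm16(uint16_t imm) {
        uint32_t insn = 0;
        insn |= ((uint32_t)(imm >> 12) & 0xFu) << 16; /* imm4 -> Inst{19-16} */
        insn |= ((uint32_t)(imm >> 11) & 0x1u) << 26; /* i    -> Inst{26}    */
        insn |= ((uint32_t)(imm >> 8)  & 0x7u) << 12; /* imm3 -> Inst{14-12} */
        insn |= imm & 0xFFu;                          /* imm8 -> Inst{7-0}   */
        return insn;
    }

    /* movw fixed bits: Inst{31-27}=0b11110, Inst{25}=1, Inst{24-21}=0b0010;
     * movt differs only in Inst{24-21}=0b0110, i.e. base 0xF2C00000. */
    static uint32_t encode_t2movw(unsigned Rd, uint16_t imm) {
        return 0xF2400000u | ((Rd & 0xFu) << 8) | scatter_imm16(imm);
    }

encode_t2movw(0, 0) returns 0xF2400000, the documented encoding of movw r0, #0; a movw/movt pair then materializes any 32-bit constant, which is exactly the (or (and $src, 0xffff), lo16AllZero:$imm) pattern above.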
defm t2ADDS : T2I_bin_s_irs <0b1000, "add", + IIC_iALUi, IIC_iALUr, IIC_iALUsi, BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>; defm t2SUBS : T2I_bin_s_irs <0b1101, "sub", + IIC_iALUi, IIC_iALUr, IIC_iALUsi, BinOpFrag<(subc node:$LHS, node:$RHS)>>; defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", @@ -1436,8 +1854,8 @@ def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm), // Select Bytes -- for disassembly only -def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel", - "\t$dst, $a, $b", []> { +def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []> { let Inst{31-27} = 0b11111; let Inst{26-24} = 0b010; let Inst{23} = 0b1; @@ -1450,28 +1868,41 @@ def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel", // A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned) // And Miscellaneous operations -- for disassembly only class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc, - list<dag> pat = [/* For disassembly only; pattern left blank */]> - : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), NoItinerary, opc, - "\t$dst, $a, $b", pat> { + list<dag> pat = [/* For disassembly only; pattern left blank */], + dag iops = (ins rGPR:$Rn, rGPR:$Rm), + string asm = "\t$Rd, $Rn, $Rm"> + : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0101; let Inst{22-20} = op22_20; let Inst{15-12} = 0b1111; let Inst{7-4} = op7_4; + + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + + let Inst{11-8} = Rd; + let Inst{19-16} = Rn; + let Inst{3-0} = Rm; } // Saturating add/subtract -- for disassembly only def t2QADD : T2I_pam<0b000, 0b1000, "qadd", - [(set rGPR:$dst, (int_arm_qadd rGPR:$a, rGPR:$b))]>; + [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))], + (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">; def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">; def t2QASX : T2I_pam<0b010, 0b0001, "qasx">; -def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">; -def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">; +def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [], + (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; +def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [], + (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">; def t2QSUB : T2I_pam<0b000, 0b1010, "qsub", - [(set rGPR:$dst, (int_arm_qsub rGPR:$a, rGPR:$b))]>; + [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))], + (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">; def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">; def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">; @@ -1511,21 +1942,61 @@ def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">; def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">; def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">; +// Helper class for disassembly only +// A6.3.16 & A6.3.17 +// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions. 
+class T2ThreeReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, + dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> + : T2ThreeReg<oops, iops, itin, opc, asm, pattern> { + let Inst{31-27} = 0b11111; + let Inst{26-24} = 0b011; + let Inst{23} = long; + let Inst{22-20} = op22_20; + let Inst{7-4} = op7_4; +} + +class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, + dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> + : T2FourReg<oops, iops, itin, opc, asm, pattern> { + let Inst{31-27} = 0b11111; + let Inst{26-24} = 0b011; + let Inst{23} = long; + let Inst{22-20} = op22_20; + let Inst{7-4} = op7_4; +} + // Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only -def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst), - (ins rGPR:$a, rGPR:$b), - NoItinerary, "usad8", "\t$dst, $a, $b", []> { +def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm), + NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []> { let Inst{15-12} = 0b1111; } -def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst), - (ins rGPR:$a, rGPR:$b, rGPR:$acc), NoItinerary, "usada8", - "\t$dst, $a, $b, $acc", []>; +def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary, + "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>; // Signed/Unsigned saturate -- for disassembly only -def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh), - NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh", +class T2SatI<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<4> Rn; + bits<5> sat_imm; + bits<7> sh; + + let Inst{11-8} = Rd; + let Inst{19-16} = Rn; + let Inst{4-0} = sat_imm{4-0}; + let Inst{21} = sh{6}; + let Inst{14-12} = sh{4-2}; + let Inst{7-6} = sh{1-0}; +} + +def t2SSAT: T2SatI< + (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh), + NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1100; @@ -1533,8 +2004,9 @@ def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh), let Inst{15} = 0; } -def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary, - "ssat16", "\t$dst, $bit_pos, $a", +def t2SSAT16: T2SatI< + (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn), NoItinerary, + "ssat16", "\t$Rd, $sat_imm, $Rn", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1100; @@ -1545,8 +2017,9 @@ def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary, let Inst{7-6} = 0b00; // imm2 = '00' } -def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh), - NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh", +def t2USAT: T2SatI< + (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn, shift_imm:$sh), + NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", [/* For disassembly only; pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1110; @@ -1554,8 +2027,9 @@ def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh), let Inst{15} = 0; } -def t2USAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary, - "usat16", "\t$dst, $bit_pos, $a", +def t2USAT16: T2SatI< + (outs rGPR:$Rd), (ins i32imm:$sat_imm, rGPR:$Rn), NoItinerary, + "usat16", "\t$Rd, $sat_imm, $Rn", [/* For disassembly only;
pattern left blank */]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1110; @@ -1579,23 +2053,23 @@ defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>; defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>; let Uses = [CPSR] in { -def t2MOVrx : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, - "rrx", "\t$dst, $src", - [(set rGPR:$dst, (ARMrrx rGPR:$src))]> { +def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, + "rrx", "\t$Rd, $Rm", + [(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; - let Inst{20} = ?; // The S bit. let Inst{19-16} = 0b1111; // Rn let Inst{14-12} = 0b000; let Inst{7-4} = 0b0011; } } -let Defs = [CPSR] in { -def t2MOVsrl_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, - "lsrs", ".w\t$dst, $src, #1", - [(set rGPR:$dst, (ARMsrl_flag rGPR:$src))]> { +let isCodeGenOnly = 1, Defs = [CPSR] in { +def t2MOVsrl_flag : T2TwoRegShiftImm< + (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, + "lsrs", ".w\t$Rd, $Rm, #1", + [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1606,9 +2080,10 @@ def t2MOVsrl_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, let Inst{14-12} = 0b000; let Inst{7-6} = 0b01; } -def t2MOVsra_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, - "asrs", ".w\t$dst, $src, #1", - [(set rGPR:$dst, (ARMsra_flag rGPR:$src))]> { +def t2MOVsra_flag : T2TwoRegShiftImm< + (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, + "asrs", ".w\t$Rd, $Rm, #1", + [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1626,39 +2101,67 @@ def t2MOVsra_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi, // defm t2AND : T2I_bin_w_irs<0b0000, "and", + IIC_iBITi, IIC_iBITr, IIC_iBITsi, BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; defm t2ORR : T2I_bin_w_irs<0b0010, "orr", + IIC_iBITi, IIC_iBITr, IIC_iBITsi, BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; defm t2EOR : T2I_bin_w_irs<0b0100, "eor", + IIC_iBITi, IIC_iBITr, IIC_iBITsi, BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; defm t2BIC : T2I_bin_w_irs<0b0001, "bic", + IIC_iBITi, IIC_iBITr, IIC_iBITsi, BinOpFrag<(and node:$LHS, (not node:$RHS))>>; -defm t2ANDS : T2I_bin_s_irs<0b0000, "and", - BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>; +class T2BitFI<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + bits<5> msb; + bits<5> lsb; + + let Inst{11-8} = Rd; + let Inst{4-0} = msb{4-0}; + let Inst{14-12} = lsb{4-2}; + let Inst{7-6} = lsb{1-0}; +} + +class T2TwoRegBitFI<dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2BitFI<oops, iops, itin, opc, asm, pattern> { + bits<4> Rn; -let Constraints = "$src = $dst" in -def t2BFC : T2I<(outs rGPR:$dst), (ins rGPR:$src, bf_inv_mask_imm:$imm), - IIC_iUNAsi, "bfc", "\t$dst, $imm", - [(set rGPR:$dst, (and rGPR:$src, bf_inv_mask_imm:$imm))]> { + let Inst{19-16} = Rn; +} + +let Constraints = "$src = $Rd" in +def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm), + IIC_iUNAsi, "bfc", "\t$Rd, $imm", + [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b10110; let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; + + bits<10> imm; + let msb{4-0} = imm{9-5}; + let lsb{4-0} = imm{4-0}; } -def t2SBFX: 
T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width), - IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []> { +def t2SBFX: T2TwoRegBitFI< + (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb), + IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b10100; let Inst{15} = 0; } -def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width), - IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []> { +def t2UBFX: T2TwoRegBitFI< + (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm0_31_m1:$msb), + IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b11100; @@ -1666,24 +2169,50 @@ def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width), } // A8.6.18 BFI - Bitfield insert (Encoding T1) -let Constraints = "$src = $dst" in -def t2BFI : T2I<(outs rGPR:$dst), - (ins rGPR:$src, rGPR:$val, bf_inv_mask_imm:$imm), - IIC_iALUi, "bfi", "\t$dst, $val, $imm", - [(set rGPR:$dst, (ARMbfi rGPR:$src, rGPR:$val, - bf_inv_mask_imm:$imm))]> { - let Inst{31-27} = 0b11110; - let Inst{25} = 1; - let Inst{24-20} = 0b10110; - let Inst{15} = 0; +let Constraints = "$src = $Rd" in { + def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd), + (ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm), + IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm", + [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn, + bf_inv_mask_imm:$imm))]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 1; + let Inst{24-20} = 0b10110; + let Inst{15} = 0; + + bits<10> imm; + let msb{4-0} = imm{9-5}; + let lsb{4-0} = imm{4-0}; + } + + // GNU as only supports this form of bfi (w/ 4 arguments) + let isAsmParserOnly = 1 in + def t2BFI4p : T2TwoRegBitFI<(outs rGPR:$Rd), + (ins rGPR:$src, rGPR:$Rn, lsb_pos_imm:$lsbit, + width_imm:$width), + IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width", + []> { + let Inst{31-27} = 0b11110; + let Inst{25} = 1; + let Inst{24-20} = 0b10110; + let Inst{15} = 0; + + bits<5> lsbit; + bits<5> width; + let msb{4-0} = width; // Custom encoder => lsb+width-1 + let lsb{4-0} = lsbit; + } } -defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS, - (not node:$RHS))>, 0, "">; +defm t2ORN : T2I_bin_irs<0b0011, "orn", + IIC_iBITi, IIC_iBITr, IIC_iBITsi, + BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">; // Preferred over t2EORri ra, rb, -1 because mvn has a 16-bit version let AddedComplexity = 1 in -defm t2MVN : T2I_un_irs <0b0011, "mvn", UnOpFrag<(not node:$Src)>, 1, 1>; +defm t2MVN : T2I_un_irs <0b0011, "mvn", + IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi, + UnOpFrag<(not node:$Src)>, 1, 1>; let AddedComplexity = 1 in @@ -1702,9 +2231,9 @@ def : T2Pat<(t2_so_imm_not:$src), // Multiply Instructions.
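Reference semantics for the bitfield trio above, in plain C (illustrative; assumes the usual arithmetic right shift on signed values, and recall the encoded msb is lsb + width - 1 per the custom-encoder note):

    #include <stdint.h>

    static uint32_t ubfx32(uint32_t rn, unsigned lsb, unsigned width) {
        uint32_t mask = (width < 32) ? ((1u << width) - 1u) : 0xFFFFFFFFu;
        return (rn >> lsb) & mask;
    }

    static int32_t sbfx32(uint32_t rn, unsigned lsb, unsigned width) {
        /* move the field to the top, then sign-propagate back down;
         * callers must keep lsb + width <= 32, as the ISA requires */
        return (int32_t)(rn << (32 - lsb - width)) >> (32 - width);
    }

    static uint32_t bfi32(uint32_t rd, uint32_t rn,
                          unsigned lsb, unsigned width) {
        uint32_t mask = (width < 32) ? ((1u << width) - 1u) : 0xFFFFFFFFu;
        return (rd & ~(mask << lsb)) | ((rn & mask) << lsb);
    }

bfi32 also explains the $src = $Rd constraint: the destination's bits outside the inserted field must flow through unchanged.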
// let isCommutable = 1 in -def t2MUL: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, - "mul", "\t$dst, $a, $b", - [(set rGPR:$dst, (mul rGPR:$a, rGPR:$b))]> { +def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, + "mul", "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; @@ -1712,83 +2241,63 @@ def t2MUL: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0000; // Multiply } -def t2MLA: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, - "mla", "\t$dst, $a, $b, $c", - [(set rGPR:$dst, (add (mul rGPR:$a, rGPR:$b), rGPR:$c))]> { +def t2MLA: T2FourReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, + "mla", "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; - let Inst{15-12} = {?, ?, ?, ?}; // Ra let Inst{7-4} = 0b0000; // Multiply } -def t2MLS: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, - "mls", "\t$dst, $a, $b, $c", - [(set rGPR:$dst, (sub rGPR:$c, (mul rGPR:$a, rGPR:$b)))]> { +def t2MLS: T2FourReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, + "mls", "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b000; - let Inst{15-12} = {?, ?, ?, ?}; // Ra let Inst{7-4} = 0b0001; // Multiply and Subtract } // Extra precision multiplies with low / high results let neverHasSideEffects = 1 in { let isCommutable = 1 in { -def t2SMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst), - (ins rGPR:$a, rGPR:$b), IIC_iMUL64, - "smull", "\t$ldst, $hdst, $a, $b", []> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0111; - let Inst{22-20} = 0b000; - let Inst{7-4} = 0b0000; -} - -def t2UMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst), - (ins rGPR:$a, rGPR:$b), IIC_iMUL64, - "umull", "\t$ldst, $hdst, $a, $b", []> { - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0111; - let Inst{22-20} = 0b010; - let Inst{7-4} = 0b0000; -} +def t2SMULL : T2MulLong<0b000, 0b0000, + (outs rGPR:$Rd, rGPR:$Ra), + (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64, + "smull", "\t$Rd, $Ra, $Rn, $Rm", []>; + +def t2UMULL : T2MulLong<0b010, 0b0000, + (outs rGPR:$RdLo, rGPR:$RdHi), + (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64, + "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>; } // isCommutable // Multiply + accumulate -def t2SMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), - (ins rGPR:$a, rGPR:$b), IIC_iMAC64, - "smlal", "\t$ldst, $hdst, $a, $b", []>{ - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0111; - let Inst{22-20} = 0b100; - let Inst{7-4} = 0b0000; -} - -def t2UMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), - (ins rGPR:$a, rGPR:$b), IIC_iMAC64, - "umlal", "\t$ldst, $hdst, $a, $b", []>{ - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0111; - let Inst{22-20} = 0b110; - let Inst{7-4} = 0b0000; -} - -def t2UMAAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), - (ins rGPR:$a, rGPR:$b), IIC_iMAC64, - "umaal", "\t$ldst, $hdst, $a, $b", []>{ - let Inst{31-27} = 0b11111; - let Inst{26-23} = 0b0111; - let Inst{22-20} = 0b110; - let Inst{7-4} = 0b0110; -} +def t2SMLAL : T2MulLong<0b100, 0b0000, + (outs rGPR:$RdLo, rGPR:$RdHi), + (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, + "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>; + +def t2UMLAL : T2MulLong<0b110, 0b0000, + (outs rGPR:$RdLo, rGPR:$RdHi), + (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, + "umlal", 
"\t$RdLo, $RdHi, $Rn, $Rm", []>; + +def t2UMAAL : T2MulLong<0b110, 0b0110, + (outs rGPR:$RdLo, rGPR:$RdHi), + (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, + "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>; } // neverHasSideEffects // Rounding variants of the below included for disassembly only // Most significant word multiply -def t2SMMUL : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, - "smmul", "\t$dst, $a, $b", - [(set rGPR:$dst, (mulhs rGPR:$a, rGPR:$b))]> { +def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, + "smmul", "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -1796,8 +2305,8 @@ def t2SMMUL : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } -def t2SMMULR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, - "smmulr", "\t$dst, $a, $b", []> { +def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, + "smmulr", "\t$Rd, $Rn, $Rm", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -1805,49 +2314,49 @@ def t2SMMULR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) } -def t2SMMLA : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, - "smmla", "\t$dst, $a, $b, $c", - [(set rGPR:$dst, (add (mulhs rGPR:$a, rGPR:$b), rGPR:$c))]> { +def t2SMMLA : T2FourReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, + "smmla", "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; - let Inst{15-12} = {?, ?, ?, ?}; // Ra let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } -def t2SMMLAR: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, - "smmlar", "\t$dst, $a, $b, $c", []> { +def t2SMMLAR: T2FourReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, + "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; - let Inst{15-12} = {?, ?, ?, ?}; // Ra let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) } -def t2SMMLS: T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, - "smmls", "\t$dst, $a, $b, $c", - [(set rGPR:$dst, (sub rGPR:$c, (mulhs rGPR:$a, rGPR:$b)))]> { +def t2SMMLS: T2FourReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, + "smmls", "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; - let Inst{15-12} = {?, ?, ?, ?}; // Ra let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } -def t2SMMLSR:T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32, - "smmlsr", "\t$dst, $a, $b, $c", []> { +def t2SMMLSR:T2FourReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, + "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; - let Inst{15-12} = {?, ?, ?, ?}; // Ra let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) } multiclass T2I_smul<string opc, PatFrag opnode> { - def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, - !strconcat(opc, "bb"), "\t$dst, $a, $b", - [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16), - (sext_inreg rGPR:$b, i16)))]> { + def BB : T2ThreeReg<(outs rGPR:$Rd), (ins 
rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, + !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16), + (sext_inreg rGPR:$Rm, i16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1856,10 +2365,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, - !strconcat(opc, "bt"), "\t$dst, $a, $b", - [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16), - (sra rGPR:$b, (i32 16))))]> { + def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, + !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16), + (sra rGPR:$Rm, (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1868,10 +2377,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b01; } - def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, - !strconcat(opc, "tb"), "\t$dst, $a, $b", - [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)), - (sext_inreg rGPR:$b, i16)))]> { + def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, + !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)), + (sext_inreg rGPR:$Rm, i16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1880,10 +2389,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b10; } - def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32, - !strconcat(opc, "tt"), "\t$dst, $a, $b", - [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)), - (sra rGPR:$b, (i32 16))))]> { + def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, + !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)), + (sra rGPR:$Rm, (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1892,10 +2401,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b11; } - def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16, - !strconcat(opc, "wb"), "\t$dst, $a, $b", - [(set rGPR:$dst, (sra (opnode rGPR:$a, - (sext_inreg rGPR:$b, i16)), (i32 16)))]> { + def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, + !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (sra (opnode rGPR:$Rn, + (sext_inreg rGPR:$Rm, i16)), (i32 16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1904,10 +2413,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> { let Inst{5-4} = 0b00; } - def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16, - !strconcat(opc, "wt"), "\t$dst, $a, $b", - [(set rGPR:$dst, (sra (opnode rGPR:$a, - (sra rGPR:$b, (i32 16))), (i32 16)))]> { + def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, + !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (sra (opnode rGPR:$Rn, + (sra rGPR:$Rm, (i32 16))), (i32 16)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1919,75 +2428,75 @@ multiclass T2I_smul<string opc, PatFrag opnode> { multiclass T2I_smla<string opc, PatFrag opnode> { - def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, - !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc", - [(set rGPR:$dst, (add rGPR:$acc, - (opnode (sext_inreg rGPR:$a, i16), - (sext_inreg rGPR:$b, i16))))]> { + def BB : T2FourReg< + (outs rGPR:$Rd), (ins 
rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, + !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (add rGPR:$Ra, + (opnode (sext_inreg rGPR:$Rn, i16), + (sext_inreg rGPR:$Rm, i16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; - let Inst{15-12} = {?, ?, ?, ?}; // Ra let Inst{7-6} = 0b00; let Inst{5-4} = 0b00; } - def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, - !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc", - [(set rGPR:$dst, (add rGPR:$acc, (opnode (sext_inreg rGPR:$a, i16), - (sra rGPR:$b, (i32 16)))))]> { + def BT : T2FourReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, + !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), + (sra rGPR:$Rm, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; - let Inst{15-12} = {?, ?, ?, ?}; // Ra let Inst{7-6} = 0b00; let Inst{5-4} = 0b01; } - def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, - !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc", - [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)), - (sext_inreg rGPR:$b, i16))))]> { + def TB : T2FourReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, + !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), + (sext_inreg rGPR:$Rm, i16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; - let Inst{15-12} = {?, ?, ?, ?}; // Ra let Inst{7-6} = 0b00; let Inst{5-4} = 0b10; } - def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, - !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc", - [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)), - (sra rGPR:$b, (i32 16)))))]> { + def TT : T2FourReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, + !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), + (sra rGPR:$Rm, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; - let Inst{15-12} = {?, ?, ?, ?}; // Ra let Inst{7-6} = 0b00; let Inst{5-4} = 0b11; } - def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, - !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc", - [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a, - (sext_inreg rGPR:$b, i16)), (i32 16))))]> { + def WB : T2FourReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, + !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, + (sext_inreg rGPR:$Rm, i16)), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; - let Inst{15-12} = {?, ?, ?, ?}; // Ra let Inst{7-6} = 0b00; let Inst{5-4} = 0b00; } - def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16, - !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc", - [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a, - (sra rGPR:$b, (i32 16))), (i32 16))))]> { + def WT : T2FourReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, + !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, + (sra rGPR:$Rm, (i32 16))), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; - let Inst{15-12} = {?, ?, ?, ?}; // Ra let Inst{7-6} = 0b00; let Inst{5-4} = 0b01; } @@ -1997,62 
+2506,68 @@ defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; // Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only -def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs rGPR:$ldst,rGPR:$hdst), - (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", +def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd), + (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>; -def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs rGPR:$ldst,rGPR:$hdst), - (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", +def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd), + (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>; -def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs rGPR:$ldst,rGPR:$hdst), - (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", +def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd), + (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>; -def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs rGPR:$ldst,rGPR:$hdst), - (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", +def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd), + (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>; // Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD // These are for disassembly only. -def t2SMUAD: T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), - IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> { +def t2SMUAD: T2ThreeReg_mac< + 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), + IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []> { let Inst{15-12} = 0b1111; } -def t2SMUADX:T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), - IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> { +def t2SMUADX:T2ThreeReg_mac< + 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), + IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []> { let Inst{15-12} = 0b1111; } -def t2SMUSD: T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), - IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> { +def t2SMUSD: T2ThreeReg_mac< + 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), + IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []> { let Inst{15-12} = 0b1111; } -def t2SMUSDX:T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), - IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> { +def t2SMUSDX:T2ThreeReg_mac< + 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), + IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []> { let Inst{15-12} = 0b1111; } -def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), - (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlad", - "\t$dst, $a, $b, $acc", []>; -def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), - (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smladx", - "\t$dst, $a, $b, $acc", []>; -def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), - (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsd", - "\t$dst, $a, $b, $acc", []>; -def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), - (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsdx", - "\t$dst, $a, $b, $acc", []>; -def t2SMLALD : T2I_mac<1, 0b100, 0b1100,
(outs rGPR:$ldst,rGPR:$hdst), - (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlald", - "\t$ldst, $hdst, $a, $b", []>; -def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs rGPR:$ldst,rGPR:$hdst), - (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaldx", - "\t$ldst, $hdst, $a, $b", []>; -def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs rGPR:$ldst,rGPR:$hdst), - (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsld", - "\t$ldst, $hdst, $a, $b", []>; -def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs rGPR:$ldst,rGPR:$hdst), - (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsldx", - "\t$ldst, $hdst, $a, $b", []>; +def t2SMLAD : T2FourReg_mac< + 0, 0b010, 0b0000, (outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad", + "\t$Rd, $Rn, $Rm, $Ra", []>; +def t2SMLADX : T2FourReg_mac< + 0, 0b010, 0b0001, (outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx", + "\t$Rd, $Rn, $Rm, $Ra", []>; +def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd", + "\t$Rd, $Rn, $Rm, $Ra", []>; +def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx", + "\t$Rd, $Rn, $Rm, $Ra", []>; +def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), + (ins rGPR:$Rm, rGPR:$Rn), IIC_iMAC64, "smlald", + "\t$Ra, $Rd, $Rm, $Rn", []>; +def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), + (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlaldx", + "\t$Ra, $Rd, $Rm, $Rn", []>; +def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), + (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsld", + "\t$Ra, $Rd, $Rm, $Rn", []>; +def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), + (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx", + "\t$Ra, $Rd, $Rm, $Rn", []>; //===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions.
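Looking back at the T2I_smul/T2I_smla multiclasses: the B/T suffix picks the sign-extended bottom or top half of each source, exactly as the sext_inreg/sra fragments state. A C restatement (illustrative only; assumes arithmetic right shift on signed values):

    #include <stdint.h>

    static int32_t half(int32_t r, int top) {
        return top ? (r >> 16)      /* T: sra 16 */
                   : (int16_t)r;    /* B: sext_inreg i16 */
    }

    static int32_t smulxy(int32_t rn, int32_t rm, int xn, int xm) {
        return half(rn, xn) * half(rm, xm);    /* cannot overflow int32 */
    }

    static int32_t smlaxy(int32_t rn, int32_t rm, int32_t ra,
                          int xn, int xm) {
        /* accumulate with wraparound, as the hardware does */
        return (int32_t)((uint32_t)ra + (uint32_t)smulxy(rn, rm, xn, xm));
    }

The WB/WT forms differ only in keeping the top 32 bits of a 32x16 product, the outer sra 16 in their patterns.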
@@ -2060,99 +2575,117 @@ def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs rGPR:$ldst,rGPR:$hdst), class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { + : T2ThreeReg<oops, iops, itin, opc, asm, pattern> { let Inst{31-27} = 0b11111; let Inst{26-22} = 0b01010; let Inst{21-20} = op1; let Inst{15-12} = 0b1111; let Inst{7-6} = 0b10; let Inst{5-4} = op2; + let Rn{3-0} = Rm; } -def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, - "clz", "\t$dst, $src", [(set rGPR:$dst, (ctlz rGPR:$src))]>; +def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, + "clz", "\t$Rd, $Rm", [(set rGPR:$Rd, (ctlz rGPR:$Rm))]>; -def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, - "rbit", "\t$dst, $src", - [(set rGPR:$dst, (ARMrbit rGPR:$src))]>; +def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, + "rbit", "\t$Rd, $Rm", + [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>; -def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, - "rev", ".w\t$dst, $src", [(set rGPR:$dst, (bswap rGPR:$src))]>; +def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, + "rev", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (bswap rGPR:$Rm))]>; -def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, - "rev16", ".w\t$dst, $src", - [(set rGPR:$dst, - (or (and (srl rGPR:$src, (i32 8)), 0xFF), - (or (and (shl rGPR:$src, (i32 8)), 0xFF00), - (or (and (srl rGPR:$src, (i32 8)), 0xFF0000), - (and (shl rGPR:$src, (i32 8)), 0xFF000000)))))]>; +def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, + "rev16", ".w\t$Rd, $Rm", + [(set rGPR:$Rd, + (or (and (srl rGPR:$Rm, (i32 8)), 0xFF), + (or (and (shl rGPR:$Rm, (i32 8)), 0xFF00), + (or (and (srl rGPR:$Rm, (i32 8)), 0xFF0000), + (and (shl rGPR:$Rm, (i32 8)), 0xFF000000)))))]>; -def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr, - "revsh", ".w\t$dst, $src", - [(set rGPR:$dst, +def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, + "revsh", ".w\t$Rd, $Rm", + [(set rGPR:$Rd, (sext_inreg - (or (srl (and rGPR:$src, 0xFF00), (i32 8)), - (shl rGPR:$src, (i32 8))), i16))]>; - -def t2PKHBT : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh), - IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh", - [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF), - (and (shl rGPR:$src2, lsl_amt:$sh), + (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)), + (shl rGPR:$Rm, (i32 8))), i16))]>; + +def t2PKHBT : T2ThreeReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh), + IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh", + [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF), + (and (shl rGPR:$Rm, lsl_amt:$sh), 0xFFFF0000)))]>, - Requires<[HasT2ExtractPack]> { + Requires<[HasT2ExtractPack, IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-20} = 0b01100; let Inst{5} = 0; // BT form let Inst{4} = 0; + + bits<8> sh; + let Inst{14-12} = sh{7-5}; + let Inst{7-6} = sh{4-3}; } // Alternate cases for PKHBT where identities eliminate some nodes. 
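pkhbt above and pkhtb just below each pack one halfword from each source. In C (illustrative; pkhtb's asr #32 is encoded as shift 0 and, for the low half, is equivalent to asr #31, which keeps the sketch within defined C shifts):

    #include <stdint.h>

    static uint32_t pkhbt(uint32_t rn, uint32_t rm, unsigned lsl_amt) {
        return (rn & 0x0000FFFFu) | ((rm << lsl_amt) & 0xFFFF0000u);
    }

    static uint32_t pkhtb(uint32_t rn, uint32_t rm, unsigned asr_amt) {
        unsigned sh = (asr_amt >= 32) ? 31 : asr_amt;  /* asr #32 ~ #31 here */
        return (rn & 0xFFFF0000u)
             | ((uint32_t)((int32_t)rm >> sh) & 0x0000FFFFu);
    }

The "alternate cases" patterns that follow exist because, for example, (or (and a, 0xFFFF), (and b, 0xFFFF0000)) is already a pkhbt with a zero shift amount.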
def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)), (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>, - Requires<[HasT2ExtractPack]>; + Requires<[HasT2ExtractPack, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)), (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>, - Requires<[HasT2ExtractPack]>; + Requires<[HasT2ExtractPack, IsThumb2]>; // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and // will match the pattern below. -def t2PKHTB : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh), - IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh", - [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF0000), - (and (sra rGPR:$src2, asr_amt:$sh), +def t2PKHTB : T2ThreeReg< + (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh), + IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh", + [(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000), + (and (sra rGPR:$Rm, asr_amt:$sh), 0xFFFF)))]>, - Requires<[HasT2ExtractPack]> { + Requires<[HasT2ExtractPack, IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-20} = 0b01100; let Inst{5} = 1; // TB form let Inst{4} = 0; + + bits<8> sh; + let Inst{14-12} = sh{7-5}; + let Inst{7-6} = sh{4-3}; } // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)), (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>, - Requires<[HasT2ExtractPack]>; + Requires<[HasT2ExtractPack, IsThumb2]>; def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)), (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>, - Requires<[HasT2ExtractPack]>; + Requires<[HasT2ExtractPack, IsThumb2]>; //===----------------------------------------------------------------------===// // Comparison Instructions... // defm t2CMP : T2I_cmp_irs<0b1101, "cmp", + IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; -defm t2CMPz : T2I_cmp_irs<0b1101, "cmp", - BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>; + +def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_imm:$imm), + (t2CMPri GPR:$lhs, t2_so_imm:$imm)>; +def : T2Pat<(ARMcmpZ GPR:$lhs, rGPR:$rhs), + (t2CMPrr GPR:$lhs, rGPR:$rhs)>; +def : T2Pat<(ARMcmpZ GPR:$lhs, t2_so_reg:$rhs), + (t2CMPrs GPR:$lhs, t2_so_reg:$rhs)>; //FIXME: Disable CMN, as CCodes are backwards from compare expectations // Compare-to-zero still works out, just not the relationals //defm t2CMN : T2I_cmp_irs<0b1000, "cmn", // BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; defm t2CMNz : T2I_cmp_irs<0b1000, "cmn", + IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; //def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm), @@ -2162,18 +2695,21 @@ def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm), (t2CMNzri GPR:$src, t2_so_imm_neg:$imm)>; defm t2TST : T2I_cmp_irs<0b0000, "tst", - BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>>; + IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>; defm t2TEQ : T2I_cmp_irs<0b0100, "teq", - BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>; + IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, + BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>; // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. 
:( let neverHasSideEffects = 1 in { -def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr, - "mov", ".w\t$dst, $true", - [/*(set rGPR:$dst, (ARMcmov rGPR:$false, rGPR:$true, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $dst"> { +def t2MOVCCr : T2TwoReg< + (outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm), IIC_iCMOVr, + "mov", ".w\t$Rd, $Rm", + [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd"> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -2183,10 +2719,11 @@ def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr, let Inst{7-4} = 0b0000; } -def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true), - IIC_iCMOVi, "mov", ".w\t$dst, $true", -[/*(set rGPR:$dst,(ARMcmov rGPR:$false,t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $dst"> { +let isMoveImm = 1 in +def t2MOVCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm), + IIC_iCMOVi, "mov", ".w\t$Rd, $imm", +[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd"> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = 0b0010; @@ -2195,9 +2732,49 @@ def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true), let Inst{15} = 0; } +let isMoveImm = 1 in +def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm_hilo16:$imm), + IIC_iCMOVi, + "movw", "\t$Rd, $imm", []>, + RegConstraint<"$false = $Rd"> { + let Inst{31-27} = 0b11110; + let Inst{25} = 1; + let Inst{24-21} = 0b0010; + let Inst{20} = 0; // The S bit. + let Inst{15} = 0; + + bits<4> Rd; + bits<16> imm; + + let Inst{11-8} = Rd; + let Inst{19-16} = imm{15-12}; + let Inst{26} = imm{11}; + let Inst{14-12} = imm{10-8}; + let Inst{7-0} = imm{7-0}; +} + +let isMoveImm = 1 in +def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst), + (ins rGPR:$false, i32imm:$src, pred:$p), + IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">; + +let isMoveImm = 1 in +def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm), + IIC_iCMOVi, "mvn", ".w\t$Rd, $imm", +[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm, + imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd"> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = 0b0011; + let Inst{20} = 0; // The S bit. + let Inst{19-16} = 0b1111; // Rn + let Inst{15} = 0; +} + class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { + : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -2205,22 +2782,22 @@ class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, let Inst{19-16} = 0b1111; // Rn let Inst{5-4} = opcod; // Shift type. 
} -def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$dst), - (ins rGPR:$false, rGPR:$true, i32imm:$rhs), - IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>, - RegConstraint<"$false = $dst">; -def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$dst), - (ins rGPR:$false, rGPR:$true, i32imm:$rhs), - IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>, - RegConstraint<"$false = $dst">; -def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$dst), - (ins rGPR:$false, rGPR:$true, i32imm:$rhs), - IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>, - RegConstraint<"$false = $dst">; -def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst), - (ins rGPR:$false, rGPR:$true, i32imm:$rhs), - IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>, - RegConstraint<"$false = $dst">; +def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$Rd), + (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), + IIC_iCMOVsi, "lsl", ".w\t$Rd, $Rm, $imm", []>, + RegConstraint<"$false = $Rd">; +def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$Rd), + (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), + IIC_iCMOVsi, "lsr", ".w\t$Rd, $Rm, $imm", []>, + RegConstraint<"$false = $Rd">; +def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$Rd), + (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), + IIC_iCMOVsi, "asr", ".w\t$Rd, $Rm, $imm", []>, + RegConstraint<"$false = $Rd">; +def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), + (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), + IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>, + RegConstraint<"$false = $Rd">; } // neverHasSideEffects //===----------------------------------------------------------------------===// @@ -2229,78 +2806,29 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst), // memory barriers protect the atomic sequences let hasSideEffects = 1 in { -def t2DMBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dmb", "", - [(ARMMemBarrier)]>, Requires<[IsThumb, HasDB]> { - let Inst{31-4} = 0xF3BF8F5; - // FIXME: add support for options other than a full system DMB - let Inst{3-0} = 0b1111; -} - -def t2DSBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dsb", "", - [(ARMSyncBarrier)]>, Requires<[IsThumb, HasDB]> { - let Inst{31-4} = 0xF3BF8F4; - // FIXME: add support for options other than a full system DSB - let Inst{3-0} = 0b1111; -} +def t2DMB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, + "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>, + Requires<[IsThumb, HasDB]> { + bits<4> opt; + let Inst{31-4} = 0xf3bf8f5; + let Inst{3-0} = opt; } - -// Helper class for multiclass T2MemB -- for disassembly only -class T2I_memb<string opc, string asm> - : T2I<(outs), (ins), NoItinerary, opc, asm, - [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasV7]> { - let Inst{31-20} = 0xf3b; - let Inst{15-14} = 0b10; - let Inst{12} = 0; } -multiclass T2MemB<bits<4> op7_4, string opc> { - - def st : T2I_memb<opc, "\tst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b1110; - } - - def ish : T2I_memb<opc, "\tish"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b1011; - } - - def ishst : T2I_memb<opc, "\tishst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b1010; - } - - def nsh : T2I_memb<opc, "\tnsh"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0111; - } - - def nshst : T2I_memb<opc, "\tnshst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0110; - } - - def osh : T2I_memb<opc, "\tosh"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0011; - } - - def oshst : T2I_memb<opc, "\toshst"> { - let Inst{7-4} = op7_4; - let Inst{3-0} = 0b0010; - } +def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, + 
"dsb", "\t$opt", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsThumb, HasDB]> { + bits<4> opt; + let Inst{31-4} = 0xf3bf8f4; + let Inst{3-0} = opt; } -// These DMB variants are for disassembly only. -defm t2DMB : T2MemB<0b0101, "dmb">; - -// These DSB variants are for disassembly only. -defm t2DSB : T2MemB<0b0100, "dsb">; - // ISB has only full system option -- for disassembly only -def t2ISBsy : T2I_memb<"isb", ""> { - let Inst{7-4} = 0b0110; +def t2ISB : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "isb", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsThumb2, HasV7]> { + let Inst{31-4} = 0xf3bf8f6; let Inst{3-0} = 0b1111; } @@ -2314,6 +2842,11 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Inst{7-6} = 0b01; let Inst{5-4} = opcod; let Inst{3-0} = 0b1111; + + bits<4> Rn; + bits<4> Rt; + let Inst{19-16} = Rn; + let Inst{15-12} = Rt; } class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string opc, string asm, string cstr, @@ -2324,60 +2857,88 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Inst{11-8} = rt2; let Inst{7-6} = 0b01; let Inst{5-4} = opcod; + + bits<4> Rd; + bits<4> Rn; + bits<4> Rt; + let Inst{11-8} = Rd; + let Inst{19-16} = Rn; + let Inst{15-12} = Rt; } let mayLoad = 1 in { -def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone, - Size4Bytes, NoItinerary, "ldrexb", "\t$dest, [$ptr]", +def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone, + Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, [$Rn]", "", []>; -def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone, - Size4Bytes, NoItinerary, "ldrexh", "\t$dest, [$ptr]", +def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone, + Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, [$Rn]", "", []>; -def t2LDREX : Thumb2I<(outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone, +def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone, Size4Bytes, NoItinerary, - "ldrex", "\t$dest, [$ptr]", "", + "ldrex", "\t$Rt, [$Rn]", "", []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000101; let Inst{11-8} = 0b1111; let Inst{7-0} = 0b00000000; // imm8 = 0 + + bits<4> Rn; + bits<4> Rt; + let Inst{19-16} = Rn; + let Inst{15-12} = Rt; } -def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$dest, rGPR:$dest2), (ins rGPR:$ptr), +def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn), AddrModeNone, Size4Bytes, NoItinerary, - "ldrexd", "\t$dest, $dest2, [$ptr]", "", - [], {?, ?, ?, ?}>; + "ldrexd", "\t$Rt, $Rt2, [$Rn]", "", + [], {?, ?, ?, ?}> { + bits<4> Rt2; + let Inst{11-8} = Rt2; +} } -let mayStore = 1, Constraints = "@earlyclobber $success" in { -def t2STREXB : T2I_strex<0b00, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr), +let mayStore = 1, Constraints = "@earlyclobber $Rd" in { +def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn), AddrModeNone, Size4Bytes, NoItinerary, - "strexb", "\t$success, $src, [$ptr]", "", []>; -def t2STREXH : T2I_strex<0b01, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr), + "strexb", "\t$Rd, $Rt, [$Rn]", "", []>; +def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn), AddrModeNone, Size4Bytes, NoItinerary, - "strexh", "\t$success, $src, [$ptr]", "", []>; -def t2STREX : Thumb2I<(outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr), + "strexh", "\t$Rd, $Rt, [$Rn]", "", []>; +def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins 
rGPR:$Rt, rGPR:$Rn), AddrModeNone, Size4Bytes, NoItinerary, - "strex", "\t$success, $src, [$ptr]", "", + "strex", "\t$Rd, $Rt, [$Rn]", "", []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000100; let Inst{7-0} = 0b00000000; // imm8 = 0 + + bits<4> Rd; + bits<4> Rn; + bits<4> Rt; + let Inst{11-8} = Rd; + let Inst{19-16} = Rn; + let Inst{15-12} = Rt; } -def t2STREXD : T2I_strex<0b11, (outs rGPR:$success), - (ins rGPR:$src, rGPR:$src2, rGPR:$ptr), +def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), + (ins rGPR:$Rt, rGPR:$Rt2, rGPR:$Rn), AddrModeNone, Size4Bytes, NoItinerary, - "strexd", "\t$success, $src, $src2, [$ptr]", "", [], - {?, ?, ?, ?}>; + "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]", "", [], + {?, ?, ?, ?}> { + bits<4> Rt2; + let Inst{11-8} = Rt2; +} } // Clear-Exclusive is for disassembly only. -def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", - [/* For disassembly only; pattern left blank */]>, - Requires<[IsARM, HasV7]> { - let Inst{31-20} = 0xf3b; +def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsThumb2, HasV7]> { + let Inst{31-16} = 0xf3bf; let Inst{15-14} = 0b10; + let Inst{13} = 0; let Inst{12} = 0; + let Inst{11-8} = 0b1111; let Inst{7-4} = 0b0010; + let Inst{3-0} = 0b1111; } //===----------------------------------------------------------------------===// @@ -2386,7 +2947,7 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", // __aeabi_read_tp preserves the registers r1-r3. let isCall = 1, - Defs = [R0, R12, LR, CPSR] in { + Defs = [R0, R12, LR, CPSR], Uses = [SP] in { def t2TPsoft : T2XI<(outs), (ins), IIC_Br, "bl\t__aeabi_read_tp", [(set R0, ARMthread_pointer)]> { @@ -2413,32 +2974,18 @@ let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, - D31 ], hasSideEffects = 1, isBarrier = 1 in { + D31 ], hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), - AddrModeNone, SizeSpecial, NoItinerary, - "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" - "adds\t$val, #7\n\t" - "str\t$val, [$src, #4]\n\t" - "movs\tr0, #0\n\t" - "b\t1f\n\t" - "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" - "1:", "", + AddrModeNone, SizeSpecial, NoItinerary, "", "", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>, Requires<[IsThumb2, HasVFP2]>; } let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ], - hasSideEffects = 1, isBarrier = 1 in { + hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in { def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), - AddrModeNone, SizeSpecial, NoItinerary, - "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" - "adds\t$val, #7\n\t" - "str\t$val, [$src, #4]\n\t" - "movs\tr0, #0\n\t" - "b\t1f\n\t" - "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" - "1:", "", + AddrModeNone, SizeSpecial, NoItinerary, "", "", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>, Requires<[IsThumb2, NoVFP]>; } @@ -2453,82 +3000,77 @@ let Defs = // operand list. // FIXME: Should pc be an implicit operand like PICADD, etc? 
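As a side illustration (not part of the patch; the helper name is made up): the t2LDMIA_RET definition just below splices its register list into Inst{15-0} as a plain bitmask, bit r set exactly when register r appears in the list. A minimal standalone C++ sketch of how that field is formed:

#include <cstdint>
#include <initializer_list>

// Build the 16-bit register_list field of a Thumb2 LDM/STM encoding:
// bit r is set exactly when architectural register r is in the list.
uint16_t regListMask(std::initializer_list<unsigned> regs) {
  uint16_t mask = 0;
  for (unsigned r : regs)
    mask |= uint16_t(1u << r);
  return mask;
}

// Example: regListMask({4, 5, 15}) == 0x8030, the field for {r4, r5, pc}.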
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, - hasExtraDefRegAllocReq = 1 in - def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, - reglist:$dsts, variable_ops), IIC_Br, - "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts", - "$addr.addr = $wb", []> { + hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in +def t2LDMIA_RET: T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, + reglist:$regs, variable_ops), + IIC_iLoad_mBr, + "ldmia${p}.w\t$Rn!, $regs", + "$Rn = $wb", []> { + bits<4> Rn; + bits<16> regs; + let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; - let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' - let Inst{22} = 0; - let Inst{21} = 1; // The W bit. - let Inst{20} = 1; // Load + let Inst{24-23} = 0b01; // Increment After + let Inst{22} = 0; + let Inst{21} = 1; // Writeback + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-0} = regs; } let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in -def t2B : T2XI<(outs), (ins brtarget:$target), IIC_Br, +def t2B : T2XI<(outs), (ins uncondbrtarget:$target), IIC_Br, "b.w\t$target", [(br bb:$target)]> { let Inst{31-27} = 0b11110; let Inst{15-14} = 0b10; let Inst{12} = 1; + + bits<20> target; + let Inst{26} = target{19}; + let Inst{11} = target{18}; + let Inst{13} = target{17}; + let Inst{21-16} = target{16-11}; + let Inst{10-0} = target{10-0}; } let isNotDuplicable = 1, isIndirectBranch = 1 in { -def t2BR_JT : - T2JTI<(outs), - (ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "mov\tpc, $target$jt", - [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0100100; - let Inst{19-16} = 0b1111; - let Inst{14-12} = 0b000; - let Inst{11-8} = 0b1111; // Rd = pc - let Inst{7-4} = 0b0000; -} +def t2BR_JT : t2PseudoInst<(outs), + (ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id), + SizeSpecial, IIC_Br, + [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>; // FIXME: Add a non-pc based case that can be predicated. 
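The t2B definition above shows the encoding idiom this import uses throughout: a declared operand field (bits<20> target) is scattered across non-contiguous Inst bits by slice assignments. A standalone C++ restatement of exactly those five let-lines (an illustrative sketch, not part of the patch; the function name is made up):

#include <cstdint>

// Mirrors t2B: Inst{26} = target{19}, Inst{11} = target{18},
// Inst{13} = target{17}, Inst{21-16} = target{16-11}, Inst{10-0} = target{10-0}.
uint32_t placeT2BTarget(uint32_t insn, uint32_t target /* 20 bits */) {
  insn |= ((target >> 19) & 0x1u)  << 26;
  insn |= ((target >> 18) & 0x1u)  << 11;
  insn |= ((target >> 17) & 0x1u)  << 13;
  insn |= ((target >> 11) & 0x3Fu) << 16;
  insn |=   target        & 0x7FFu;
  return insn;
}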
-def t2TBB : - T2JTI<(outs), - (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "tbb\t$index$jt", []> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0001101; - let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction) - let Inst{15-8} = 0b11110000; - let Inst{7-4} = 0b0000; // B form -} - -def t2TBH : - T2JTI<(outs), - (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "tbh\t$index$jt", []> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0001101; - let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction) - let Inst{15-8} = 0b11110000; - let Inst{7-4} = 0b0001; // H form -} - -// Generic versions of the above two instructions, for disassembly only - -def t2TBBgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br, - "tbb", "\t[$a, $b]", []>{ - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0001101; - let Inst{15-8} = 0b11110000; - let Inst{7-4} = 0b0000; // B form -} - -def t2TBHgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br, - "tbh", "\t[$a, $b, lsl #1]", []> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0001101; - let Inst{15-8} = 0b11110000; - let Inst{7-4} = 0b0001; // H form +def t2TBB_JT : t2PseudoInst<(outs), + (ins GPR:$index, i32imm:$jt, i32imm:$id), + SizeSpecial, IIC_Br, []>; + +def t2TBH_JT : t2PseudoInst<(outs), + (ins GPR:$index, i32imm:$jt, i32imm:$id), + SizeSpecial, IIC_Br, []>; + +def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br, + "tbb", "\t[$Rn, $Rm]", []> { + bits<4> Rn; + bits<4> Rm; + let Inst{31-20} = 0b111010001101; + let Inst{19-16} = Rn; + let Inst{15-5} = 0b11110000000; + let Inst{4} = 0; // B form + let Inst{3-0} = Rm; +} + +def t2TBH : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br, + "tbh", "\t[$Rn, $Rm, lsl #1]", []> { + bits<4> Rn; + bits<4> Rm; + let Inst{31-20} = 0b111010001101; + let Inst{19-16} = Rn; + let Inst{15-5} = 0b11110000000; + let Inst{4} = 1; // H form + let Inst{3-0} = Rm; } } // isNotDuplicable, isIndirectBranch @@ -2543,6 +3085,16 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, let Inst{31-27} = 0b11110; let Inst{15-14} = 0b10; let Inst{12} = 0; + + bits<4> p; + let Inst{25-22} = p; + + bits<21> target; + let Inst{26} = target{20}; + let Inst{11} = target{19}; + let Inst{13} = target{18}; + let Inst{21-16} = target{17-12}; + let Inst{10-0} = target{11-1}; } @@ -2554,6 +3106,11 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // 16-bit instruction. let Inst{31-16} = 0x0000; let Inst{15-8} = 0b10111111; + + bits<4> cc; + bits<4> mask; + let Inst{7-4} = cc; + let Inst{3-0} = mask; } // Branch and Exchange Jazelle -- for disassembly only @@ -2565,22 +3122,44 @@ def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", let Inst{25-20} = 0b111100; let Inst{15-14} = 0b10; let Inst{12} = 0; + + bits<4> func; + let Inst{19-16} = func; } -// Change Processor State is a system instruction -- for disassembly only. -// The singleton $opt operand contains the following information: -// opt{4-0} = mode from Inst{4-0} -// opt{5} = changemode from Inst{17} -// opt{8-6} = AIF from Inst{8-6} -// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable -def t2CPS : T2XI<(outs),(ins cps_opt:$opt), NoItinerary, "cps$opt", - [/* For disassembly only; pattern left blank */]> { +// Change Processor State is a system instruction -- for disassembly and +// parsing only. +// FIXME: Since the asm parser has currently no clean way to handle optional +// operands, create 3 versions of the same instruction. 
Once there's a clean +// framework to represent optional operands, change this behavior. +class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary, + !strconcat("cps", asm_op), + [/* For disassembly only; pattern left blank */]> { + bits<2> imod; + bits<3> iflags; + bits<5> mode; + bit M; + let Inst{31-27} = 0b11110; - let Inst{26} = 0; + let Inst{26} = 0; let Inst{25-20} = 0b111010; + let Inst{19-16} = 0b1111; let Inst{15-14} = 0b10; - let Inst{12} = 0; -} + let Inst{12} = 0; + let Inst{10-9} = imod; + let Inst{8} = M; + let Inst{7-5} = iflags; + let Inst{4-0} = mode; +} + +let M = 1 in + def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode), + "$imod.w\t$iflags, $mode">; +let mode = 0, M = 0 in + def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags), + "$imod.w\t$iflags">; +let imod = 0, iflags = 0, M = 1 in + def t2CPS1p : t2CPS<(ins i32imm:$mode), "\t$mode">; // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions @@ -2589,6 +3168,7 @@ class T2I_hint<bits<8> op7_0, string opc, string asm> : T2I<(outs), (ins), NoItinerary, opc, asm, [/* For disassembly only; pattern left blank */]> { let Inst{31-20} = 0xf3a; + let Inst{19-16} = 0b1111; let Inst{15-14} = 0b10; let Inst{12} = 0; let Inst{10-8} = 0b000; @@ -2608,6 +3188,9 @@ def t2DBG : T2I<(outs),(ins i32imm:$opt), NoItinerary, "dbg", "\t$opt", let Inst{12} = 0; let Inst{10-8} = 0b000; let Inst{7-4} = 0b1111; + + bits<4> opt; + let Inst{3-0} = opt; } // Secure Monitor Call is a system instruction -- for disassembly only @@ -2617,83 +3200,86 @@ def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt", let Inst{31-27} = 0b11110; let Inst{26-20} = 0b1111111; let Inst{15-12} = 0b1000; -} -// Store Return State is a system instruction -- for disassembly only -def t2SRSDBW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0000010; // W = 1 + bits<4> opt; + let Inst{19-16} = opt; } -def t2SRSDB : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0000000; // W = 0 -} +class T2SRS<bits<12> op31_20, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + let Inst{31-20} = op31_20{11-0}; -def t2SRSIAW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0011010; // W = 1 + bits<5> mode; + let Inst{4-0} = mode{4-0}; } -def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0011000; // W = 0 -} +// Store Return State is a system instruction -- for disassembly only +def t2SRSDBW : T2SRS<0b111010000010, + (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode", + [/* For disassembly only; pattern left blank */]>; +def t2SRSDB : T2SRS<0b111010000000, + (outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode", + [/* For disassembly only; pattern left blank */]>; +def t2SRSIAW : T2SRS<0b111010011010, + (outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode", + [/* For disassembly only; pattern left blank */]>; +def t2SRSIA : T2SRS<0b111010011000, + (outs), (ins 
i32imm:$mode),NoItinerary,"srsia","\tsp, $mode", + [/* For disassembly only; pattern left blank */]>; // Return From Exception is a system instruction -- for disassembly only -def t2RFEDBW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base!", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0000011; // W = 1 -} -def t2RFEDB : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeab", "\t$base", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0000001; // W = 0 -} +class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + let Inst{31-20} = op31_20{11-0}; -def t2RFEIAW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base!", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0011011; // W = 1 + bits<4> Rn; + let Inst{19-16} = Rn; } -def t2RFEIA : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11101; - let Inst{26-20} = 0b0011001; // W = 0 -} +def t2RFEDBW : T2RFE<0b111010000011, + (outs), (ins rGPR:$Rn), NoItinerary, "rfedb", "\t$Rn!", + [/* For disassembly only; pattern left blank */]>; +def t2RFEDB : T2RFE<0b111010000001, + (outs), (ins rGPR:$Rn), NoItinerary, "rfedb", "\t$Rn", + [/* For disassembly only; pattern left blank */]>; +def t2RFEIAW : T2RFE<0b111010011011, + (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn!", + [/* For disassembly only; pattern left blank */]>; +def t2RFEIA : T2RFE<0b111010011001, + (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn", + [/* For disassembly only; pattern left blank */]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns // -// Two piece so_imms. -def : T2Pat<(or rGPR:$LHS, t2_so_imm2part:$RHS), - (t2ORRri (t2ORRri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), - (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(xor rGPR:$LHS, t2_so_imm2part:$RHS), - (t2EORri (t2EORri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), - (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(add rGPR:$LHS, t2_so_imm2part:$RHS), - (t2ADDri (t2ADDri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), - (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(add rGPR:$LHS, t2_so_neg_imm2part:$RHS), - (t2SUBri (t2SUBri rGPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)), - (t2_so_neg_imm2part_2 imm:$RHS))>; - // 32-bit immediate using movw + movt. -// This is a single pseudo instruction to make it re-materializable. Remove -// when we can do generalized remat. -let isReMaterializable = 1 in -def t2MOVi32imm : T2Ix2<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi, - "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", - [(set rGPR:$dst, (i32 imm:$src))]>; +// This is a single pseudo instruction to make it re-materializable. +// FIXME: Remove this when we can do generalized remat. +let isReMaterializable = 1, isMoveImm = 1 in +def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2, + [(set rGPR:$dst, (i32 imm:$src))]>, + Requires<[IsThumb, HasV6T2]>; + +// Pseudo instruction that combines movw + movt + add pc (if pic). +// It also makes it possible to rematerialize the instructions. +// FIXME: Remove this when we can do generalized remat and when machine licm +// can properly hoist the instructions.
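Both t2MOVi32imm above and the t2MOV_ga_* pseudos below expand to a movw/movt pair: movw writes the low halfword and zeroes the top, movt then overwrites only the top halfword. A small sketch of the split (illustrative only, not part of the patch; names are made up):

#include <cstdint>

struct MovwMovt { uint16_t lo16, hi16; };

// movw rd, #lo16 followed by movt rd, #hi16 reassembles the 32-bit immediate.
MovwMovt splitImm32(uint32_t imm) {
  return { uint16_t(imm & 0xFFFFu), uint16_t(imm >> 16) };
}

// Example: splitImm32(0x12345678) yields { 0x5678, 0x1234 }.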
+let isReMaterializable = 1 in { +def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2addpc, + [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, + Requires<[IsThumb2, UseMovt]>; + +def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), + IIC_iMOVix2, + [(set rGPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>, + Requires<[IsThumb2, UseMovt]>; +} // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, @@ -2709,10 +3295,9 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // be expanded into two instructions late to allow if-conversion and // scheduling. let canFoldAsLoad = 1, isReMaterializable = 1 in -def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, - "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", - [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), +def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp), + IIC_iLoadiALU, + [(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; @@ -2720,48 +3305,128 @@ def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), // Move between special register and ARM core register -- for disassembly only // -// Rd = Instr{11-8} -def t2MRS : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11110; - let Inst{26} = 0; - let Inst{25-21} = 0b11111; - let Inst{20} = 0; // The R bit. - let Inst{15-14} = 0b10; - let Inst{12} = 0; +class T2SpecialReg<bits<12> op31_20, bits<2> op15_14, bits<1> op12, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + let Inst{31-20} = op31_20{11-0}; + let Inst{15-14} = op15_14{1-0}; + let Inst{12} = op12{0}; } -// Rd = Instr{11-8} -def t2MRSsys : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11110; - let Inst{26} = 0; - let Inst{25-21} = 0b11111; - let Inst{20} = 1; // The R bit. - let Inst{15-14} = 0b10; - let Inst{12} = 0; +class T2MRS<bits<12> op31_20, bits<2> op15_14, bits<1> op12, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : T2SpecialReg<op31_20, op15_14, op12, oops, iops, itin, opc, asm, pattern> { + bits<4> Rd; + let Inst{11-8} = Rd; + let Inst{19-16} = 0b1111; } -// Rn = Inst{19-16} -def t2MSR : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr", - "\tcpsr$mask, $src", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11110; - let Inst{26} = 0; - let Inst{25-21} = 0b11100; - let Inst{20} = 0; // The R bit. - let Inst{15-14} = 0b10; - let Inst{12} = 0; +def t2MRS : T2MRS<0b111100111110, 0b10, 0, + (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr", + [/* For disassembly only; pattern left blank */]>; +def t2MRSsys : T2MRS<0b111100111111, 0b10, 0, + (outs rGPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", + [/* For disassembly only; pattern left blank */]>; + +// Move from ARM core register to Special Register +// +// No need to have both system and application versions, the encodings are the +// same and the assembly parser has no way to distinguish between them. 
The mask +// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains +// the mask with the fields to be accessed in the special register. +def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */, + 0 /* op12 */, (outs), (ins msr_mask:$mask, rGPR:$Rn), + NoItinerary, "msr", "\t$mask, $Rn", + [/* For disassembly only; pattern left blank */]> { + bits<5> mask; + bits<4> Rn; + let Inst{19-16} = Rn; + let Inst{20} = mask{4}; // R Bit + let Inst{13} = 0b0; + let Inst{11-8} = mask{3-0}; } -// Rn = Inst{19-16} -def t2MSRsys : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr", - "\tspsr$mask, $src", +//===----------------------------------------------------------------------===// +// Move between coprocessor and ARM core register -- for disassembly only +// + +class t2MovRCopro<string opc, bit direction> + : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), + [/* For disassembly only; pattern left blank */]> { + let Inst{27-24} = 0b1110; + let Inst{20} = direction; + let Inst{4} = 1; + + bits<4> Rt; + bits<4> cop; + bits<3> opc1; + bits<3> opc2; + bits<4> CRm; + bits<4> CRn; + + let Inst{15-12} = Rt; + let Inst{11-8} = cop; + let Inst{23-21} = opc1; + let Inst{7-5} = opc2; + let Inst{3-0} = CRm; + let Inst{19-16} = CRn; +} + +def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */>; +def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */>; + +class t2MovRRCopro<string opc, bit direction> + : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), + !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), + [/* For disassembly only; pattern left blank */]> { + let Inst{27-24} = 0b1100; + let Inst{23-21} = 0b010; + let Inst{20} = direction; + + bits<4> Rt; + bits<4> Rt2; + bits<4> cop; + bits<4> opc1; + bits<4> CRm; + + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + let Inst{11-8} = cop; + let Inst{7-4} = opc1; + let Inst{3-0} = CRm; +} + +def t2MCRR2 : t2MovRRCopro<"mcrr2", + 0 /* from ARM core register to coprocessor */>; +def t2MRRC2 : t2MovRRCopro<"mrrc2", + 1 /* from coprocessor to ARM core register */>; + +//===----------------------------------------------------------------------===// +// Other Coprocessor Instructions. For disassembly only. +// + +def t2CDP2 : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [/* For disassembly only; pattern left blank */]> { - let Inst{31-27} = 0b11110; - let Inst{26} = 0; - let Inst{25-21} = 0b11100; - let Inst{20} = 1; // The R bit. 
- let Inst{15-14} = 0b10; - let Inst{12} = 0; + let Inst{27-24} = 0b1110; + + bits<4> opc1; + bits<4> CRn; + bits<4> CRd; + bits<4> cop; + bits<3> opc2; + bits<4> CRm; + + let Inst{3-0} = CRm; + let Inst{4} = 0; + let Inst{7-5} = opc2; + let Inst{11-8} = cop; + let Inst{15-12} = CRd; + let Inst{19-16} = CRn; + let Inst{23-20} = opc1; } diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index c29e096..920c5c9 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1,4 +1,4 @@ -//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===// +//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -11,30 +11,26 @@ // //===----------------------------------------------------------------------===// -def SDT_FTOI : -SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; -def SDT_ITOF : -SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; -def SDT_CMPFP0 : -SDTypeProfile<0, 1, [SDTCisFP<0>]>; -def SDT_VMOVDRR : -SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, - SDTCisSameAs<1, 2>]>; - -def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; -def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; -def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; -def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; -def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>; -def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>; -def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>; -def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; +def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; +def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; +def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; +def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, + SDTCisSameAs<1, 2>]>; + +def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; +def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; +def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; +def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; +def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>; +def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; +def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; +def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; + //===----------------------------------------------------------------------===// // Operand Definitions. 
// - def vfp_f32imm : Operand<f32>, PatLeaf<(f32 fpimm), [{ return ARM::getVFPf32Imm(N->getValueAPF()) != -1; @@ -55,86 +51,136 @@ def vfp_f64imm : Operand<f64>, // let canFoldAsLoad = 1, isReMaterializable = 1 in { -def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), - IIC_fpLoad64, "vldr", ".64\t$dst, $addr", - [(set DPR:$dst, (f64 (load addrmode5:$addr)))]>; -def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr), - IIC_fpLoad32, "vldr", ".32\t$dst, $addr", - [(set SPR:$dst, (load addrmode5:$addr))]>; -} // canFoldAsLoad +def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr), + IIC_fpLoad64, "vldr", ".64\t$Dd, $addr", + [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>; -def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr), - IIC_fpStore64, "vstr", ".64\t$src, $addr", - [(store (f64 DPR:$src), addrmode5:$addr)]>; +def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), + IIC_fpLoad32, "vldr", ".32\t$Sd, $addr", + [(set SPR:$Sd, (load addrmode5:$addr))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} -def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), - IIC_fpStore32, "vstr", ".32\t$src, $addr", - [(store SPR:$src, addrmode5:$addr)]>; +} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in' -//===----------------------------------------------------------------------===// -// Load / store multiple Instructions. -// +def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), + IIC_fpStore64, "vstr", ".64\t$Dd, $addr", + [(store (f64 DPR:$Dd), addrmode5:$addr)]>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { -def VLDMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, - variable_ops), IndexModeNone, IIC_fpLoadm, - "vldm${addr:submode}${p}\t$addr, $dsts", "", []> { - let Inst{20} = 1; +def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), + IIC_fpStore32, "vstr", ".32\t$Sd, $addr", + [(store SPR:$Sd, addrmode5:$addr)]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } -def VLDMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, - variable_ops), IndexModeNone, IIC_fpLoadm, - "vldm${addr:submode}${p}\t$addr, $dsts", "", []> { - let Inst{20} = 1; -} +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. 
+// -def VLDMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, - reglist:$dsts, variable_ops), - IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t$addr!, $dsts", - "$addr.addr = $wb", []> { - let Inst{20} = 1; +multiclass vfp_ldst_mult<string asm, bit L_bit, + InstrItinClass itin, InstrItinClass itin_upd> { + // Double Precision + def DIA : + AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + IndexModeNone, itin, + !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> { + let Inst{24-23} = 0b01; // Increment After + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + } + def DIA_UPD : + AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + IndexModeUpd, itin_upd, + !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + let Inst{24-23} = 0b01; // Increment After + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + } + def DDB : + AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + IndexModeNone, itin, + !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> { + let Inst{24-23} = 0b10; // Decrement Before + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + } + def DDB_UPD : + AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), + IndexModeUpd, itin_upd, + !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + let Inst{24-23} = 0b10; // Decrement Before + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + } + + // Single Precision + def SIA : + AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops), + IndexModeNone, itin, + !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> { + let Inst{24-23} = 0b01; // Increment After + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. + let D = VFPNeonDomain; + } + def SIA_UPD : + AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops), + IndexModeUpd, itin_upd, + !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + let Inst{24-23} = 0b01; // Increment After + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. + let D = VFPNeonDomain; + } + def SDB : + AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops), + IndexModeNone, itin, + !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> { + let Inst{24-23} = 0b10; // Decrement Before + let Inst{21} = 0; // No writeback + let Inst{20} = L_bit; + + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. + let D = VFPNeonDomain; + } + def SDB_UPD : + AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops), + IndexModeUpd, itin_upd, + !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + let Inst{24-23} = 0b10; // Decrement Before + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. 
+ let D = VFPNeonDomain; + } } -def VLDMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, - reglist:$dsts, variable_ops), - IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t$addr!, $dsts", - "$addr.addr = $wb", []> { - let Inst{20} = 1; -} -} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq +let neverHasSideEffects = 1 in { -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { -def VSTMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, - variable_ops), IndexModeNone, IIC_fpStorem, - "vstm${addr:submode}${p}\t$addr, $srcs", "", []> { - let Inst{20} = 0; -} +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in +defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>; -def VSTMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, - variable_ops), IndexModeNone, IIC_fpStorem, - "vstm${addr:submode}${p}\t$addr, $srcs", "", []> { - let Inst{20} = 0; -} +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>; -def VSTMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, - reglist:$srcs, variable_ops), - IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t$addr!, $srcs", - "$addr.addr = $wb", []> { - let Inst{20} = 0; -} +} // neverHasSideEffects -def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, - reglist:$srcs, variable_ops), - IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t$addr!, $srcs", - "$addr.addr = $wb", []> { - let Inst{20} = 0; -} -} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq +def : MnemonicAlias<"vldm", "vldmia">; +def : MnemonicAlias<"vstm", "vstmia">; // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores @@ -142,56 +188,71 @@ def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, // FP Binary Operations. // -def VADDD : ADbI<0b11100, 0b11, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU64, "vadd", ".f64\t$dst, $a, $b", - [(set DPR:$dst, (fadd DPR:$a, (f64 DPR:$b)))]>; - -def VADDS : ASbIn<0b11100, 0b11, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU32, "vadd", ".f32\t$dst, $a, $b", - [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; - -// These are encoded as unary instructions. -let Defs = [FPSCR] in { -def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$a, DPR:$b), - IIC_fpCMP64, "vcmpe", ".f64\t$a, $b", - [(arm_cmpfp DPR:$a, (f64 DPR:$b))]>; - -def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$a, DPR:$b), - IIC_fpCMP64, "vcmp", ".f64\t$a, $b", - [/* For disassembly only; pattern left blank */]>; - -def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$a, SPR:$b), - IIC_fpCMP32, "vcmpe", ".f32\t$a, $b", - [(arm_cmpfp SPR:$a, SPR:$b)]>; - -def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$a, SPR:$b), - IIC_fpCMP32, "vcmp", ".f32\t$a, $b", - [/* For disassembly only; pattern left blank */]>; +def VADDD : ADbI<0b11100, 0b11, 0, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>; + +def VADDS : ASbIn<0b11100, 0b11, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. 
+ let D = VFPNeonDomain; } -def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpDIV64, "vdiv", ".f64\t$dst, $a, $b", - [(set DPR:$dst, (fdiv DPR:$a, (f64 DPR:$b)))]>; - -def VDIVS : ASbI<0b11101, 0b00, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpDIV32, "vdiv", ".f32\t$dst, $a, $b", - [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>; - -def VMULD : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpMUL64, "vmul", ".f64\t$dst, $a, $b", - [(set DPR:$dst, (fmul DPR:$a, (f64 DPR:$b)))]>; - -def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpMUL32, "vmul", ".f32\t$dst, $a, $b", - [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; +def VSUBD : ADbI<0b11100, 0b11, 1, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>; + +def VSUBS : ASbIn<0b11100, 0b11, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} -def VNMULD : ADbI<0b11100, 0b10, 1, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpMUL64, "vnmul", ".f64\t$dst, $a, $b", - [(set DPR:$dst, (fneg (fmul DPR:$a, (f64 DPR:$b))))]>; +def VDIVD : ADbI<0b11101, 0b00, 0, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>; + +def VDIVS : ASbI<0b11101, 0b00, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>; + +def VMULD : ADbI<0b11100, 0b10, 0, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>; + +def VMULS : ASbIn<0b11100, 0b10, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} -def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpMUL32, "vnmul", ".f32\t$dst, $a, $b", - [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]>; +def VNMULD : ADbI<0b11100, 0b10, 1, 0, + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>; + +def VNMULS : ASbI<0b11100, 0b10, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)), @@ -199,53 +260,128 @@ def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)), def : Pat<(fmul (fneg SPR:$a), SPR:$b), (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; +// These are encoded as unary instructions. 
+let Defs = [FPSCR] in { +def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, + (outs), (ins DPR:$Dd, DPR:$Dm), + IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", + [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>; + +def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, + (outs), (ins SPR:$Sd, SPR:$Sm), + IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", + [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} -def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU64, "vsub", ".f64\t$dst, $a, $b", - [(set DPR:$dst, (fsub DPR:$a, (f64 DPR:$b)))]>; +// FIXME: Verify encoding after integrated assembler is working. +def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, + (outs), (ins DPR:$Dd, DPR:$Dm), + IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm", + [/* For disassembly only; pattern left blank */]>; -def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU32, "vsub", ".f32\t$dst, $a, $b", - [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]>; +def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, + (outs), (ins SPR:$Sd, SPR:$Sm), + IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} +} // Defs = [FPSCR] //===----------------------------------------------------------------------===// // FP Unary Operations. // -def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "vabs", ".f64\t$dst, $a", - [(set DPR:$dst, (fabs (f64 DPR:$a)))]>; - -def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,(outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "vabs", ".f32\t$dst, $a", - [(set SPR:$dst, (fabs SPR:$a))]>; +def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0, + (outs DPR:$Dd), (ins DPR:$Dm), + IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm", + [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>; + +def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm", + [(set SPR:$Sd, (fabs SPR:$Sm))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} let Defs = [FPSCR] in { -def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$a), - IIC_fpCMP64, "vcmpe", ".f64\t$a, #0", - [(arm_cmpfp0 (f64 DPR:$a))]>; +def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, + (outs), (ins DPR:$Dd), + IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", + [(arm_cmpfp0 (f64 DPR:$Dd))]> { + let Inst{3-0} = 0b0000; + let Inst{5} = 0; +} -def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$a), - IIC_fpCMP64, "vcmp", ".f64\t$a, #0", - [/* For disassembly only; pattern left blank */]>; +def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, + (outs), (ins SPR:$Sd), + IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0", + [(arm_cmpfp0 SPR:$Sd)]> { + let Inst{3-0} = 0b0000; + let Inst{5} = 0; -def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$a), - IIC_fpCMP32, "vcmpe", ".f32\t$a, #0", - [(arm_cmpfp0 SPR:$a)]>; + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} -def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$a), - IIC_fpCMP32, "vcmp", ".f32\t$a, #0", - [/* For disassembly only; pattern left blank */]>; +// FIXME: Verify encoding after integrated assembler is working. 
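A recurring detail in the explicit VFP encodings from here on (see VCVTDS and VCVTSD a little further down): a 5-bit single-precision register number keeps bits 4-1 in the main field with bit 0 split out, while a double-precision number keeps bits 3-0 together and splits out bit 4. A C++ restatement of those let-lines (illustrative sketch, not part of the patch; function names are made up):

#include <cstdint>

// As in VCVTSD: Inst{15-12} = Sd{4-1}, Inst{22} = Sd{0}.
uint32_t placeSd(uint32_t insn, unsigned sd /* 0..31 */) {
  insn |= ((sd >> 1) & 0xFu) << 12;
  insn |= (sd & 0x1u) << 22;
  return insn;
}

// As in VCVTDS: Inst{15-12} = Dd{3-0}, Inst{22} = Dd{4}.
uint32_t placeDd(uint32_t insn, unsigned dd /* 0..31 */) {
  insn |= (dd & 0xFu) << 12;
  insn |= ((dd >> 4) & 0x1u) << 22;
  return insn;
}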
+def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, + (outs), (ins DPR:$Dd), + IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0", + [/* For disassembly only; pattern left blank */]> { + let Inst{3-0} = 0b0000; + let Inst{5} = 0; } -def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTDS, "vcvt", ".f64.f32\t$dst, $a", - [(set DPR:$dst, (fextend SPR:$a))]>; +def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, + (outs), (ins SPR:$Sd), + IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0", + [/* For disassembly only; pattern left blank */]> { + let Inst{3-0} = 0b0000; + let Inst{5} = 0; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} +} // Defs = [FPSCR] + +def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, + (outs DPR:$Dd), (ins SPR:$Sm), + IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm", + [(set DPR:$Dd, (fextend SPR:$Sm))]> { + // Instruction operands. + bits<5> Dd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; +} // Special case encoding: bits 11-8 is 0b1011. -def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, - IIC_fpCVTSD, "vcvt", ".f32.f64\t$dst, $a", - [(set SPR:$dst, (fround DPR:$a))]> { +def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, + IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm", + [(set SPR:$Sd, (fround DPR:$Dm))]> { + // Instruction operands. + bits<5> Sd; + bits<5> Dm; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + let Inst{27-23} = 0b11101; let Inst{21-16} = 0b110111; let Inst{11-8} = 0b1011; @@ -255,6 +391,7 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, // Between half-precision and single-precision. For disassembly only. +// FIXME: Verify encoding after integrated assembler is working. def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; @@ -277,47 +414,94 @@ def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; -let neverHasSideEffects = 1 in { -def VMOVD: ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "vmov", ".f64\t$dst, $a", []>; - -def VMOVS: ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "vmov", ".f32\t$dst, $a", []>; -} // neverHasSideEffects +def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, + (outs DPR:$Dd), (ins DPR:$Dm), + IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", + [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>; + +def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm", + [(set SPR:$Sd, (fneg SPR:$Sm))]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. 
+ let D = VFPNeonDomain; +} -def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "vneg", ".f64\t$dst, $a", - [(set DPR:$dst, (fneg (f64 DPR:$a)))]>; +def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, + (outs DPR:$Dd), (ins DPR:$Dm), + IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm", + [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>; -def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,(outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "vneg", ".f32\t$dst, $a", - [(set SPR:$dst, (fneg SPR:$a))]>; +def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", + [(set SPR:$Sd, (fsqrt SPR:$Sm))]>; -def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$dst), (ins DPR:$a), - IIC_fpSQRT64, "vsqrt", ".f64\t$dst, $a", - [(set DPR:$dst, (fsqrt (f64 DPR:$a)))]>; +let neverHasSideEffects = 1 in { +def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, + (outs DPR:$Dd), (ins DPR:$Dm), + IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>; -def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), - IIC_fpSQRT32, "vsqrt", ".f32\t$dst, $a", - [(set SPR:$dst, (fsqrt SPR:$a))]>; +def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>; +} // neverHasSideEffects //===----------------------------------------------------------------------===// // FP <-> GPR Copies. Int <-> FP Conversions. // -def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), - IIC_fpMOVSI, "vmov", "\t$dst, $src", - [(set GPR:$dst, (bitconvert SPR:$src))]>; +def VMOVRS : AVConv2I<0b11100001, 0b1010, + (outs GPR:$Rt), (ins SPR:$Sn), + IIC_fpMOVSI, "vmov", "\t$Rt, $Sn", + [(set GPR:$Rt, (bitconvert SPR:$Sn))]> { + // Instruction operands. + bits<4> Rt; + bits<5> Sn; + + // Encode instruction operands. + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Rt; + + let Inst{6-5} = 0b00; + let Inst{3-0} = 0b0000; +} -def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), - IIC_fpMOVIS, "vmov", "\t$dst, $src", - [(set SPR:$dst, (bitconvert GPR:$src))]>; +def VMOVSR : AVConv4I<0b11100000, 0b1010, + (outs SPR:$Sn), (ins GPR:$Rt), + IIC_fpMOVIS, "vmov", "\t$Sn, $Rt", + [(set SPR:$Sn, (bitconvert GPR:$Rt))]> { + // Instruction operands. + bits<5> Sn; + bits<4> Rt; + + // Encode instruction operands. + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Rt; + + let Inst{6-5} = 0b00; + let Inst{3-0} = 0b0000; +} let neverHasSideEffects = 1 in { def VMOVRRD : AVConv3I<0b11000101, 0b1011, - (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), - IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src", + (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm), + IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm", [/* FIXME: Can't write pattern for multiple result instr*/]> { + // Instruction operands. + bits<5> Dm; + bits<4> Rt; + bits<4> Rt2; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + let Inst{7-6} = 0b00; } @@ -333,10 +517,21 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, // FMDLR: GPR -> SPR def VMOVDRR : AVConv5I<0b11000100, 0b1011, - (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), - IIC_fpMOVID, "vmov", "\t$dst, $src1, $src2", - [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> { - let Inst{7-6} = 0b00; + (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2), + IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2", + [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> { + // Instruction operands. 
+ bits<5> Dm; + bits<4> Rt; + bits<4> Rt2; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + + let Inst{7-6} = 0b00; } let neverHasSideEffects = 1 in @@ -350,102 +545,183 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010, // FMRDH: SPR -> GPR // FMRDL: SPR -> GPR // FMRRS: SPR -> GPR -// FMRX : SPR system reg -> GPR - +// FMRX: SPR system reg -> GPR // FMSRR: GPR -> SPR +// FMXR: GPR -> VFP system reg + + +// Int -> FP: + +class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, + bits<4> opcod4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm, + pattern> { + // Instruction operands. + bits<5> Dd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; +} -// FMXR: GPR -> VFP Sstem reg - - -// Int to FP: +class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, + bits<4> opcod4, dag oops, dag iops,InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm, + pattern> { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; +} -def VSITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011, - (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a", - [(set DPR:$dst, (f64 (arm_sitof SPR:$a)))]> { +def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, + (outs DPR:$Dd), (ins SPR:$Sm), + IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm", + [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> { let Inst{7} = 1; // s32 } -def VSITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010, - (outs SPR:$dst),(ins SPR:$a), - IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a", - [(set SPR:$dst, (arm_sitof SPR:$a))]> { +def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, + (outs SPR:$Sd),(ins SPR:$Sm), + IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm", + [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> { let Inst{7} = 1; // s32 + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } -def VUITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011, - (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a", - [(set DPR:$dst, (f64 (arm_uitof SPR:$a)))]> { +def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, + (outs DPR:$Dd), (ins SPR:$Sm), + IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm", + [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> { let Inst{7} = 0; // u32 } -def VUITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010, - (outs SPR:$dst), (ins SPR:$a), - IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a", - [(set SPR:$dst, (arm_uitof SPR:$a))]> { +def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm", + [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> { let Inst{7} = 0; // u32 + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } -// FP to Int: -// Always set Z bit in the instruction, i.e. "round towards zero" variants. 
+// FP -> Int: + +class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, + bits<4> opcod4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm, + pattern> { + // Instruction operands. + bits<5> Sd; + bits<5> Dm; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; +} + +class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, + bits<4> opcod4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm, + pattern> { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; +} -def VTOSIZD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011, - (outs SPR:$dst), (ins DPR:$a), - IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a", - [(set SPR:$dst, (arm_ftosi (f64 DPR:$a)))]> { +// Always set Z bit in the instruction, i.e. "round towards zero" variants. +def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, + (outs SPR:$Sd), (ins DPR:$Dm), + IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm", + [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> { let Inst{7} = 1; // Z bit } -def VTOSIZS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010, - (outs SPR:$dst), (ins SPR:$a), - IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a", - [(set SPR:$dst, (arm_ftosi SPR:$a))]> { +def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm", + [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> { let Inst{7} = 1; // Z bit + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } -def VTOUIZD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011, - (outs SPR:$dst), (ins DPR:$a), - IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a", - [(set SPR:$dst, (arm_ftoui (f64 DPR:$a)))]> { +def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, + (outs SPR:$Sd), (ins DPR:$Dm), + IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm", + [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> { let Inst{7} = 1; // Z bit } -def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, - (outs SPR:$dst), (ins SPR:$a), - IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a", - [(set SPR:$dst, (arm_ftoui SPR:$a))]> { +def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm", + [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> { let Inst{7} = 1; // Z bit + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. -// For disassembly only. let Uses = [FPSCR] in { -def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011, - (outs SPR:$dst), (ins DPR:$a), - IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a", - [(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> { +// FIXME: Verify encoding after integrated assembler is working. 
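The conversions above hardwire Inst{7}, the Z bit, to 1, so they always round toward zero; the vcvtr variants that follow clear it and round according to FPSCR, which is why they carry Uses = [FPSCR]. A rough C++ analogy of the two behaviors (illustrative only, not part of the patch; the C fenv rounding mode stands in for FPSCR):

#include <cmath>

// Z = 1: truncate toward zero, like VTOSIZS.
int convertZ1(float f) { return static_cast<int>(f); }

// Z = 0: honor the current rounding mode, like VTOSIRS; std::nearbyint
// rounds per the floating-point environment, so the cast is then exact.
int convertZ0(float f) { return static_cast<int>(std::nearbyint(f)); }

// Under the default round-to-nearest mode: convertZ1(2.7f) == 2, convertZ0(2.7f) == 3.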
+def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, + (outs SPR:$Sd), (ins DPR:$Dm), + IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm", + [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{ let Inst{7} = 0; // Z bit } -def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010, - (outs SPR:$dst), (ins SPR:$a), - IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a", - [(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> { +def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm", + [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> { let Inst{7} = 0; // Z bit } -def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011, - (outs SPR:$dst), (ins DPR:$a), - IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a", - [(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> { +def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, + (outs SPR:$Sd), (ins DPR:$Dm), + IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm", + [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{ let Inst{7} = 0; // Z bit } -def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, - (outs SPR:$dst), (ins SPR:$a), - IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a", - [(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> { +def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm", + [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> { let Inst{7} = 0; // Z bit } } @@ -457,30 +733,47 @@ def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, // S32 (U=0, sx=1) -> SL // U32 (U=1, sx=1) -> UL -let Constraints = "$a = $dst" in { +// FIXME: Marking these as codegen only seems wrong. They are real +// instructions(?) +let Constraints = "$a = $dst", isCodeGenOnly = 1 in { // FP to Fixed-Point: -let isCodeGenOnly = 1 in { def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. 
+ let D = VFPNeonDomain; +} def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), @@ -501,30 +794,44 @@ def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]>; -} // Fixed-Point to FP: -let isCodeGenOnly = 1 in { def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), @@ -545,70 +852,120 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]>; -} -} // End of 'let Constraints = "$src = $dst" in' +} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in' //===----------------------------------------------------------------------===// // FP FMA Operations. 
// -def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0, - (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b", - [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), - (f64 DPR:$dstin)))]>, - RegConstraint<"$dstin = $dst">; +def VMLAD : ADbI<0b11100, 0b00, 0, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP2,UseFPVMLx]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, - (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "vmla", ".f32\t$dst, $a, $b", - [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, - RegConstraint<"$dstin = $dst">; - -def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0, - (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b", - [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), - (f64 DPR:$dstin)))]>, - RegConstraint<"$dstin = $dst">; + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} -def VNMLSS : ASbI<0b11100, 0b01, 0, 0, - (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "vnmls", ".f32\t$dst, $a, $b", - [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, - RegConstraint<"$dstin = $dst">; - -def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0, - (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b", - [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), - (f64 DPR:$dstin)))]>, - RegConstraint<"$dstin = $dst">; +def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), + (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP2,UseFPVMLx]>; +def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), + (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>; + +def VMLSD : ADbI<0b11100, 0b00, 1, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP2,UseFPVMLx]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, - (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "vmls", ".f32\t$dst, $a, $b", - [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, - RegConstraint<"$dstin = $dst">; - -def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))), - (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>; -def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)), - (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; - -def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0, - (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b", - [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), - (f64 DPR:$dstin)))]>, - RegConstraint<"$dstin = $dst">; + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + // Some single precision VFP instructions may be executed on both NEON and 
VFP + // pipelines. + let D = VFPNeonDomain; +} + +def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), + (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP2,UseFPVMLx]>; +def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), + (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + +def VNMLAD : ADbI<0b11100, 0b01, 1, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP2,UseFPVMLx]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, - (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "vnmla", ".f32\t$dst, $a, $b", - [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, - RegConstraint<"$dstin = $dst">; + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} + +def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), + (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP2,UseFPVMLx]>; +def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), + (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + +def VNMLSD : ADbI<0b11100, 0b01, 0, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP2,UseFPVMLx]>; + +def VNMLSS : ASbI<0b11100, 0b01, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} + +def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), + (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP2,UseFPVMLx]>; +def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), + (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + //===----------------------------------------------------------------------===// // FP Conditional moves. 
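Before the conditional-move hunk, a recap of the multiply-accumulate section above: the four families differ only in where the negations sit, and the fmul_su/fadd_mlx/fsub_mlx fragments in their patterns are fmul/fadd/fsub variants (defined elsewhere in this import) gated so the combine only fires when folding into a vmla-style instruction is profitable, hence the UseFPVMLx predicates. For reference, the value each family computes, with d the tied accumulator ($Ddin/$Sdin) and n, m the factors; plain C++, not LLVM code:

    double vmla (double d, double n, double m) { return d + n * m; }    // VMLAD / VMLAS
    double vmls (double d, double n, double m) { return d - n * m; }    // VMLSD / VMLSS
    double vnmla(double d, double n, double m) { return -(n * m) - d; } // VNMLAD / VNMLAS
    double vnmls(double d, double n, double m) { return n * m - d; }    // VNMLSD / VNMLSS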
@@ -616,92 +973,157 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, let neverHasSideEffects = 1 in { def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, - (outs DPR:$dst), (ins DPR:$false, DPR:$true), - IIC_fpUNA64, "vmov", ".f64\t$dst, $true", - [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>, - RegConstraint<"$false = $dst">; + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", + [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>, + RegConstraint<"$Dn = $Dd">; def VMOVScc : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, - (outs SPR:$dst), (ins SPR:$false, SPR:$true), - IIC_fpUNA32, "vmov", ".f32\t$dst, $true", - [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>, - RegConstraint<"$false = $dst">; + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", + [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, + RegConstraint<"$Sn = $Sd">; def VNEGDcc : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, - (outs DPR:$dst), (ins DPR:$false, DPR:$true), - IIC_fpUNA64, "vneg", ".f64\t$dst, $true", - [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>, - RegConstraint<"$false = $dst">; + (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), + IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", + [/*(set DPR:$Dd, (ARMcneg DPR:$Dn, DPR:$Dm, imm:$cc))*/]>, + RegConstraint<"$Dn = $Dd">; def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0, - (outs SPR:$dst), (ins SPR:$false, SPR:$true), - IIC_fpUNA32, "vneg", ".f32\t$dst, $true", - [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>, - RegConstraint<"$false = $dst">; + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm", + [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, + RegConstraint<"$Sn = $Sd"> { + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; +} } // neverHasSideEffects //===----------------------------------------------------------------------===// -// Misc. +// Move from VFP System Register to ARM core register. // -// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags -// to APSR. -let Defs = [CPSR], Uses = [FPSCR] in -def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", - "\tapsr_nzcv, fpscr", - [(arm_fmstat)]> { +class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm, + list<dag> pattern>: + VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> { + + // Instruction operand. + bits<4> Rt; + let Inst{27-20} = 0b11101111; - let Inst{19-16} = 0b0001; - let Inst{15-12} = 0b1111; + let Inst{19-16} = opc19_16; + let Inst{15-12} = Rt; let Inst{11-8} = 0b1010; let Inst{7} = 0; + let Inst{6-5} = 0b00; let Inst{4} = 1; + let Inst{3-0} = 0b0000; } -// FPSCR <-> GPR (for disassembly only) +// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags +// to APSR. 
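The MovFromVFP class above factors out the common VMRS-family encoding; the FMSTAT definition that immediately follows pins Rt to 0b1111 (apsr_nzcv) and the opcode field to 0b0001 (fpscr). Transcribing the let Inst{...} assignments into a single word, as a sketch (the usual ARM condition field in bits 31-28 is assumed, since VFPAI instructions are predicable):

    #include <cstdint>

    // Fixed fields per MovFromVFP: cond | 11101111 | opc | Rt | 1010 | 0 00 1 0000.
    uint32_t encodeMovFromVFP(uint32_t cond, uint32_t opc19_16, uint32_t Rt) {
      uint32_t Inst = 0;
      Inst |= (cond & 0xF) << 28;      // condition; 0b1110 (AL) when unpredicated
      Inst |= 0xEFu << 20;             // Inst{27-20} = 0b11101111
      Inst |= (opc19_16 & 0xF) << 16;  // 0b0001 fpscr, 0b1000 fpexc, 0b0000 fpsid
      Inst |= (Rt & 0xF) << 12;        // core register; 0b1111 selects apsr_nzcv
      Inst |= 0xAu << 8;               // Inst{11-8} = 0b1010
      Inst |= 1u << 4;                 // Inst{4}; bits 7, 6-5 and 3-0 stay zero
      return Inst;
    }
    // encodeMovFromVFP(0b1110, 0b0001, 0b1111) == 0xEEF1FA10, the familiar
    // "vmrs apsr_nzcv, fpscr" (FMSTAT) encoding.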
+let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in +def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins), + "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>; + +// Application level FPSCR -> GPR let hasSideEffects = 1, Uses = [FPSCR] in -def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, - "vmrs", "\t$dst, fpscr", - [(set GPR:$dst, (int_arm_get_fpscr))]> { - let Inst{27-20} = 0b11101111; - let Inst{19-16} = 0b0001; - let Inst{11-8} = 0b1010; - let Inst{7} = 0; - let Inst{4} = 1; +def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, fpscr", + [(set GPR:$Rt, (int_arm_get_fpscr))]>; + +// System level FPEXC, FPSID -> GPR +let Uses = [FPSCR] in { + def VMRS_FPEXC : MovFromVFP<0b1000 /* fpexc */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, fpexc", []>; + def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, fpsid", []>; } -let Defs = [FPSCR] in -def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, - "vmsr", "\tfpscr, $src", - [(int_arm_set_fpscr GPR:$src)]> { +//===----------------------------------------------------------------------===// +// Move from ARM core register to VFP System Register. +// + +class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm, + list<dag> pattern>: + VFPAI<oops, iops, VFPMiscFrm, IIC_fpSTAT, opc, asm, pattern> { + + // Instruction operand. + bits<4> src; + + // Encode instruction operand. + let Inst{15-12} = src; + let Inst{27-20} = 0b11101110; - let Inst{19-16} = 0b0001; + let Inst{19-16} = opc19_16; let Inst{11-8} = 0b1010; let Inst{7} = 0; let Inst{4} = 1; } +let Defs = [FPSCR] in { + // Application level GPR -> FPSCR + def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPR:$src), + "vmsr", "\tfpscr, $src", [(int_arm_set_fpscr GPR:$src)]>; + // System level GPR -> FPEXC + def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPR:$src), + "vmsr", "\tfpexc, $src", []>; + // System level GPR -> FPSID + def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src), + "vmsr", "\tfpsid, $src", []>; +} + +//===----------------------------------------------------------------------===// +// Misc. +// + // Materialize FP immediates. VFP3 only. let isReMaterializable = 1 in { -def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), +def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_fpUNA64, - "vmov", ".f64\t$dst, $imm", - [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { + "vmov", ".f64\t$Dd, $imm", + [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { + // Instruction operands. + bits<5> Dd; + bits<32> imm; + + // Encode instruction operands. + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + let Inst{19} = imm{31}; + let Inst{18-16} = imm{22-20}; + let Inst{3-0} = imm{19-16}; + + // Encode remaining instruction bits. let Inst{27-23} = 0b11101; let Inst{21-20} = 0b11; let Inst{11-9} = 0b101; - let Inst{8} = 1; + let Inst{8} = 1; // Double precision. let Inst{7-4} = 0b0000; } -def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), - VFPMiscFrm, IIC_fpUNA32, - "vmov", ".f32\t$dst, $imm", - [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { +def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), + VFPMiscFrm, IIC_fpUNA32, + "vmov", ".f32\t$Sd, $imm", + [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { + // Instruction operands. + bits<5> Sd; + bits<32> imm; + + // Encode instruction operands. 
+ let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + let Inst{19} = imm{31}; // The immediate is handled as a double. + let Inst{18-16} = imm{22-20}; + let Inst{3-0} = imm{19-16}; + + // Encode remaining instruction bits. let Inst{27-23} = 0b11101; let Inst{21-20} = 0b11; let Inst{11-9} = 0b101; - let Inst{8} = 0; + let Inst{8} = 0; // Single precision. let Inst{7-4} = 0b0000; } } diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 5f6d7ee..45b7e48 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -22,7 +22,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/System/Memory.h" +#include "llvm/Support/Memory.h" #include <cstdlib> using namespace llvm; @@ -43,7 +43,7 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction; #define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) // CompilationCallback stub - We can't use a C function with inline assembly in -// it, because we the prolog/epilog inserted by GCC won't work for us (we need +// it, because the prolog/epilog inserted by GCC won't work for us. (We need // to preserve more context and manipulate the stack directly). Instead, // write our own wrapper, which does things our way, so we have complete // control over register saving and restoring. @@ -97,9 +97,10 @@ extern "C" { "str r0, [sp,#16]\n" // Return to the (newly modified) stub to invoke the real function. // The above twiddling of the saved return addresses allows us to - // deallocate everything, including the LR the stub saved, all in one - // pop instruction. - "ldmia sp!, {r0, r1, r2, r3, lr, pc}\n" + // deallocate everything, including the LR the stub saved, with two + // updating load instructions. + "ldmia sp!, {r0, r1, r2, r3, lr}\n" + "ldr pc, [sp], #4\n" ); #else // Not an ARM host void ARMCompilationCallback() { @@ -290,7 +291,7 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, *((intptr_t*)RelocPos) |= ResultPtr; // Set register Rn to PC. *((intptr_t*)RelocPos) |= - ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift; break; } case ARM::reloc_arm_pic_jt: diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h index f5d9eff..2f97928 100644 --- a/lib/Target/ARM/ARMJITInfo.h +++ b/lib/Target/ARM/ARMJITInfo.h @@ -105,7 +105,7 @@ namespace llvm { /// model is PIC. 
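One note on the FCONSTD/FCONSTS encodings above, before the ARMJITInfo.h hunk continues: the bits<32> imm operand is the top word of the value "handled as a double", so imm{31} is the sign 'a', imm{22-20} the low exponent bits 'bcd', and imm{19-16} the top fraction bits 'efgh' of the VFP3 8-bit modified immediate. A sketch of the extraction (illustrative only; it skips the check that the double is actually encodable):

    #include <cstdint>
    #include <cstring>

    // Recover the VFP3 "abcdefgh" imm8 from an encodable double, mirroring
    // the imm{...} slices used by FCONSTD/FCONSTS above.
    uint8_t vfp3ImmFromDouble(double d) {
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof bits);
      uint32_t top = (uint32_t)(bits >> 32); // the bits<32> 'imm' operand
      uint8_t a    = (top >> 31) & 0x1;      // sign     -> Inst{19}
      uint8_t bcd  = (top >> 20) & 0x7;      // exponent -> Inst{18-16}
      uint8_t efgh = (top >> 16) & 0xF;      // fraction -> Inst{3-0}
      return (uint8_t)((a << 7) | (bcd << 4) | efgh);
    }
    // vfp3ImmFromDouble(1.0): top word 0x3FF00000 gives a=0, bcd=0b111,
    // efgh=0b0000, i.e. imm8 0x70, the standard encoding of #1.0.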
void Initialize(const MachineFunction &MF, bool isPIC) { const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - ConstPoolId2AddrMap.resize(AFI->getNumConstPoolEntries()); + ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); IsPIC = isPIC; } diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 2b7645a..d9dc5cd 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -128,45 +128,153 @@ namespace { char ARMLoadStoreOpt::ID = 0; } -static int getLoadStoreMultipleOpcode(int Opcode) { +static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Opcode) { - case ARM::LDR: + default: llvm_unreachable("Unhandled opcode!"); + case ARM::LDRi12: ++NumLDMGened; - return ARM::LDM; - case ARM::STR: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::LDMIA; + case ARM_AM::da: return ARM::LDMDA; + case ARM_AM::db: return ARM::LDMDB; + case ARM_AM::ib: return ARM::LDMIB; + } + break; + case ARM::STRi12: ++NumSTMGened; - return ARM::STM; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::STMIA; + case ARM_AM::da: return ARM::STMDA; + case ARM_AM::db: return ARM::STMDB; + case ARM_AM::ib: return ARM::STMIB; + } + break; case ARM::t2LDRi8: case ARM::t2LDRi12: ++NumLDMGened; - return ARM::t2LDM; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::t2LDMIA; + case ARM_AM::db: return ARM::t2LDMDB; + } + break; case ARM::t2STRi8: case ARM::t2STRi12: ++NumSTMGened; - return ARM::t2STM; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::t2STMIA; + case ARM_AM::db: return ARM::t2STMDB; + } + break; case ARM::VLDRS: ++NumVLDMGened; - return ARM::VLDMS; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VLDMSIA; + case ARM_AM::db: return ARM::VLDMSDB; + } + break; case ARM::VSTRS: ++NumVSTMGened; - return ARM::VSTMS; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VSTMSIA; + case ARM_AM::db: return ARM::VSTMSDB; + } + break; case ARM::VLDRD: ++NumVLDMGened; - return ARM::VLDMD; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VLDMDIA; + case ARM_AM::db: return ARM::VLDMDDB; + } + break; case ARM::VSTRD: ++NumVSTMGened; - return ARM::VSTMD; - default: llvm_unreachable("Unhandled opcode!"); + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VSTMDIA; + case ARM_AM::db: return ARM::VSTMDDB; + } + break; } + return 0; } +namespace llvm { + namespace ARM_AM { + +AMSubMode getLoadStoreMultipleSubMode(int Opcode) { + switch (Opcode) { + default: llvm_unreachable("Unhandled opcode!"); + case ARM::LDMIA_RET: + case ARM::LDMIA: + case ARM::LDMIA_UPD: + case ARM::STMIA: + case ARM::STMIA_UPD: + case ARM::t2LDMIA_RET: + case ARM::t2LDMIA: + case ARM::t2LDMIA_UPD: + case ARM::t2STMIA: + case ARM::t2STMIA_UPD: + case ARM::VLDMSIA: + case ARM::VLDMSIA_UPD: + case ARM::VSTMSIA: + case ARM::VSTMSIA_UPD: + case ARM::VLDMDIA: + case ARM::VLDMDIA_UPD: + case ARM::VSTMDIA: + case ARM::VSTMDIA_UPD: + return ARM_AM::ia; + + case ARM::LDMDA: + case ARM::LDMDA_UPD: + case ARM::STMDA: + case ARM::STMDA_UPD: + return ARM_AM::da; + + case ARM::LDMDB: + case ARM::LDMDB_UPD: + case 
ARM::STMDB: + case ARM::STMDB_UPD: + case ARM::t2LDMDB: + case ARM::t2LDMDB_UPD: + case ARM::t2STMDB: + case ARM::t2STMDB_UPD: + case ARM::VLDMSDB: + case ARM::VLDMSDB_UPD: + case ARM::VSTMSDB: + case ARM::VSTMSDB_UPD: + case ARM::VLDMDDB: + case ARM::VLDMDDB_UPD: + case ARM::VSTMDDB: + case ARM::VSTMDDB_UPD: + return ARM_AM::db; + + case ARM::LDMIB: + case ARM::LDMIB_UPD: + case ARM::STMIB: + case ARM::STMIB_UPD: + return ARM_AM::ib; + } + + return ARM_AM::bad_am_submode; +} + + } // end namespace ARM_AM +} // end namespace llvm + static bool isT2i32Load(unsigned Opc) { return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; } static bool isi32Load(unsigned Opc) { - return Opc == ARM::LDR || isT2i32Load(Opc); + return Opc == ARM::LDRi12 || isT2i32Load(Opc); } static bool isT2i32Store(unsigned Opc) { @@ -174,7 +282,7 @@ static bool isT2i32Store(unsigned Opc) { } static bool isi32Store(unsigned Opc) { - return Opc == ARM::STR || isT2i32Store(Opc); + return Opc == ARM::STRi12 || isT2i32Store(Opc); } /// MergeOps - Create and insert a LDM or STM with Base as base register and @@ -245,10 +353,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); - Opcode = getLoadStoreMultipleOpcode(Opcode); + Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg); + .addImm(Pred).addReg(PredReg); for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); @@ -271,22 +379,14 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, // First calculate which of the registers should be killed by the merged // instruction. const unsigned insertPos = memOps[insertAfter].Position; - - SmallSet<unsigned, 4> UnavailRegs; SmallSet<unsigned, 4> KilledRegs; DenseMap<unsigned, unsigned> Killer; - for (unsigned i = 0; i < memOpsBegin; ++i) { - if (memOps[i].Position < insertPos && memOps[i].isKill) { - unsigned Reg = memOps[i].Reg; - if (memOps[i].Merged) - UnavailRegs.insert(Reg); - else { - KilledRegs.insert(Reg); - Killer[Reg] = i; - } + for (unsigned i = 0, e = memOps.size(); i != e; ++i) { + if (i == memOpsBegin) { + i = memOpsEnd; + if (i == e) + break; } - } - for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) { if (memOps[i].Position < insertPos && memOps[i].isKill) { unsigned Reg = memOps[i].Reg; KilledRegs.insert(Reg); @@ -297,12 +397,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, SmallVector<std::pair<unsigned, bool>, 8> Regs; for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { unsigned Reg = memOps[i].Reg; - if (UnavailRegs.count(Reg)) - // Register is killed before and it's not easy / possible to update the - // kill marker on already merged instructions. Abort. - return; - - // If we are inserting the merged operation after an unmerged operation that + // If we are inserting the merged operation after an operation that // uses the same register, make sure to transfer any kill flag. bool isKill = memOps[i].isKill || KilledRegs.count(Reg); Regs.push_back(std::make_pair(Reg, isKill)); @@ -318,17 +413,24 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, // Merge succeeded, update records. Merges.push_back(prior(Loc)); for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - // Remove kill flags from any unmerged memops that come before insertPos. 
+ // Remove kill flags from any memops that come before insertPos. if (Regs[i-memOpsBegin].second) { unsigned Reg = Regs[i-memOpsBegin].first; if (KilledRegs.count(Reg)) { unsigned j = Killer[Reg]; - memOps[j].MBBI->getOperand(0).setIsKill(false); + int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true); + assert(Idx >= 0 && "Cannot find killing operand"); + memOps[j].MBBI->getOperand(Idx).setIsKill(false); memOps[j].isKill = false; } + memOps[i].isKill = true; } MBB.erase(memOps[i].MBBI); + // Update this memop to refer to the merged instruction. + // We may need to move kill flags again. memOps[i].Merged = true; + memOps[i].MBBI = Merges.back(); + memOps[i].Position = insertPos; } } @@ -349,7 +451,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, const MachineOperand &PMO = Loc->getOperand(0); unsigned PReg = PMO.getReg(); unsigned PRegNum = PMO.isUndef() ? UINT_MAX - : ARMRegisterInfo::getRegisterNumbering(PReg); + : getARMRegisterNumbering(PReg); unsigned Count = 1; for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { @@ -357,7 +459,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); unsigned Reg = MO.getReg(); unsigned RegNum = MO.isUndef() ? UINT_MAX - : ARMRegisterInfo::getRegisterNumbering(Reg); + : getARMRegisterNumbering(Reg); // Register numbers must be in ascending order. For VFP, the registers // must also be consecutive and there is a limit of 16 double-word // registers per instruction. @@ -440,8 +542,8 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { switch (MI->getOpcode()) { default: return 0; - case ARM::LDR: - case ARM::STR: + case ARM::LDRi12: + case ARM::STRi12: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -452,31 +554,109 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::VLDRD: case ARM::VSTRD: return 8; - case ARM::LDM: - case ARM::STM: - case ARM::t2LDM: - case ARM::t2STM: - case ARM::VLDMS: - case ARM::VSTMS: - return (MI->getNumOperands() - 4) * 4; - case ARM::VLDMD: - case ARM::VSTMD: - return (MI->getNumOperands() - 4) * 8; + case ARM::LDMIA: + case ARM::LDMDA: + case ARM::LDMDB: + case ARM::LDMIB: + case ARM::STMIA: + case ARM::STMDA: + case ARM::STMDB: + case ARM::STMIB: + case ARM::t2LDMIA: + case ARM::t2LDMDB: + case ARM::t2STMIA: + case ARM::t2STMDB: + case ARM::VLDMSIA: + case ARM::VLDMSDB: + case ARM::VSTMSIA: + case ARM::VSTMSDB: + return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; + case ARM::VLDMDIA: + case ARM::VLDMDDB: + case ARM::VSTMDIA: + case ARM::VSTMDDB: + return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; } } -static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) { +static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, + ARM_AM::AMSubMode Mode) { switch (Opc) { - case ARM::LDM: return ARM::LDM_UPD; - case ARM::STM: return ARM::STM_UPD; - case ARM::t2LDM: return ARM::t2LDM_UPD; - case ARM::t2STM: return ARM::t2STM_UPD; - case ARM::VLDMS: return ARM::VLDMS_UPD; - case ARM::VLDMD: return ARM::VLDMD_UPD; - case ARM::VSTMS: return ARM::VSTMS_UPD; - case ARM::VSTMD: return ARM::VSTMD_UPD; default: llvm_unreachable("Unhandled opcode!"); + case ARM::LDMIA: + case ARM::LDMDA: + case ARM::LDMDB: + case ARM::LDMIB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::LDMIA_UPD; + case 
ARM_AM::ib: return ARM::LDMIB_UPD; + case ARM_AM::da: return ARM::LDMDA_UPD; + case ARM_AM::db: return ARM::LDMDB_UPD; + } + break; + case ARM::STMIA: + case ARM::STMDA: + case ARM::STMDB: + case ARM::STMIB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::STMIA_UPD; + case ARM_AM::ib: return ARM::STMIB_UPD; + case ARM_AM::da: return ARM::STMDA_UPD; + case ARM_AM::db: return ARM::STMDB_UPD; + } + break; + case ARM::t2LDMIA: + case ARM::t2LDMDB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::t2LDMIA_UPD; + case ARM_AM::db: return ARM::t2LDMDB_UPD; + } + break; + case ARM::t2STMIA: + case ARM::t2STMDB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::t2STMIA_UPD; + case ARM_AM::db: return ARM::t2STMDB_UPD; + } + break; + case ARM::VLDMSIA: + case ARM::VLDMSDB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VLDMSIA_UPD; + case ARM_AM::db: return ARM::VLDMSDB_UPD; + } + break; + case ARM::VLDMDIA: + case ARM::VLDMDDB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VLDMDIA_UPD; + case ARM_AM::db: return ARM::VLDMDDB_UPD; + } + break; + case ARM::VSTMSIA: + case ARM::VSTMSDB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VSTMSIA_UPD; + case ARM_AM::db: return ARM::VSTMSDB_UPD; + } + break; + case ARM::VSTMDIA: + case ARM::VSTMDDB: + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::VSTMDIA_UPD; + case ARM_AM::db: return ARM::VSTMDDB_UPD; + } + break; } + return 0; } @@ -505,16 +685,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, int Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); - bool DoMerge = false; - ARM_AM::AMSubMode Mode = ARM_AM::ia; - // Can't use an updating ld/st if the base register is also a dest // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. - for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { + for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) if (MI->getOperand(i).getReg() == Base) return false; - } - Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); + + bool DoMerge = false; + ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode); // Try merging with the previous instruction. MachineBasicBlock::iterator BeginMBBI = MBB.begin(); @@ -560,15 +738,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, if (!DoMerge) return false; - unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode); + unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM4ModeImm(Mode)) .addImm(Pred).addReg(PredReg); + // Transfer the rest of operands. - for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum) + for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) MIB.addOperand(MI->getOperand(OpNum)); + // Transfer memoperands. 
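This hunk, like the getLoadStoreMultipleOpcode one earlier, replaces the single LDM/STM/VLDM/VSTM opcodes with one opcode per addressing submode, so the writeback form is now selected by (opcode, submode) pairs. A few spot checks of the mapping, written as a hypothetical test snippet rather than anything in the tree:

    // Hypothetical sanity checks for getUpdatingLSMultipleOpcode:
    assert(getUpdatingLSMultipleOpcode(ARM::LDMIA,   ARM_AM::ia) == ARM::LDMIA_UPD);
    assert(getUpdatingLSMultipleOpcode(ARM::STMIB,   ARM_AM::ib) == ARM::STMIB_UPD);
    assert(getUpdatingLSMultipleOpcode(ARM::t2LDMIA, ARM_AM::db) == ARM::t2LDMDB_UPD);
    // e.g. "ldmia r0, {r1-r3}" followed by "add r0, r0, #12" becomes
    // "ldmia r0!, {r1-r3}", provided r0 is not also in the register list
    // (the loop above rejects that case, since the behavior is undefined).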
(*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); @@ -576,14 +755,21 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, return true; } -static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { +static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, + ARM_AM::AddrOpc Mode) { switch (Opc) { - case ARM::LDR: return ARM::LDR_PRE; - case ARM::STR: return ARM::STR_PRE; - case ARM::VLDRS: return ARM::VLDMS_UPD; - case ARM::VLDRD: return ARM::VLDMD_UPD; - case ARM::VSTRS: return ARM::VSTMS_UPD; - case ARM::VSTRD: return ARM::VSTMD_UPD; + case ARM::LDRi12: + return ARM::LDR_PRE; + case ARM::STRi12: + return ARM::STR_PRE; + case ARM::VLDRS: + return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; + case ARM::VLDRD: + return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; + case ARM::VSTRS: + return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; + case ARM::VSTRD: + return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDR_PRE; @@ -595,14 +781,21 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { return 0; } -static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { +static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, + ARM_AM::AddrOpc Mode) { switch (Opc) { - case ARM::LDR: return ARM::LDR_POST; - case ARM::STR: return ARM::STR_POST; - case ARM::VLDRS: return ARM::VLDMS_UPD; - case ARM::VLDRD: return ARM::VLDMD_UPD; - case ARM::VSTRS: return ARM::VSTMS_UPD; - case ARM::VSTRD: return ARM::VSTMD_UPD; + case ARM::LDRi12: + return ARM::LDR_POST; + case ARM::STRi12: + return ARM::STR_POST; + case ARM::VLDRS: + return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; + case ARM::VLDRD: + return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; + case ARM::VSTRS: + return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; + case ARM::VSTRD: + return Mode == ARM_AM::add ? 
ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDR_POST; @@ -629,14 +822,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, DebugLoc dl = MI->getDebugLoc(); bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); - bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR); - if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) - return false; - if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) - return false; - if (isT2i32Load(Opcode) || isT2i32Store(Opcode)) + bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); + if (isi32Load(Opcode) || isi32Store(Opcode)) if (MI->getOperand(2).getImm() != 0) return false; + if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) + return false; bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be @@ -666,7 +857,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, DoMerge = true; } if (DoMerge) { - NewOpc = getPreIndexedLoadStoreOpcode(Opcode); + NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub); MBB.erase(PrevMBBI); } } @@ -685,7 +876,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, DoMerge = true; } if (DoMerge) { - NewOpc = getPostIndexedLoadStoreOpcode(Opcode); + NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub); if (NextMBBI == I) { Advance = true; ++I; @@ -698,12 +889,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return false; unsigned Offset = 0; - if (isAM5) - Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ? - ARM_AM::db : ARM_AM::ia); - else if (isAM2) + if (isAM2) Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); - else + else if (!isAM5) Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; if (isAM5) { @@ -715,7 +903,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register .addReg(Base, getKillRegState(isLd ? BaseKill : false)) - .addImm(Offset) .addImm(Pred).addReg(PredReg) .addReg(MO.getReg(), (isLd ? getDefRegState(true) : getKillRegState(MO.isKill()))); @@ -782,15 +969,14 @@ static bool isMemoryOp(const MachineInstr *MI) { int Opcode = MI->getOpcode(); switch (Opcode) { default: break; - case ARM::LDR: - case ARM::STR: - return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0; case ARM::VLDRS: case ARM::VSTRS: return MI->getOperand(1).isReg(); case ARM::VLDRD: case ARM::VSTRD: return MI->getOperand(1).isReg(); + case ARM::LDRi12: + case ARM::STRi12: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -818,24 +1004,19 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { static int getMemoryOpOffset(const MachineInstr *MI) { int Opcode = MI->getOpcode(); - bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; unsigned NumOperands = MI->getDesc().getNumOperands(); unsigned OffField = MI->getOperand(NumOperands-3).getImm(); if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || - Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) + Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || + Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) return OffField; - int Offset = isAM2 - ? 
ARM_AM::getAM2Offset(OffField) - : (isAM3 ? ARM_AM::getAM3Offset(OffField) - : ARM_AM::getAM5Offset(OffField) * 4); - if (isAM2) { - if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } else if (isAM3) { + int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) + : ARM_AM::getAM5Offset(OffField) * 4; + if (isAM3) { if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) Offset = -Offset; } else { @@ -847,35 +1028,24 @@ static int getMemoryOpOffset(const MachineInstr *MI) { static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - int OffImm, bool isDef, + int Offset, bool isDef, DebugLoc dl, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, - unsigned OffReg, bool OffKill, bool OffUndef, + bool OffKill, bool OffUndef, ARMCC::CondCodes Pred, unsigned PredReg, const TargetInstrInfo *TII, bool isT2) { - int Offset = OffImm; - if (!isT2) { - if (OffImm < 0) - Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift); - else - Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift); - } if (isDef) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); - if (!isT2) - MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef)); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); } else { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); - if (!isT2) - MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef)); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); } } @@ -906,23 +1076,21 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, unsigned BaseReg = BaseOp.getReg(); bool BaseKill = BaseOp.isKill(); bool BaseUndef = BaseOp.isUndef(); - unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg(); bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); int OffImm = getMemoryOpOffset(MI); unsigned PredReg = 0; ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); - if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) { + if (OddRegNum > EvenRegNum && OffImm == 0) { // Ascending register numbers and no offset. It's safe to change it to a // ldm or stm. unsigned NewOpc = (isLd) - ? (isT2 ? ARM::t2LDM : ARM::LDM) - : (isT2 ? ARM::t2STM : ARM::STM); + ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA) + : (isT2 ? 
ARM::t2STMIA : ARM::STMIA); if (isLd) { BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) .addReg(BaseReg, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addImm(Pred).addReg(PredReg) .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); @@ -930,7 +1098,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, } else { BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) .addReg(BaseReg, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addImm(Pred).addReg(PredReg) .addReg(EvenReg, getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) @@ -941,28 +1108,24 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, NewBBI = llvm::prior(MBBI); } else { // Split into two instructions. - assert((!isT2 || !OffReg) && - "Thumb2 ldrd / strd does not encode offset register!"); unsigned NewOpc = (isLd) - ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR) - : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR); + ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) + : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); DebugLoc dl = MBBI->getDebugLoc(); // If this is a load and base register is killed, it may have been // re-defed by the load, make sure the first load does not clobber it. if (isLd && (BaseKill || OffKill) && - (TRI->regsOverlap(EvenReg, BaseReg) || - (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) { - assert(!TRI->regsOverlap(OddReg, BaseReg) && - (!OffReg || !TRI->regsOverlap(OddReg, OffReg))); + (TRI->regsOverlap(EvenReg, BaseReg))) { + assert(!TRI->regsOverlap(OddReg, BaseReg)); InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill, false, - BaseReg, false, BaseUndef, OffReg, false, OffUndef, + BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, - BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, + BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, Pred, PredReg, TII, isT2); } else { if (OddReg == EvenReg && EvenDeadKill) { @@ -974,12 +1137,12 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, } InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, EvenUndef, - BaseReg, false, BaseUndef, OffReg, false, OffUndef, + BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill, OddUndef, - BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, + BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, Pred, PredReg, TII, isT2); } if (isLd) @@ -1158,17 +1321,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { return NumMerges > 0; } -namespace { - struct OffsetCompare { - bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { - int LOffset = getMemoryOpOffset(LHS); - int ROffset = getMemoryOpOffset(RHS); - assert(LHS == RHS || LOffset != ROffset); - return LOffset > ROffset; - } - }; -} - /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops /// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it /// directly restore the value of LR into pc. 
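The doc comment above describes MergeReturnIntoLDM, whose updated body follows in the next hunk. The shape of the rewrite, sketched with hypothetical registers, plus the likely reason for the new hasV5TOps() gate added further down (as far as I can tell, a load into pc only interworks like bx on v5T and later, so the fold is no longer applied to older cores):

    //   ldmia sp!, {r4, r5, lr}           ldmia sp!, {r4, r5, pc}
    //   bx lr                       =>
    //
    // carried out in the hunk below by retargeting the opcode and operand:
    //   PrevMI->setDesc(TII->get(NewOpc)); // LDMIA_UPD -> LDMIA_RET (or t2 form)
    //   MO.setReg(ARM::PC);                // the LR slot now loads into pc
    //   MBB.erase(MBBI);                   // drop the bx lr / mov pc, lr
    // (the return's implicit operands are copied onto PrevMI as well)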
@@ -1182,20 +1334,25 @@ namespace { bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { if (MBB.empty()) return false; - MachineBasicBlock::iterator MBBI = prior(MBB.end()); + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); if (MBBI != MBB.begin() && (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET || MBBI->getOpcode() == ARM::MOVPCLR)) { MachineInstr *PrevMI = prior(MBBI); - if (PrevMI->getOpcode() == ARM::LDM_UPD || - PrevMI->getOpcode() == ARM::t2LDM_UPD) { + unsigned Opcode = PrevMI->getOpcode(); + if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || + Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || + Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); if (MO.getReg() != ARM::LR) return false; - unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET; + unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET); + assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || + Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); PrevMI->setDesc(TII->get(NewOpc)); MO.setReg(ARM::PC); + PrevMI->copyImplicitOps(&*MBBI); MBB.erase(MBBI); return true; } @@ -1216,7 +1373,8 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { ++MFI) { MachineBasicBlock &MBB = *MFI; Modified |= LoadStoreMultipleOpti(MBB); - Modified |= MergeReturnIntoLDM(MBB); + if (TM.getSubtarget<ARMSubtarget>().hasV5TOps()) + Modified |= MergeReturnIntoLDM(MBB); } delete RS; @@ -1250,7 +1408,7 @@ namespace { bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc, unsigned &EvenReg, unsigned &OddReg, unsigned &BaseReg, - unsigned &OffReg, int &Offset, + int &Offset, unsigned &PredReg, ARMCC::CondCodes &Pred, bool &isT2); bool RescheduleOps(MachineBasicBlock *MBB, @@ -1292,7 +1450,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, if (I->isDebugValue() || MemOps.count(&*I)) continue; const TargetInstrDesc &TID = I->getDesc(); - if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) + if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects()) return false; if (isLd && TID.mayStore()) return false; @@ -1330,8 +1488,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc, unsigned &EvenReg, unsigned &OddReg, unsigned &BaseReg, - unsigned &OffReg, int &Offset, - unsigned &PredReg, + int &Offset, unsigned &PredReg, ARMCC::CondCodes &Pred, bool &isT2) { // Make sure we're allowed to generate LDRD/STRD. @@ -1341,9 +1498,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD unsigned Scale = 1; unsigned Opcode = Op0->getOpcode(); - if (Opcode == ARM::LDR) + if (Opcode == ARM::LDRi12) NewOpc = ARM::LDRD; - else if (Opcode == ARM::STR) + else if (Opcode == ARM::STRi12) NewOpc = ARM::STRD; else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { NewOpc = ARM::t2LDRDi8; @@ -1356,12 +1513,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, } else return false; - // Make sure the offset registers match. - if (!isT2 && - (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg())) - return false; - - // Must sure the base address satisfies i64 ld / st alignment requirement. + // Make sure the base address satisfies i64 ld / st alignment requirement. 
if (!Op0->hasOneMemOperand() || !(*Op0->memoperands_begin())->getValue() || (*Op0->memoperands_begin())->isVolatile()) @@ -1370,7 +1522,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, unsigned Align = (*Op0->memoperands_begin())->getAlignment(); const Function *Func = MF->getFunction(); unsigned ReqAlign = STI->hasV6Ops() - ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext())) + ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) : 8; // Pre-v6 need 8-byte align if (Align < ReqAlign) return false; @@ -1404,13 +1556,22 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, if (EvenReg == OddReg) return false; BaseReg = Op0->getOperand(1).getReg(); - if (!isT2) - OffReg = Op0->getOperand(2).getReg(); Pred = llvm::getInstrPredicate(Op0, PredReg); dl = Op0->getDebugLoc(); return true; } +namespace { + struct OffsetCompare { + bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { + int LOffset = getMemoryOpOffset(LHS); + int ROffset = getMemoryOpOffset(RHS); + assert(LHS == RHS || LOffset != ROffset); + return LOffset > ROffset; + } + }; +} + bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, SmallVector<MachineInstr*, 4> &Ops, unsigned Base, bool isLd, @@ -1493,14 +1654,14 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, MachineInstr *Op0 = Ops.back(); MachineInstr *Op1 = Ops[Ops.size()-2]; unsigned EvenReg = 0, OddReg = 0; - unsigned BaseReg = 0, OffReg = 0, PredReg = 0; + unsigned BaseReg = 0, PredReg = 0; ARMCC::CondCodes Pred = ARMCC::AL; bool isT2 = false; unsigned NewOpc = 0; int Offset = 0; DebugLoc dl; if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, - EvenReg, OddReg, BaseReg, OffReg, + EvenReg, OddReg, BaseReg, Offset, PredReg, Pred, isT2)) { Ops.pop_back(); Ops.pop_back(); @@ -1512,8 +1673,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, .addReg(EvenReg, RegState::Define) .addReg(OddReg, RegState::Define) .addReg(BaseReg); + // FIXME: We're converting from LDRi12 to an insn that still + // uses addrmode2, so we need an explicit offset reg. It should + // always by reg0 since we're transforming LDRi12s. if (!isT2) - MIB.addReg(OffReg); + MIB.addReg(0); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); ++NumLDRDFormed; } else { @@ -1522,8 +1686,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, .addReg(EvenReg) .addReg(OddReg) .addReg(BaseReg); + // FIXME: We're converting from LDRi12 to an insn that still + // uses addrmode2, so we need an explicit offset reg. It should + // always by reg0 since we're transforming STRi12s. if (!isT2) - MIB.addReg(OffReg); + MIB.addReg(0); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); ++NumSTRDFormed; } diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp new file mode 100644 index 0000000..6d7b485 --- /dev/null +++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp @@ -0,0 +1,1230 @@ +//===-- ARM/ARMMCCodeEmitter.cpp - Convert ARM code to machine code -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARMMCCodeEmitter class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mccodeemitter" +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMFixupKinds.h" +#include "ARMInstrInfo.h" +#include "ARMMCExpr.h" +#include "ARMSubtarget.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); +STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created."); + +namespace { +class ARMMCCodeEmitter : public MCCodeEmitter { + ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT + const TargetMachine &TM; + const TargetInstrInfo &TII; + const ARMSubtarget *Subtarget; + MCContext &Ctx; + +public: + ARMMCCodeEmitter(TargetMachine &tm, MCContext &ctx) + : TM(tm), TII(*TM.getInstrInfo()), + Subtarget(&TM.getSubtarget<ARMSubtarget>()), Ctx(ctx) { + } + + ~ARMMCCodeEmitter() {} + + unsigned getMachineSoImmOpValue(unsigned SoImm) const; + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. + unsigned getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getHiLo16ImmOpValue - Return the encoding for the hi / low 16-bit of + /// the specified operand. This is used for operands with :lower16: and + /// :upper16: prefixes. + uint32_t getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + bool EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, + unsigned &Reg, unsigned &Imm, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getThumbBLTargetOpValue - Return encoding info for Thumb immediate + /// BL branch target. + uint32_t getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate + /// BLX branch target. + uint32_t getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getThumbBRTargetOpValue - Return encoding info for Thumb branch target. + uint32_t getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target. + uint32_t getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getThumbCBTargetOpValue - Return encoding info for Thumb branch target. + uint32_t getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getBranchTargetOpValue - Return encoding info for 24-bit immediate + /// branch target. + uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit + /// immediate Thumb2 direct branch target. 
+ uint32_t getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate + /// branch target. + uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getAdrLabelOpValue - Return encoding info for 12-bit immediate + /// ADR label target. + uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + uint32_t getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + uint32_t getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + + /// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' + /// operand. + uint32_t getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getThumbAddrModeRegRegOpValue - Return encoding for 'reg + reg' operand. + uint32_t getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups)const; + + /// getT2AddrModeImm8s4OpValue - Return encoding info for 'reg +/- imm8<<2' + /// operand. + uint32_t getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + + /// getLdStSORegOpValue - Return encoding info for 'reg +/- reg shop imm' + /// operand as needed by load/store instructions. + uint32_t getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getLdStmModeOpValue - Return encoding for load/store multiple mode. + uint32_t getLdStmModeOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm(); + switch (Mode) { + default: assert(0 && "Unknown addressing sub-mode!"); + case ARM_AM::da: return 0; + case ARM_AM::ia: return 1; + case ARM_AM::db: return 2; + case ARM_AM::ib: return 3; + } + } + /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. + /// + unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const { + switch (ShOpc) { + default: llvm_unreachable("Unknown shift opc!"); + case ARM_AM::no_shift: + case ARM_AM::lsl: return 0; + case ARM_AM::lsr: return 1; + case ARM_AM::asr: return 2; + case ARM_AM::ror: + case ARM_AM::rrx: return 3; + } + return 0; + } + + /// getAddrMode2OpValue - Return encoding for addrmode2 operands. + uint32_t getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getAddrMode2OffsetOpValue - Return encoding for am2offset operands. + uint32_t getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getAddrMode3OffsetOpValue - Return encoding for am3offset operands. + uint32_t getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getAddrMode3OpValue - Return encoding for addrmode3 operands. + uint32_t getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getAddrModeThumbSPOpValue - Return encoding info for 'reg +/- imm12' + /// operand. + uint32_t getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getAddrModeISOpValue - Encode the t_addrmode_is# operands. 
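The seemingly arbitrary values in getLdStmModeOpValue above (da=0, ia=1, db=2, ib=3) are, if I read the A32 encoding correctly, just the P:U bit pair of a load/store-multiple instruction: P (increment/decrement before) in the high bit, U (upward, i.e. add) in the low bit. Likewise getShiftOp returns the standard 2-bit shift-type field (lsl=0, lsr=1, asr=2, ror=3, with rrx sharing the ror encoding). A one-line restatement of the former:

    // da -> P=0,U=0 -> 0;  ia -> P=0,U=1 -> 1;  db -> P=1,U=0 -> 2;  ib -> P=1,U=1 -> 3
    unsigned ldstmModeBits(bool P, bool U) { return ((unsigned)P << 1) | (unsigned)U; }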
+ uint32_t getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands. + uint32_t getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand. + uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getCCOutOpValue - Return encoding of the 's' bit. + unsigned getCCOutOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + // The operand is either reg0 or CPSR. The 's' bit is encoded as '0' or + // '1' respectively. + return MI.getOperand(Op).getReg() == ARM::CPSR; + } + + /// getSOImmOpValue - Return an encoded 12-bit shifted-immediate value. + unsigned getSOImmOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + unsigned SoImm = MI.getOperand(Op).getImm(); + int SoImmVal = ARM_AM::getSOImmVal(SoImm); + assert(SoImmVal != -1 && "Not a valid so_imm value!"); + + // Encode rotate_imm. + unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1) + << ARMII::SoRotImmShift; + + // Encode immed_8. + Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal); + return Binary; + } + + /// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value. + unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + unsigned SoImm = MI.getOperand(Op).getImm(); + unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm); + assert(Encoded != ~0U && "Not a Thumb2 so_imm value?"); + return Encoded; + } + + unsigned getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getSORegOpValue - Return an encoded so_reg shifted register value. 
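getSOImmOpValue above packs the classic A32 modified immediate: an 8-bit value rotated right by an even amount, stored as rotation/2 in the top four bits of the 12-bit field. A decoder sketch with one worked value (plain C++; LLVM's own helpers for this live in ARMAddressingModes.h):

    #include <cstdint>

    // Decode a 12-bit so_imm field: imm8 rotated right by 2 * rot4.
    uint32_t decodeSOImm(uint32_t field12) {
      uint32_t imm8 = field12 & 0xFF;
      uint32_t rot  = ((field12 >> 8) & 0xF) * 2;
      return rot == 0 ? imm8 : ((imm8 >> rot) | (imm8 << (32 - rot)));
    }
    // decodeSOImm(0xA0F) == 0x0000F000: 0x0F rotated right by 20.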
+ unsigned getSORegOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getT2SORegOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + + unsigned getRotImmOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + switch (MI.getOperand(Op).getImm()) { + default: assert (0 && "Not a valid rot_imm value!"); + case 0: return 0; + case 8: return 1; + case 16: return 2; + case 24: return 3; + } + } + + unsigned getImmMinusOneOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + return MI.getOperand(Op).getImm() - 1; + } + + unsigned getNEONVcvtImm32OpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + return 64 - MI.getOperand(Op).getImm(); + } + + unsigned getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + + unsigned getMsbOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + + unsigned getRegisterListOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + + unsigned NEONThumb2DataIPostEncoder(const MCInst &MI, + unsigned EncodedValue) const; + unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI, + unsigned EncodedValue) const; + unsigned NEONThumb2DupPostEncoder(const MCInst &MI, + unsigned EncodedValue) const; + + unsigned VFPThumb2PostEncoder(const MCInst &MI, + unsigned EncodedValue) const; + + void EmitByte(unsigned char C, raw_ostream &OS) const { + OS << (char)C; + } + + void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const { + // Output the constant in little endian byte order. + for (unsigned i = 0; i != Size; ++i) { + EmitByte(Val & 255, OS); + Val >>= 8; + } + } + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; +}; + +} // end anonymous namespace + +MCCodeEmitter *llvm::createARMMCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx) { + return new ARMMCCodeEmitter(TM, Ctx); +} + +/// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing +/// instructions, and rewrite them to their Thumb2 form if we are currently in +/// Thumb2 mode. +unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI, + unsigned EncodedValue) const { + if (Subtarget->isThumb2()) { + // NEON Thumb2 data-processsing encodings are very simple: bit 24 is moved + // to bit 12 of the high half-word (i.e. bit 28), and bits 27-24 are + // set to 1111. + unsigned Bit24 = EncodedValue & 0x01000000; + unsigned Bit28 = Bit24 << 4; + EncodedValue &= 0xEFFFFFFF; + EncodedValue |= Bit28; + EncodedValue |= 0x0F000000; + } + + return EncodedValue; +} + +/// NEONThumb2LoadStorePostEncoder - Post-process encoded NEON load/store +/// instructions, and rewrite them to their Thumb2 form if we are currently in +/// Thumb2 mode. 
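A worked value for NEONThumb2DataIPostEncoder above, using a hypothetical bit pattern: moving bit 24 up to bit 28 and forcing bits 27-24 to 1111 is exactly the A32 1111001x to T32 111x1111 prefix change for NEON data-processing encodings.

    // 0xF2xxxxxx (bit 24 clear) -> 0xEFxxxxxx;  0xF3xxxxxx (bit 24 set) -> 0xFFxxxxxx
    static_assert((((0xF2810051u & 0xEFFFFFFFu) |
                    ((0xF2810051u & 0x01000000u) << 4)) | 0x0F000000u) == 0xEF810051u,
                  "bit 24 clear: prefix 0xF2 becomes 0xEF");
    static_assert((((0xF3810051u & 0xEFFFFFFFu) |
                    ((0xF3810051u & 0x01000000u) << 4)) | 0x0F000000u) == 0xFF810051u,
                  "bit 24 set: prefix 0xF3 becomes 0xFF");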
+unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI,
+                                                 unsigned EncodedValue) const {
+  if (Subtarget->isThumb2()) {
+    EncodedValue &= 0xF0FFFFFF;
+    EncodedValue |= 0x09000000;
+  }
+
+  return EncodedValue;
+}
+
+/// NEONThumb2DupPostEncoder - Post-process encoded NEON vdup
+/// instructions, and rewrite them to their Thumb2 form if we are currently in
+/// Thumb2 mode.
+unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
+                                                 unsigned EncodedValue) const {
+  if (Subtarget->isThumb2()) {
+    EncodedValue &= 0x00FFFFFF;
+    EncodedValue |= 0xEE000000;
+  }
+
+  return EncodedValue;
+}
+
+/// VFPThumb2PostEncoder - Post-process encoded VFP instructions and rewrite
+/// them to their Thumb2 form if we are currently in Thumb2 mode.
+unsigned ARMMCCodeEmitter::
+VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const {
+  if (Subtarget->isThumb2()) {
+    EncodedValue &= 0x0FFFFFFF;
+    EncodedValue |= 0xE0000000;
+  }
+  return EncodedValue;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  if (MO.isReg()) {
+    unsigned Reg = MO.getReg();
+    unsigned RegNo = getARMRegisterNumbering(Reg);
+
+    // Q registers are encoded as 2x their register number.
+    switch (Reg) {
+    default:
+      return RegNo;
+    case ARM::Q0:  case ARM::Q1:  case ARM::Q2:  case ARM::Q3:
+    case ARM::Q4:  case ARM::Q5:  case ARM::Q6:  case ARM::Q7:
+    case ARM::Q8:  case ARM::Q9:  case ARM::Q10: case ARM::Q11:
+    case ARM::Q12: case ARM::Q13: case ARM::Q14: case ARM::Q15:
+      return 2 * RegNo;
+    }
+  } else if (MO.isImm()) {
+    return static_cast<unsigned>(MO.getImm());
+  } else if (MO.isFPImm()) {
+    return static_cast<unsigned>(APFloat(MO.getFPImm())
+                     .bitcastToAPInt().getHiBits(32).getLimitedValue());
+  }
+
+  llvm_unreachable("Unable to encode MCOperand!");
+  return 0;
+}
+
+/// EncodeAddrModeOpValues - Decompose a 'reg +/- imm' operand into its
+/// register and absolute-immediate parts; the boolean result is the 'U'
+/// (add) bit.
+bool ARMMCCodeEmitter::
+EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
+                       unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO  = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+
+  Reg = getARMRegisterNumbering(MO.getReg());
+
+  int32_t SImm = MO1.getImm();
+  bool isAdd = true;
+
+  // Special value for #-0
+  if (SImm == INT32_MIN)
+    SImm = 0;
+
+  // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+  if (SImm < 0) {
+    SImm = -SImm;
+    isAdd = false;
+  }
+
+  Imm = SImm;
+  return isAdd;
+}
+
+/// getBranchTargetOpValue - Helper function to get the branch target operand,
+/// which is either an immediate or requires a fixup.
+static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                                       unsigned FixupKind,
+                                       SmallVectorImpl<MCFixup> &Fixups) {
+  const MCOperand &MO = MI.getOperand(OpIdx);
+
+  // If the destination is an immediate, we have nothing to do.
+  if (MO.isImm()) return MO.getImm();
+  assert(MO.isExpr() && "Unexpected branch target type!");
+  const MCExpr *Expr = MO.getExpr();
+  MCFixupKind Kind = MCFixupKind(FixupKind);
+  Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+  // All of the information is in the fixup.
+  return 0;
+}
+
+/// getThumbBLTargetOpValue - Return encoding info for immediate branch target.
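+/// For example (illustrative): for 'bl _foo' the target operand is a symbol
+/// expression, so the helper above records a fixup_arm_thumb_bl against
+/// '_foo' and returns 0; the assembler resolves the offset later.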
+uint32_t ARMMCCodeEmitter::
+getThumbBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                        SmallVectorImpl<MCFixup> &Fixups) const {
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bl, Fixups);
+}
+
+/// getThumbBLXTargetOpValue - Return encoding info for Thumb immediate
+/// BLX branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                         SmallVectorImpl<MCFixup> &Fixups) const {
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_blx, Fixups);
+}
+
+/// getThumbBRTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBRTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                        SmallVectorImpl<MCFixup> &Fixups) const {
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_br, Fixups);
+}
+
+/// getThumbBCCTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbBCCTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                         SmallVectorImpl<MCFixup> &Fixups) const {
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_bcc, Fixups);
+}
+
+/// getThumbCBTargetOpValue - Return encoding info for Thumb branch target.
+uint32_t ARMMCCodeEmitter::
+getThumbCBTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                        SmallVectorImpl<MCFixup> &Fixups) const {
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cb, Fixups);
+}
+
+/// Return true if this branch has a non-always predication.
+static bool HasConditionalBranch(const MCInst &MI) {
+  int NumOp = MI.getNumOperands();
+  if (NumOp >= 2) {
+    for (int i = 0; i < NumOp-1; ++i) {
+      const MCOperand &MCOp1 = MI.getOperand(i);
+      const MCOperand &MCOp2 = MI.getOperand(i + 1);
+      if (MCOp1.isImm() && MCOp2.isReg() &&
+          (MCOp2.getReg() == 0 || MCOp2.getReg() == ARM::CPSR)) {
+        if (ARMCC::CondCodes(MCOp1.getImm()) != ARMCC::AL)
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+/// getBranchTargetOpValue - Return encoding info for 24-bit immediate branch
+/// target.
+uint32_t ARMMCCodeEmitter::
+getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                       SmallVectorImpl<MCFixup> &Fixups) const {
+  // FIXME: This really, really shouldn't use TargetMachine. We don't want
+  // coupling between MC and TM anywhere we can help it.
+  if (Subtarget->isThumb2())
+    return
+      ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups);
+  return getARMBranchTargetOpValue(MI, OpIdx, Fixups);
+}
+
+/// getARMBranchTargetOpValue - Return encoding info for 24-bit immediate
+/// branch target.
+uint32_t ARMMCCodeEmitter::
+getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                          SmallVectorImpl<MCFixup> &Fixups) const {
+  if (HasConditionalBranch(MI))
+    return ::getBranchTargetOpValue(MI, OpIdx,
+                                    ARM::fixup_arm_condbranch, Fixups);
+  return ::getBranchTargetOpValue(MI, OpIdx,
+                                  ARM::fixup_arm_uncondbranch, Fixups);
+}
+
+/// getUnconditionalBranchTargetOpValue - Return encoding info for 24-bit
+/// immediate branch target.
+uint32_t ARMMCCodeEmitter::
+getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+                       SmallVectorImpl<MCFixup> &Fixups) const {
+  unsigned Val =
+    ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups);
+  bool I  = (Val & 0x800000);
+  bool J1 = (Val & 0x400000);
+  bool J2 = (Val & 0x200000);
+  if (I ^ J1)
+    Val &= ~0x400000;
+  else
+    Val |= 0x400000;
+
+  if (I ^ J2)
+    Val &= ~0x200000;
+  else
+    Val |= 0x200000;
+
+  return Val;
+}
+
+/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
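+/// For example (illustrative): 'adr r0, .LCPI0_0' records a
+/// fixup_arm_adr_pcrel_12 against the label and returns 0, leaving the
+/// PC-relative offset entirely to the fixup.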
+uint32_t ARMMCCodeEmitter::
+getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+                   SmallVectorImpl<MCFixup> &Fixups) const {
+  assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
+                                  Fixups);
+}
+
+/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label
+/// target.
+uint32_t ARMMCCodeEmitter::
+getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+                     SmallVectorImpl<MCFixup> &Fixups) const {
+  assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12,
+                                  Fixups);
+}
+
+/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR
+/// label target.
+uint32_t ARMMCCodeEmitter::
+getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+                        SmallVectorImpl<MCFixup> &Fixups) const {
+  assert(MI.getOperand(OpIdx).isExpr() && "Unexpected adr target type!");
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_thumb_adr_pcrel_10,
+                                  Fixups);
+}
+
+/// getThumbAddrModeRegRegOpValue - Return encoding info for 'reg + reg'
+/// operand.
+uint32_t ARMMCCodeEmitter::
+getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx,
+                              SmallVectorImpl<MCFixup> &) const {
+  // [Rn, Rm]
+  //   {5-3} = Rm
+  //   {2-0} = Rn
+  const MCOperand &MO1 = MI.getOperand(OpIdx);
+  const MCOperand &MO2 = MI.getOperand(OpIdx + 1);
+  unsigned Rn = getARMRegisterNumbering(MO1.getReg());
+  unsigned Rm = getARMRegisterNumbering(MO2.getReg());
+  return (Rm << 3) | Rn;
+}
+
+/// getAddrModeImm12OpValue - Return encoding info for 'reg +/- imm12' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
+                        SmallVectorImpl<MCFixup> &Fixups) const {
+  // {17-13} = reg
+  // {12}    = (U)nsigned (add == '1', sub == '0')
+  // {11-0}  = imm12
+  unsigned Reg, Imm12;
+  bool isAdd = true;
+  // If the first operand isn't a register, we have a label reference.
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  if (!MO.isReg()) {
+    Reg = getARMRegisterNumbering(ARM::PC);   // Rn is PC.
+    Imm12 = 0;
+    isAdd = false; // 'U' bit is set as part of the fixup.
+
+    assert(MO.isExpr() && "Unexpected machine operand type!");
+    const MCExpr *Expr = MO.getExpr();
+
+    MCFixupKind Kind;
+    if (Subtarget->isThumb2())
+      Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
+    else
+      Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
+    Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+    ++MCNumCPRelocations;
+  } else
+    isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm12, Fixups);
+
+  uint32_t Binary = Imm12 & 0xfff;
+  // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+  if (isAdd)
+    Binary |= (1 << 12);
+  Binary |= (Reg << 13);
+  return Binary;
+}
+
+/// getT2AddrModeImm8s4OpValue - Return encoding info for
+/// 'reg +/- imm8<<2' operand.
+uint32_t ARMMCCodeEmitter::
+getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
+                           SmallVectorImpl<MCFixup> &Fixups) const {
+  // {12-9} = reg
+  // {8}    = (U)nsigned (add == '1', sub == '0')
+  // {7-0}  = imm8
+  unsigned Reg, Imm8;
+  bool isAdd = true;
+  // If the first operand isn't a register, we have a label reference.
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  if (!MO.isReg()) {
+    Reg = getARMRegisterNumbering(ARM::PC);   // Rn is PC.
+    Imm8 = 0;
+    isAdd = false; // 'U' bit is set as part of the fixup.
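+    // A label reference here typically comes from a load of a constant-pool
+    // entry; Rn is forced to PC and the actual offset is supplied by the
+    // fixup recorded below.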
+
+    assert(MO.isExpr() && "Unexpected machine operand type!");
+    const MCExpr *Expr = MO.getExpr();
+    MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+    Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+    ++MCNumCPRelocations;
+  } else
+    isAdd = EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+
+  uint32_t Binary = (Imm8 >> 2) & 0xff;
+  // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
+  if (isAdd)
+    Binary |= (1 << 8);
+  Binary |= (Reg << 9);
+  return Binary;
+}
+
+// FIXME: This routine assumes that a binary
+// expression will always result in a PCRel expression
+// In reality, it's only true if one or more subexpressions
+// is itself a PCRel (i.e. "." in asm or some other pcrel construct)
+// but this is good enough for now.
+static bool EvaluateAsPCRel(const MCExpr *Expr) {
+  switch (Expr->getKind()) {
+  default: assert(0 && "Unexpected expression type");
+  case MCExpr::SymbolRef: return false;
+  case MCExpr::Binary:    return true;
+  }
+}
+
+uint32_t
+ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
+                                      SmallVectorImpl<MCFixup> &Fixups) const {
+  // {20-16} = imm{15-12}
+  // {11-0}  = imm{11-0}
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  if (MO.isImm())
+    // Hi / lo 16 bits already extracted during earlier passes.
+    return static_cast<unsigned>(MO.getImm());
+
+  // Handle :upper16: and :lower16: assembly prefixes.
+  const MCExpr *E = MO.getExpr();
+  if (E->getKind() == MCExpr::Target) {
+    const ARMMCExpr *ARM16Expr = cast<ARMMCExpr>(E);
+    E = ARM16Expr->getSubExpr();
+
+    MCFixupKind Kind;
+    switch (ARM16Expr->getKind()) {
+    default: assert(0 && "Unsupported ARMFixup");
+    case ARMMCExpr::VK_ARM_HI16:
+      if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E))
+        Kind = MCFixupKind(Subtarget->isThumb2()
+                           ? ARM::fixup_t2_movt_hi16_pcrel
+                           : ARM::fixup_arm_movt_hi16_pcrel);
+      else
+        Kind = MCFixupKind(Subtarget->isThumb2()
+                           ? ARM::fixup_t2_movt_hi16
+                           : ARM::fixup_arm_movt_hi16);
+      break;
+    case ARMMCExpr::VK_ARM_LO16:
+      if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E))
+        Kind = MCFixupKind(Subtarget->isThumb2()
+                           ? ARM::fixup_t2_movw_lo16_pcrel
+                           : ARM::fixup_arm_movw_lo16_pcrel);
+      else
+        Kind = MCFixupKind(Subtarget->isThumb2()
+                           ? ARM::fixup_t2_movw_lo16
+                           : ARM::fixup_arm_movw_lo16);
+      break;
+    }
+    Fixups.push_back(MCFixup::Create(0, E, Kind));
+    return 0;
+  }
+
+  llvm_unreachable("Unsupported MCExpr type in MCOperand!");
+  return 0;
+}
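+// For example (illustrative): for 'movw r0, :lower16:(sym)' the routine above
+// sees a VK_ARM_LO16 target expression, records a fixup_arm_movw_lo16 (or
+// fixup_t2_movw_lo16 in Thumb2 mode) against 'sym', and returns 0.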
+
+uint32_t ARMMCCodeEmitter::
+getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx,
+                    SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO  = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+  const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+  unsigned Rn = getARMRegisterNumbering(MO.getReg());
+  unsigned Rm = getARMRegisterNumbering(MO1.getReg());
+  unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm());
+  bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add;
+  ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm());
+  unsigned SBits = getShiftOp(ShOp);
+
+  // {16-13} = Rn
+  // {12}    = isAdd
+  // {11-0}  = shifter
+  //   {3-0}  = Rm
+  //   {4}    = 0
+  //   {6-5}  = type
+  //   {11-7} = imm
+  uint32_t Binary = Rm;
+  Binary |= Rn << 13;
+  Binary |= SBits << 5;
+  Binary |= ShImm << 7;
+  if (isAdd)
+    Binary |= 1 << 12;
+  return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx,
+                    SmallVectorImpl<MCFixup> &Fixups) const {
+  // {17-14}  Rn
+  // {13}     1 == imm12, 0 == Rm
+  // {12}     isAdd
+  // {11-0}   imm12/Rm
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  unsigned Rn = getARMRegisterNumbering(MO.getReg());
+  uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups);
+  Binary |= Rn << 14;
+  return Binary;
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+                          SmallVectorImpl<MCFixup> &Fixups) const {
+  // {13}     1 == imm12, 0 == Rm
+  // {12}     isAdd
+  // {11-0}   imm12/Rm
+  const MCOperand &MO  = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+  unsigned Imm = MO1.getImm();
+  bool isAdd = ARM_AM::getAM2Op(Imm) == ARM_AM::add;
+  bool isReg = MO.getReg() != 0;
+  uint32_t Binary = ARM_AM::getAM2Offset(Imm);
+  // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm12
+  if (isReg) {
+    ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm);
+    Binary <<= 7;                    // Shift amount is bits [11:7]
+    Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5]
+    Binary |= getARMRegisterNumbering(MO.getReg());   // Rm is bits [3:0]
+  }
+  return Binary | (isAdd << 12) | (isReg << 13);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx,
+                          SmallVectorImpl<MCFixup> &Fixups) const {
+  // {9}      1 == imm8, 0 == Rm
+  // {8}      isAdd
+  // {7-4}    imm7_4/zero
+  // {3-0}    imm3_0/Rm
+  const MCOperand &MO  = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+  unsigned Imm = MO1.getImm();
+  bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+  bool isImm = MO.getReg() == 0;
+  uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+  // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
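+  // For example (illustrative): a post-indexed '#+4' offset has reg0 here,
+  // so isImm=1 and isAdd=1, giving 0x304; an offset of '-r2' instead puts
+  // Rm=2 in bits {3-0} with bits {9} and {8} clear, giving 0x002.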
+  if (!isImm)
+    Imm8 = getARMRegisterNumbering(MO.getReg());
+  return Imm8 | (isAdd << 8) | (isImm << 9);
+}
+
+uint32_t ARMMCCodeEmitter::
+getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
+                    SmallVectorImpl<MCFixup> &Fixups) const {
+  // {13}     1 == imm8, 0 == Rm
+  // {12-9}   Rn
+  // {8}      isAdd
+  // {7-4}    imm7_4/zero
+  // {3-0}    imm3_0/Rm
+  const MCOperand &MO  = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx+1);
+  const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+  unsigned Rn = getARMRegisterNumbering(MO.getReg());
+  unsigned Imm = MO2.getImm();
+  bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
+  bool isImm = MO1.getReg() == 0;
+  uint32_t Imm8 = ARM_AM::getAM3Offset(Imm);
+  // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8
+  if (!isImm)
+    Imm8 = getARMRegisterNumbering(MO1.getReg());
+  return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13);
+}
+
+/// getAddrModeThumbSPOpValue - Encode the t_addrmode_sp operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeThumbSPOpValue(const MCInst &MI, unsigned OpIdx,
+                          SmallVectorImpl<MCFixup> &Fixups) const {
+  // [SP, #imm]
+  //   {7-0} = imm8
+  const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+  assert(MI.getOperand(OpIdx).getReg() == ARM::SP &&
+         "Unexpected base register!");
+
+  // The immediate is already shifted for the implicit zeroes, so no change
+  // here.
+  return MO1.getImm() & 0xff;
+}
+
+/// getAddrModeISOpValue - Encode the t_addrmode_is# operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx,
+                     SmallVectorImpl<MCFixup> &Fixups) const {
+  // [Rn, #imm]
+  //   {7-3} = imm5
+  //   {2-0} = Rn
+  const MCOperand &MO  = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+  unsigned Rn = getARMRegisterNumbering(MO.getReg());
+  unsigned Imm5 = MO1.getImm();
+  return ((Imm5 & 0x1f) << 3) | Rn;
+}
+
+/// getAddrModePCOpValue - Return encoding for t_addrmode_pc operands.
+uint32_t ARMMCCodeEmitter::
+getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx,
+                     SmallVectorImpl<MCFixup> &Fixups) const {
+  return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_thumb_cp, Fixups);
+}
+
+/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand.
+uint32_t ARMMCCodeEmitter::
+getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
+                    SmallVectorImpl<MCFixup> &Fixups) const {
+  // {12-9} = reg
+  // {8}    = (U)nsigned (add == '1', sub == '0')
+  // {7-0}  = imm8
+  unsigned Reg, Imm8;
+  bool isAdd;
+  // If the first operand isn't a register, we have a label reference.
+  const MCOperand &MO = MI.getOperand(OpIdx);
+  if (!MO.isReg()) {
+    Reg = getARMRegisterNumbering(ARM::PC);   // Rn is PC.
+    Imm8 = 0;
+    isAdd = false; // 'U' bit is handled as part of the fixup.
+
+    assert(MO.isExpr() && "Unexpected machine operand type!");
+    const MCExpr *Expr = MO.getExpr();
+    MCFixupKind Kind;
+    if (Subtarget->isThumb2())
+      Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+    else
+      Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
+    Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+
+    ++MCNumCPRelocations;
+  } else {
+    EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups);
+    isAdd = ARM_AM::getAM5Op(Imm8) == ARM_AM::add;
+  }
+
+  uint32_t Binary = ARM_AM::getAM5Offset(Imm8);
+  // Immediate is always encoded as positive. The 'U' bit controls add vs sub.
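+  // For example (illustrative): 'vldr d0, [r2, #8]' has a word offset of 2
+  // with the U bit set, so the lines below produce (2 << 9) | (1 << 8) | 2
+  // = 0x502.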
+ if (isAdd) + Binary |= (1 << 8); + Binary |= (Reg << 9); + return Binary; +} + +unsigned ARMMCCodeEmitter:: +getSORegOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + // Sub-operands are [reg, reg, imm]. The first register is Rm, the reg to be + // shifted. The second is either Rs, the amount to shift by, or reg0 in which + // case the imm contains the amount to shift by. + // + // {3-0} = Rm. + // {4} = 1 if reg shift, 0 if imm shift + // {6-5} = type + // If reg shift: + // {11-8} = Rs + // {7} = 0 + // else (imm shift) + // {11-7} = imm + + const MCOperand &MO = MI.getOperand(OpIdx); + const MCOperand &MO1 = MI.getOperand(OpIdx + 1); + const MCOperand &MO2 = MI.getOperand(OpIdx + 2); + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); + + // Encode Rm. + unsigned Binary = getARMRegisterNumbering(MO.getReg()); + + // Encode the shift opcode. + unsigned SBits = 0; + unsigned Rs = MO1.getReg(); + if (Rs) { + // Set shift operand (bit[7:4]). + // LSL - 0001 + // LSR - 0011 + // ASR - 0101 + // ROR - 0111 + // RRX - 0110 and bit[11:8] clear. + switch (SOpc) { + default: llvm_unreachable("Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x1; break; + case ARM_AM::lsr: SBits = 0x3; break; + case ARM_AM::asr: SBits = 0x5; break; + case ARM_AM::ror: SBits = 0x7; break; + case ARM_AM::rrx: SBits = 0x6; break; + } + } else { + // Set shift operand (bit[6:4]). + // LSL - 000 + // LSR - 010 + // ASR - 100 + // ROR - 110 + switch (SOpc) { + default: llvm_unreachable("Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x0; break; + case ARM_AM::lsr: SBits = 0x2; break; + case ARM_AM::asr: SBits = 0x4; break; + case ARM_AM::ror: SBits = 0x6; break; + } + } + + Binary |= SBits << 4; + if (SOpc == ARM_AM::rrx) + return Binary; + + // Encode the shift operation Rs or shift_imm (except rrx). + if (Rs) { + // Encode Rs bit[11:8]. + assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); + return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); + } + + // Encode shift_imm bit[11:7]. + return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7; +} + +unsigned ARMMCCodeEmitter:: +getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO1 = MI.getOperand(OpNum); + const MCOperand &MO2 = MI.getOperand(OpNum+1); + const MCOperand &MO3 = MI.getOperand(OpNum+2); + + // Encoded as [Rn, Rm, imm]. + // FIXME: Needs fixup support. + unsigned Value = getARMRegisterNumbering(MO1.getReg()); + Value <<= 4; + Value |= getARMRegisterNumbering(MO2.getReg()); + Value <<= 2; + Value |= MO3.getImm(); + + return Value; +} + +unsigned ARMMCCodeEmitter:: +getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO1 = MI.getOperand(OpNum); + const MCOperand &MO2 = MI.getOperand(OpNum+1); + + // FIXME: Needs fixup support. + unsigned Value = getARMRegisterNumbering(MO1.getReg()); + + // Even though the immediate is 8 bits long, we need 9 bits in order + // to represent the (inverse of the) sign bit. + Value <<= 9; + int32_t tmp = (int32_t)MO2.getImm(); + if (tmp < 0) + tmp = abs(tmp); + else + Value |= 256; // Set the ADD bit + Value |= tmp & 255; + return Value; +} + +unsigned ARMMCCodeEmitter:: +getT2AddrModeImm8OffsetOpValue(const MCInst &MI, unsigned OpNum, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO1 = MI.getOperand(OpNum); + + // FIXME: Needs fixup support. 
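+  // For example (illustrative): an offset of #+4 encodes as 0x104 (ADD bit
+  // set) and #-4 as 0x004, per the sign handling below.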
+
+  unsigned Value = 0;
+  int32_t tmp = (int32_t)MO1.getImm();
+  if (tmp < 0)
+    tmp = abs(tmp);
+  else
+    Value |= 256; // Set the ADD bit
+  Value |= tmp & 255;
+  return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2AddrModeImm12OffsetOpValue(const MCInst &MI, unsigned OpNum,
+                                SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO1 = MI.getOperand(OpNum);
+
+  // FIXME: Needs fixup support.
+  unsigned Value = 0;
+  int32_t tmp = (int32_t)MO1.getImm();
+  if (tmp < 0)
+    tmp = abs(tmp);
+  else
+    Value |= 4096; // Set the ADD bit
+  Value |= tmp & 4095;
+  return Value;
+}
+
+unsigned ARMMCCodeEmitter::
+getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  // Sub-operands are [reg, imm]. The first register is Rm, the reg to be
+  // shifted. The second is the amount to shift by.
+  //
+  // {3-0}  = Rm.
+  // {4}    = 0
+  // {6-5}  = type
+  // {11-7} = imm
+
+  const MCOperand &MO  = MI.getOperand(OpIdx);
+  const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+  ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm());
+
+  // Encode Rm.
+  unsigned Binary = getARMRegisterNumbering(MO.getReg());
+
+  // Encode the shift opcode.
+  unsigned SBits = 0;
+  // Set shift operand (bit[6:4]).
+  // LSL - 000
+  // LSR - 010
+  // ASR - 100
+  // ROR - 110
+  switch (SOpc) {
+  default: llvm_unreachable("Unknown shift opc!");
+  case ARM_AM::lsl: SBits = 0x0; break;
+  case ARM_AM::lsr: SBits = 0x2; break;
+  case ARM_AM::asr: SBits = 0x4; break;
+  case ARM_AM::ror: SBits = 0x6; break;
+  }
+
+  Binary |= SBits << 4;
+  if (SOpc == ARM_AM::rrx)
+    return Binary;
+
+  // Encode shift_imm bit[11:7].
+  return Binary | ARM_AM::getSORegOffset(MO1.getImm()) << 7;
+}
+
+unsigned ARMMCCodeEmitter::
+getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
+                               SmallVectorImpl<MCFixup> &Fixups) const {
+  // 10 bits. Lower 5 bits are the lsb of the mask, high five bits are the
+  // msb of the mask.
+  const MCOperand &MO = MI.getOperand(Op);
+  uint32_t v = ~MO.getImm();
+  uint32_t lsb = CountTrailingZeros_32(v);
+  uint32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+  assert(v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
+  return lsb | (msb << 5);
+}
+
+unsigned ARMMCCodeEmitter::
+getMsbOpValue(const MCInst &MI, unsigned Op,
+              SmallVectorImpl<MCFixup> &Fixups) const {
+  // MSB - 5 bits.
+  uint32_t lsb = MI.getOperand(Op-1).getImm();
+  uint32_t width = MI.getOperand(Op).getImm();
+  uint32_t msb = lsb+width-1;
+  assert(width != 0 && msb < 32 && "Illegal bit width!");
+  return msb;
+}
+
+unsigned ARMMCCodeEmitter::
+getRegisterListOpValue(const MCInst &MI, unsigned Op,
+                       SmallVectorImpl<MCFixup> &Fixups) const {
+  // VLDM/VSTM:
+  //   {12-8} = Vd
+  //   {7-0}  = Number of registers
+  //
+  // LDM/STM:
+  //   {15-0} = Bitfield of GPRs.
+  unsigned Reg = MI.getOperand(Op).getReg();
+  bool SPRRegs = ARM::SPRRegClass.contains(Reg);
+  bool DPRRegs = ARM::DPRRegClass.contains(Reg);
+
+  unsigned Binary = 0;
+
+  if (SPRRegs || DPRRegs) {
+    // VLDM/VSTM
+    unsigned RegNo = getARMRegisterNumbering(Reg);
+    unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff;
+    Binary |= (RegNo & 0x1f) << 8;
+    if (SPRRegs)
+      Binary |= NumRegs;
+    else
+      Binary |= NumRegs * 2;
+  } else {
+    for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
+      unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg());
+      Binary |= 1 << RegNo;
+    }
+  }
+
+  return Binary;
+}
+
+/// getAddrMode6AddressOpValue - Encode an addrmode6 register number along
+/// with the alignment operand.
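+/// For example (illustrative): '[r1, :64]' encodes register number 1 with
+/// Align 0x01, i.e. 0x11, per the switch below.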
+unsigned ARMMCCodeEmitter:: +getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &Reg = MI.getOperand(Op); + const MCOperand &Imm = MI.getOperand(Op + 1); + + unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned Align = 0; + + switch (Imm.getImm()) { + default: break; + case 2: + case 4: + case 8: Align = 0x01; break; + case 16: Align = 0x02; break; + case 32: Align = 0x03; break; + } + + return RegNo | (Align << 4); +} + +/// getAddrMode6DupAddressOpValue - Encode an addrmode6 register number and +/// alignment operand for use in VLD-dup instructions. This is the same as +/// getAddrMode6AddressOpValue except for the alignment encoding, which is +/// different for VLD4-dup. +unsigned ARMMCCodeEmitter:: +getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &Reg = MI.getOperand(Op); + const MCOperand &Imm = MI.getOperand(Op + 1); + + unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned Align = 0; + + switch (Imm.getImm()) { + default: break; + case 2: + case 4: + case 8: Align = 0x01; break; + case 16: Align = 0x03; break; + } + + return RegNo | (Align << 4); +} + +unsigned ARMMCCodeEmitter:: +getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(Op); + if (MO.getReg() == 0) return 0x0D; + return MO.getReg(); +} + +void ARMMCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + // Pseudo instructions don't get encoded. + const TargetInstrDesc &Desc = TII.get(MI.getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + if ((TSFlags & ARMII::FormMask) == ARMII::Pseudo) + return; + int Size; + // Basic size info comes from the TSFlags field. + switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { + default: llvm_unreachable("Unexpected instruction size!"); + case ARMII::Size2Bytes: Size = 2; break; + case ARMII::Size4Bytes: Size = 4; break; + } + uint32_t Binary = getBinaryCodeForInstr(MI, Fixups); + // Thumb 32-bit wide instructions need to emit the high order halfword + // first. + if (Subtarget->isThumb() && Size == 4) { + EmitConstant(Binary >> 16, 2, OS); + EmitConstant(Binary & 0xffff, 2, OS); + } else + EmitConstant(Binary, Size, OS); + ++MCNumEmitted; // Keep track of the # of mi's emitted. +} + +#include "ARMGenMCCodeEmitter.inc" diff --git a/lib/Target/ARM/ARMMCExpr.cpp b/lib/Target/ARM/ARMMCExpr.cpp new file mode 100644 index 0000000..2727ba8 --- /dev/null +++ b/lib/Target/ARM/ARMMCExpr.cpp @@ -0,0 +1,73 @@ +//===-- ARMMCExpr.cpp - ARM specific MC expression classes ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "armmcexpr" +#include "ARMMCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAssembler.h" +using namespace llvm; + +const ARMMCExpr* +ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx) { + return new (Ctx) ARMMCExpr(Kind, Expr); +} + +void ARMMCExpr::PrintImpl(raw_ostream &OS) const { + switch (Kind) { + default: assert(0 && "Invalid kind!"); + case VK_ARM_HI16: OS << ":upper16:"; break; + case VK_ARM_LO16: OS << ":lower16:"; break; + } + + const MCExpr *Expr = getSubExpr(); + if (Expr->getKind() != MCExpr::SymbolRef) + OS << '('; + Expr->print(OS); + if (Expr->getKind() != MCExpr::SymbolRef) + OS << ')'; +} + +bool +ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const { + return false; +} + +// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps +// that method should be made public? +static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) { + switch (Value->getKind()) { + case MCExpr::Target: + assert(0 && "Can't handle nested target expr!"); + break; + + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); + AddValueSymbols_(BE->getLHS(), Asm); + AddValueSymbols_(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: + Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); + break; + + case MCExpr::Unary: + AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); + break; + } +} + +void ARMMCExpr::AddValueSymbols(MCAssembler *Asm) const { + AddValueSymbols_(getSubExpr(), Asm); +} diff --git a/lib/Target/ARM/ARMMCExpr.h b/lib/Target/ARM/ARMMCExpr.h new file mode 100644 index 0000000..d42f766 --- /dev/null +++ b/lib/Target/ARM/ARMMCExpr.h @@ -0,0 +1,73 @@ +//===-- ARMMCExpr.h - ARM specific MC expression classes ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMMCEXPR_H +#define ARMMCEXPR_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +class ARMMCExpr : public MCTargetExpr { +public: + enum VariantKind { + VK_ARM_None, + VK_ARM_HI16, // The R_ARM_MOVT_ABS relocation (:upper16: in the .s file) + VK_ARM_LO16 // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the .s file) + }; + +private: + const VariantKind Kind; + const MCExpr *Expr; + + explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr) + : Kind(_Kind), Expr(_Expr) {} + +public: + /// @name Construction + /// @{ + + static const ARMMCExpr *Create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx); + + static const ARMMCExpr *CreateUpper16(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_ARM_HI16, Expr, Ctx); + } + + static const ARMMCExpr *CreateLower16(const MCExpr *Expr, MCContext &Ctx) { + return Create(VK_ARM_LO16, Expr, Ctx); + } + + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this expression. + VariantKind getKind() const { return Kind; } + + /// getSubExpr - Get the child of this expression. 
+ const MCExpr *getSubExpr() const { return Expr; } + + /// @} + + void PrintImpl(raw_ostream &OS) const; + bool EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const; + void AddValueSymbols(MCAssembler *) const; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } + + static bool classof(const ARMMCExpr *) { return true; } + +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index ab2b06b..59d6050 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -12,122 +12,69 @@ // //===----------------------------------------------------------------------===// -#include "ARMMCInstLower.h" -//#include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/CodeGen/AsmPrinter.h" +#include "ARM.h" +#include "ARMAsmPrinter.h" +#include "ARMMCExpr.h" +#include "llvm/Constants.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -//#include "llvm/MC/MCStreamer.h" #include "llvm/Target/Mangler.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallString.h" using namespace llvm; -#if 0 -const ARMSubtarget &ARMMCInstLower::getSubtarget() const { - return AsmPrinter.getSubtarget(); -} - -MachineModuleInfoMachO &ARMMCInstLower::getMachOMMI() const { - assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin"); - return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>(); -} -#endif - -MCSymbol *ARMMCInstLower:: -GetGlobalAddressSymbol(const MachineOperand &MO) const { - // FIXME: HANDLE PLT references how?? - switch (MO.getTargetFlags()) { - default: assert(0 && "Unknown target flag on GV operand"); - case 0: break; - } - - return Printer.Mang->getSymbol(MO.getGlobal()); -} - -MCSymbol *ARMMCInstLower:: -GetExternalSymbolSymbol(const MachineOperand &MO) const { - // FIXME: HANDLE PLT references how?? +static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, + ARMAsmPrinter &Printer) { + MCContext &Ctx = Printer.OutContext; + const MCExpr *Expr; switch (MO.getTargetFlags()) { - default: assert(0 && "Unknown target flag on GV operand"); - case 0: break; + default: { + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx); + switch (MO.getTargetFlags()) { + default: + assert(0 && "Unknown target flag on symbol operand"); + case 0: + break; + case ARMII::MO_LO16: + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx); + Expr = ARMMCExpr::CreateLower16(Expr, Ctx); + break; + case ARMII::MO_HI16: + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx); + Expr = ARMMCExpr::CreateUpper16(Expr, Ctx); + break; + } + break; } - - return Printer.GetExternalSymbolSymbol(MO.getSymbolName()); -} - - -MCSymbol *ARMMCInstLower:: -GetJumpTableSymbol(const MachineOperand &MO) const { - SmallString<256> Name; - raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI" - << Printer.getFunctionNumber() << '_' << MO.getIndex(); - -#if 0 - switch (MO.getTargetFlags()) { - default: llvm_unreachable("Unknown target flag on GV operand"); + case ARMII::MO_PLT: + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT, Ctx); + break; } -#endif - - // Create a symbol for the name. 
- return Ctx.GetOrCreateSymbol(Name.str()); -} -MCSymbol *ARMMCInstLower:: -GetConstantPoolIndexSymbol(const MachineOperand &MO) const { - SmallString<256> Name; - raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI" - << Printer.getFunctionNumber() << '_' << MO.getIndex(); - -#if 0 - switch (MO.getTargetFlags()) { - default: llvm_unreachable("Unknown target flag on GV operand"); - } -#endif - - // Create a symbol for the name. - return Ctx.GetOrCreateSymbol(Name.str()); -} - -MCOperand ARMMCInstLower:: -LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { - // FIXME: We would like an efficient form for this, so we don't have to do a - // lot of extra uniquing. - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); - -#if 0 - switch (MO.getTargetFlags()) { - default: llvm_unreachable("Unknown target flag on GV operand"); - } -#endif - if (!MO.isJTI() && MO.getOffset()) Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); return MCOperand::CreateExpr(Expr); -} +} -void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { +void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + ARMAsmPrinter &AP) { OutMI.setOpcode(MI->getOpcode()); - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - + MCOperand MCOp; switch (MO.getType()) { default: MI->dump(); assert(0 && "unknown operand type"); case MachineOperand::MO_Register: - // Ignore all implicit register operands. - if (MO.isImplicit()) continue; + // Ignore all non-CPSR implicit register operands. + if (MO.isImplicit() && MO.getReg() != ARM::CPSR) continue; assert(!MO.getSubReg() && "Subregs should be eliminated!"); MCOp = MCOperand::CreateReg(MO.getReg()); break; @@ -136,27 +83,33 @@ void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - MO.getMBB()->getSymbol(), Ctx)); + MO.getMBB()->getSymbol(), AP.OutContext)); break; case MachineOperand::MO_GlobalAddress: - MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + MCOp = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP); break; case MachineOperand::MO_ExternalSymbol: - MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + MCOp = GetSymbolRef(MO, + AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP); break; case MachineOperand::MO_JumpTableIndex: - MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO)); + MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP); break; case MachineOperand::MO_ConstantPoolIndex: - MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); + MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP); break; case MachineOperand::MO_BlockAddress: - MCOp = LowerSymbolOperand(MO, Printer.GetBlockAddressSymbol( - MO.getBlockAddress())); + MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP); break; + case MachineOperand::MO_FPImmediate: { + APFloat Val = MO.getFPImm()->getValueAPF(); + bool ignored; + Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + MCOp = MCOperand::CreateFPImm(Val.convertToDouble()); + break; + } } - + OutMI.addOperand(MCOp); } - } diff --git a/lib/Target/ARM/ARMMCInstLower.h b/lib/Target/ARM/ARMMCInstLower.h deleted file mode 100644 index b81a306..0000000 --- a/lib/Target/ARM/ARMMCInstLower.h +++ /dev/null @@ -1,56 +0,0 @@ -//===-- ARMMCInstLower.h - Lower MachineInstr to MCInst 
-------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM_MCINSTLOWER_H -#define ARM_MCINSTLOWER_H - -#include "llvm/Support/Compiler.h" - -namespace llvm { - class AsmPrinter; - class MCAsmInfo; - class MCContext; - class MCInst; - class MCOperand; - class MCSymbol; - class MachineInstr; - class MachineModuleInfoMachO; - class MachineOperand; - class Mangler; - //class ARMSubtarget; - -/// ARMMCInstLower - This class is used to lower an MachineInstr into an MCInst. -class LLVM_LIBRARY_VISIBILITY ARMMCInstLower { - MCContext &Ctx; - Mangler &Mang; - AsmPrinter &Printer; - - //const ARMSubtarget &getSubtarget() const; -public: - ARMMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer) - : Ctx(ctx), Mang(mang), Printer(printer) {} - - void Lower(const MachineInstr *MI, MCInst &OutMI) const; - - //MCSymbol *GetPICBaseSymbol() const; - MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; - MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; - MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; - MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; - MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; - -/* -private: - MachineModuleInfoMachO &getMachOMMI() const; - */ -}; - -} - -#endif diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 514c26b..138f0c2 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -22,8 +22,8 @@ namespace llvm { -/// ARMFunctionInfo - This class is derived from MachineFunction private -/// ARM target-specific information for each MachineFunction. +/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and +/// contains private ARM-specific information for each MachineFunction. class ARMFunctionInfo : public MachineFunctionInfo { /// isThumb - True if this function is compiled under Thumb mode. @@ -79,15 +79,11 @@ class ARMFunctionInfo : public MachineFunctionInfo { BitVector GPRCS2Frames; BitVector DPRCSFrames; - /// SpilledCSRegs - A BitVector mask of all spilled callee-saved registers. - /// - BitVector SpilledCSRegs; - /// JumpTableUId - Unique id for jumptables. /// unsigned JumpTableUId; - unsigned ConstPoolEntryUId; + unsigned PICLabelUId; /// VarArgsFrameIndex - FrameIndex for start of varargs area. int VarArgsFrameIndex; @@ -95,6 +91,10 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// HasITBlocks - True if IT blocks have been inserted. bool HasITBlocks; + /// CPEClones - Track constant pool entries clones created by Constant Island + /// pass. 
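+  /// The map is keyed by the clone's constant pool index and yields the
+  /// original entry's index (see recordCPEClone and getOriginalCPIdx below).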
+ DenseMap<unsigned, unsigned> CPEClones; + public: ARMFunctionInfo() : isThumb(false), @@ -104,8 +104,8 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), - JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0), - HasITBlocks(false) {} + JumpTableUId(0), PICLabelUId(0), + VarArgsFrameIndex(0), HasITBlocks(false) {} explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), @@ -115,9 +115,8 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), - SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()), - JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0), - HasITBlocks(false) {} + JumpTableUId(0), PICLabelUId(0), + VarArgsFrameIndex(0), HasITBlocks(false) {} bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -207,18 +206,6 @@ public: } } - void setCSRegisterIsSpilled(unsigned Reg) { - SpilledCSRegs.set(Reg); - } - - bool isCSRegisterSpilled(unsigned Reg) const { - return SpilledCSRegs[Reg]; - } - - const BitVector &getSpilledCSRegisters() const { - return SpilledCSRegs; - } - unsigned createJumpTableUId() { return JumpTableUId++; } @@ -227,16 +214,16 @@ public: return JumpTableUId; } - void initConstPoolEntryUId(unsigned UId) { - ConstPoolEntryUId = UId; + void initPICLabelUId(unsigned UId) { + PICLabelUId = UId; } - unsigned getNumConstPoolEntries() const { - return ConstPoolEntryUId; + unsigned getNumPICLabels() const { + return PICLabelUId; } - unsigned createConstPoolEntryUId() { - return ConstPoolEntryUId++; + unsigned createPICLabelUId() { + return PICLabelUId++; } int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } @@ -244,6 +231,19 @@ public: bool hasITBlocks() const { return HasITBlocks; } void setHasITBlocks(bool h) { HasITBlocks = h; } + + void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) { + if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second) + assert(0 && "Duplicate entries!"); + } + + unsigned getOriginalCPIdx(unsigned CloneIdx) const { + DenseMap<unsigned, unsigned>::const_iterator I = CPEClones.find(CloneIdx); + if (I != CPEClones.end()) + return I->second; + else + return -1U; + } }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h index 5ff7c38..edecc4b 100644 --- a/lib/Target/ARM/ARMPerfectShuffle.h +++ b/lib/Target/ARM/ARMPerfectShuffle.h @@ -21,6566 +21,6566 @@ // This table is 6561*4 = 26244 bytes in size. 
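 // Each 32-bit entry encodes the shuffle-instruction sequence for one
 // 4-element mask; the table is indexed in base 9 by the four mask elements
 // (0-7 select a source lane, 'u' is undef), hence the 9^4 == 6561 entries.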
static const unsigned PerfectShuffleTable[6561+1] = { - 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS - 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS - 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0> - 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0> - 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS - 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3> - 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3> - 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0> - 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS - 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0> - 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS - 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS - 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0> - 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5> - 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7> - 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1> - 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1> - 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS - 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0> - 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1> - 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS - 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0> - 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6> - 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6> - 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7> - 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2> - 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS - 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> - 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0> - 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0> - 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3> - 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6> - 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6> - 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7> - 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0> - 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> - 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1> - 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS - 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS - 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4> - 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6> - 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS - 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS - 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5> - 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS - 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7> - 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3> - 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7> - 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5> - 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6> - 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5> - 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7> - 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7> - 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7> - 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7> - 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS - 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3> - 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7> - 3659762998U, 
// <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS - 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0> - 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6> - 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0> - 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0> - 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> - 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0> - 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7> - 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0> - 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6> - 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0> - 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7> - 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7> - 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> - 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS - 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS - 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS - 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u> - 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS - 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS - 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS - 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u> - 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS - 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1> - 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS - 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1> - 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0> - 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5> - 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7> - 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1> - 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0> - 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS - 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1> - 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1> - 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0> - 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3> - 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS - 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7> - 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3> - 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1> - 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS - 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS - 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1> - 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2> - 835584U, // <0,1,2,3>: Cost 0 copy LHS - 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS - 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7> - 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7> - 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2> - 835584U, // <0,1,2,u>: Cost 0 copy LHS - 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0> - 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3> - 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0> - 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0> - 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS - 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7> - 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0> - 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1> - 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3> - 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS - 
4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1> - 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1> - 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4> - 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS - 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS - 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS - 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4> - 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS - 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1> - 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1> - 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0> - 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7> - 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6> - 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1> - 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1> - 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1> - 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7> - 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS - 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7> - 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1> - 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7> - 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS - 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7> - 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1> - 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1> - 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1> - 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0> - 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1> - 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0> - 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1> - 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6> - 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0> - 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0> - 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7> - 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2> - 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS - 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS - 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS - 835584U, // <0,1,u,3>: Cost 0 copy LHS - 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS - 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS - 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS - 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u> - 835584U, // <0,1,u,u>: Cost 0 copy LHS - 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0> - 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS - 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS - 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0> - 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6> - 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7> - 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7> - 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0> - 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS - 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2> - 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1> - 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2> - 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> - 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS - 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7> - 2886559674U, // 
<0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7> - 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7> - 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2> - 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2> - 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2> - 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2> - 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3> - 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS - 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3> - 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7> - 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2> - 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS - 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2> - 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> - 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3> - 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> - 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6> - 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> - 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3> - 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0> - 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> - 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS - 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3> - 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4> - 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4> - 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS - 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS - 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS - 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4> - 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS - 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7> - 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3> - 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7> - 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6> - 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6> - 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5> - 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0> - 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS - 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS - 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1> - 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2> - 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3> - 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7> - 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5> - 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6> - 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6> - 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2> - 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7> - 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> - 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2> - 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2> - 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0> - 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6> - 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2> - 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2> - 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7> - 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> - 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u> - 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS - 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS - 
3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3> - 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS - 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS - 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS - 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS - 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS - 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0> - 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2> - 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0> - 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3> - 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS - 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6> - 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7> - 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0> - 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS - 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2> - 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1> - 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3> - 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3> - 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6> - 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6> - 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1> - 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3> - 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2> - 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS - 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2> - 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2> - 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3> - 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS - 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6> - 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3> - 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2> - 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS - 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2> - 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3> - 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3> - 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3> - 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6> - 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6> - 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7> - 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7> - 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3> - 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2> - 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4> - 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4> - 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3> - 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6> - 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6> - 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS - 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4> - 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6> - 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS - 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2> - 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2> - 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7> - 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5> - 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7> - 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7> - 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0> - 
2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5> - 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7> - 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3> - 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7> - 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7> - 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7> - 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6> - 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6> - 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0> - 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0> - 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1> - 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3> - 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7> - 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7> - 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5> - 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7> - 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7> - 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0> - 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3> - 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS - 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u> - 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2> - 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3> - 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS - 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6> - 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3> - 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0> - 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS - 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4> - 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS - 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4> - 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0> - 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6> - 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1> - 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS - 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0> - 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS - 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS - 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1> - 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0> - 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1> - 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS - 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS - 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS - 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1> - 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS - 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS - 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2> - 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2> - 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4> - 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS - 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS - 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS - 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2> - 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS - 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2> - 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2> - 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4> - 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3> - 
3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6> - 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6> - 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS - 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4> - 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2> - 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4> - 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0> - 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3> - 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4> - 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4> - 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS - 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS - 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4> - 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS - 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS - 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0> - 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5> - 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5> - 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS - 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6> - 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS - 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5> - 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS - 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6> - 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2> - 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6> - 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0> - 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6> - 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6> - 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0> - 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4> - 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2> - 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS - 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1> - 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4> - 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4> - 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS - 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> - 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0> - 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2> - 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> - 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS - 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS - 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS - 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u> - 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS - 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS - 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS - 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u> - 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS - 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0> - 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS - 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2> - 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5> - 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS - 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1> - 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1> - 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS - 2625913501U, // <0,5,0,u>: Cost 3 
vext2 <1,4,0,5>, LHS - 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS - 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3> - 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2> - 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2> - 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1> - 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5> - 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0> - 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3> - 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS - 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS - 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2> - 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7> - 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2> - 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS - 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5> - 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6> - 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS - 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS - 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2> - 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3> - 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1> - 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3> - 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5> - 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0> - 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7> - 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0> - 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0> - 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1> - 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4> - 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4> - 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5> - 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6> - 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS - 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5> - 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6> - 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS - 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0> - 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0> - 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5> - 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0> - 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5> - 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5> - 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0> - 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7> - 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7> - 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS - 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0> - 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3> - 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4> - 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS - 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0> - 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7> - 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0> - 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0> - 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS - 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0> - 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7> - 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2> - 
2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS - 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7> - 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0> - 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0> - 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS - 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS - 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS - 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0> - 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u> - 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u> - 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS - 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u> - 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0> - 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0> - 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS - 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS - 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2> - 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4> - 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6> - 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0> - 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0> - 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS - 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS - 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS - 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1> - 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3> - 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3> - 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS - 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1> - 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6> - 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS - 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS - 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS - 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2> - 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2> - 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1> - 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2> - 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3> - 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6> - 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS - 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS - 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2> - 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7> - 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0> - 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3> - 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6> - 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7> - 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0> - 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0> - 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0> - 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS - 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2> - 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4> - 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2> - 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS - 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS - 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0> - 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS - 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, 
RHS - 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> - 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0> - 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7> - 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0> - 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5> - 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0> - 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7> - 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS - 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> - 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0> - 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3> - 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6> - 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0> - 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4> - 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6> - 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6> - 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7> - 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7> - 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1> - 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0> - 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7> - 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0> - 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5> - 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6> - 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2> - 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0> - 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1> - 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS - 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS - 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u> - 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0> - 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u> - 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS - 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0> - 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS - 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS - 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0> - 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS - 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0> - 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0> - 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5> - 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6> - 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7> - 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7> - 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS - 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1> - 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1> - 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0> - 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5> - 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1> - 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3> - 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7> - 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7> - 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1> - 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS - 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2> - 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2> - 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0> - 2586504502U, // <0,7,2,4>: 
Cost 3 vext1 <6,0,7,2>, RHS - 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7> - 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2> - 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7> - 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2> - 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2> - 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3> - 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3> - 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3> - 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6> - 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7> - 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> - 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0> - 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7> - 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS - 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4> - 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4> - 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7> - 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6> - 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS - 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7> - 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5> - 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS - 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0> - 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7> - 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5> - 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7> - 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5> - 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7> - 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7> - 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0> - 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7> - 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0> - 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6> - 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7> - 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7> - 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS - 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7> - 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6> - 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0> - 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7> - 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1> - 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0> - 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7> - 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0> - 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS - 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7> - 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7> - 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7> - 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7> - 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u> - 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u> - 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u> - 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0> - 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u> - 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS - 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7> - 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0> - 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u> - 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS - 
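The <0,u,0,0> entry just above (135053414U, "Cost 1 vdup0 LHS") makes the entry encoding easy to check by hand: each 32-bit value packs two saturated cost bits, a 4-bit opcode, and two 13-bit operand IDs, the layout that GeneratePerfectShuffle() in ARMISelLowering.cpp (part of this same import) unpacks; the IDs are themselves indexes back into this table, so the emitter recurses until it bottoms out at OP_COPY. A minimal standalone sketch of that layout follows; decodePFEntry and the driver are illustrative names, not code from the tree.

// Standalone sketch of the entry encoding (helper names are mine); the
// field layout mirrors GeneratePerfectShuffle() in ARMISelLowering.cpp.
#include <cstdint>
#include <cstdio>

// Operand opcodes, in the order ARMISelLowering.cpp declares them.
enum {
  OP_COPY = 0,   // identity; e.g. <u,u,u,3> is treated as <0,1,2,3>
  OP_VREV, OP_VDUP0, OP_VDUP1, OP_VDUP2, OP_VDUP3,
  OP_VEXT1, OP_VEXT2, OP_VEXT3,
  OP_VUZPL, OP_VUZPR, OP_VZIPL, OP_VZIPR, OP_VTRNL, OP_VTRNR
};

static void decodePFEntry(uint32_t PFEntry) {
  unsigned CostBits = PFEntry >> 30;        // tracks Cost-1, clamped to 2 bits;
                                            // the "Cost N" comments are authoritative
  unsigned OpNum = (PFEntry >> 26) & 0x0F;  // which NEON op to emit
  unsigned LHSID = (PFEntry >> 13) & ((1u << 13) - 1); // how to build the op's LHS
  unsigned RHSID = PFEntry & ((1u << 13) - 1);         // how to build the op's RHS
  std::printf("op=%u lhs=%u rhs=%u cost bits=%u\n", OpNum, LHSID, RHSID, CostBits);
}

int main() {
  // 135053414U == 0x080CC066 decodes to op=2 (OP_VDUP0) with
  // LHSID == RHSID == 102; 102 == (1*9+2)*9+3 is the ID of the identity
  // shuffle <0,1,2,3>, i.e. the bare "LHS" in the entry comments.
  decodePFEntry(135053414U);
  return 0;
}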
1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS - 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS - 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2> - 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS - 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6> - 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS - 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0> - 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS - 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1> - 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS - 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS - 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> - 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS - 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS - 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7> - 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS - 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS - 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS - 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2> - 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS - 835584U, // <0,u,2,3>: Cost 0 copy LHS - 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS - 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6> - 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS - 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2> - 835584U, // <0,u,2,u>: Cost 0 copy LHS - 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2> - 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> - 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u> - 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> - 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6> - 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> - 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> - 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u> - 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> - 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS - 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS - 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS - 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4> - 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS - 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS - 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS - 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6> - 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS - 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS - 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0> - 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7> - 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7> - 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS - 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u> - 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS - 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS - 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS - 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS - 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6> - 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u> - 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7> - 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS - 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u> - 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u> - 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u> - 1584714662U, // <0,u,6,u>: Cost 2 vext2 
<6,u,0,u>, <6,u,0,u> - 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS - 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0> - 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7> - 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u> - 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS - 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6> - 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7> - 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7> - 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS - 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS - 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS - 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS - 835584U, // <0,u,u,3>: Cost 0 copy LHS - 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS - 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS - 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS - 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u> - 835584U, // <0,u,u,u>: Cost 0 copy LHS - 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0> - 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1> - 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2> - 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0> - 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1> - 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0> - 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7> - 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0> - 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1> - 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS - 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1> - 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS - 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3> - 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS - 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1> - 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7> - 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2> - 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS - 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1> - 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1> - 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0> - 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1> - 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6> - 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7> - 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0> - 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2> - 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1> - 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0> - 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1> - 67944550U, // <1,0,3,2>: Cost 1 vrev LHS - 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3> - 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS - 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7> - 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7> - 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3> - 68386972U, // <1,0,3,u>: Cost 1 vrev LHS - 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1> - 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5> - 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6> - 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1> - 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1> - 2619313462U, // 
<1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS - 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1> - 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4> - 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS - 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0> - 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS - 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS - 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5> - 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5> - 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0> - 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0> - 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS - 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS - 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1> - 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7> - 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7> - 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6> - 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1> - 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0> - 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0> - 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0> - 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0> - 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0> - 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1> - 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7> - 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0> - 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6> - 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0> - 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0> - 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7> - 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0> - 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0> - 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1> - 67985515U, // <1,0,u,2>: Cost 1 vrev LHS - 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1> - 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6> - 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS - 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0> - 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u> - 68427937U, // <1,0,u,u>: Cost 1 vrev LHS - 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1> - 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS - 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1> - 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2> - 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5> - 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1> - 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7> - 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0> - 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1> - 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS - 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS - 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0> - 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3> - 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS - 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7> - 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7> - 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1> - 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS - 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2> - 2617329183U, // 
<1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1> - 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2> - 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1> - 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS - 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7> - 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7> - 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0> - 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1> - 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2> - 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1> - 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2> - 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS - 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6> - 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7> - 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7> - 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3> - 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS - 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS - 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4> - 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0> - 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5> - 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS - 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS - 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS - 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4> - 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS - 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1> - 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3> - 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2> - 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7> - 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5> - 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5> - 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0> - 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS - 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7> - 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2> - 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7> - 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3> - 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7> - 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6> - 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5> - 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6> - 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0> - 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0> - 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1> - 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> - 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3> - 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS - 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6> - 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7> - 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0> - 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7> - 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> - 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS - 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS - 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3> - 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS - 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS - 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS - 2622642384U, // 
<1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7> - 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS - 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS - 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0> - 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS - 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2> - 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1> - 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5> - 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7> - 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2> - 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1> - 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS - 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2> - 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1> - 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0> - 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS - 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS - 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7> - 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7> - 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0> - 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS - 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2> - 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2> - 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2> - 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3> - 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5> - 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7> - 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7> - 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1> - 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3> - 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS - 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2> - 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2> - 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3> - 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS - 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3> - 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3> - 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2> - 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS - 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2> - 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6> - 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4> - 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2> - 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4> - 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS - 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6> - 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0> - 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS - 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS - 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7> - 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2> - 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS - 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS - 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5> - 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0> - 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7> - 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS - 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1> - 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2> - 2659144186U, // <1,2,6,2>: 
Cost 3 vext2 <7,0,1,2>, <6,2,7,3> - 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7> - 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5> - 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7> - 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6> - 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2> - 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7> - 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2> - 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2> - 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3> - 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1> - 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6> - 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0> - 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1> - 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1> - 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2> - 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS - 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2> - 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2> - 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2> - 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS - 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS - 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3> - 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2> - 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS - 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0> - 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS - 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2> - 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3> - 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5> - 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6> - 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7> - 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1> - 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS - 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2> - 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1> - 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3> - 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS - 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS - 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7> - 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7> - 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1> - 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS - 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> - 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3> - 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> - 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> - 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> - 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5> - 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> - 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3> - 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> - 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS - 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3> - 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2> - 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3> - 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS - 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5> - 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3> - 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7> - 
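For lookups, a 4-lane shuffle mask is read as a four-digit base-9 number, with 8 standing in for an undef lane (the 'u' positions in the comments); that is the indexing isShuffleMaskLegal() in ARMISelLowering.cpp uses against this 9^4 = 6561-entry table. The sketch below reproduces just the index computation under that assumption (pfTableIndex is an illustrative name), using the <1,3,3,7> entry just above as the worked case.

// Sketch of the table indexing only (function name is mine); the digit
// scheme matches isShuffleMaskLegal() in ARMISelLowering.cpp.
#include <cstdio>

// Map one 4-lane shuffle mask to its slot in the 6561-entry table.
// Undef lanes (mask < 0) become digit 8, the 'u' in the entry comments.
static unsigned pfTableIndex(const int Mask[4]) {
  unsigned Idx = 0;
  for (int i = 0; i != 4; ++i)
    Idx = Idx * 9 + (Mask[i] < 0 ? 8u : (unsigned)Mask[i]);
  return Idx; // == Mask[0]*9*9*9 + Mask[1]*9*9 + Mask[2]*9 + Mask[3]
}

int main() {
  const int Mask[4] = {1, 3, 3, 7}; // the <1,3,3,7> entry just above
  std::printf("index = %u\n", pfTableIndex(Mask)); // 729 + 243 + 27 + 7 = 1006
  return 0;
}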
1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS - 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS - 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3> - 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3> - 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4> - 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS - 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS - 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6> - 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4> - 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS - 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS - 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7> - 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5> - 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5> - 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS - 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5> - 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4> - 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS - 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS - 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0> - 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1> - 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3> - 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7> - 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3> - 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5> - 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3> - 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> - 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> - 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1> - 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3> - 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7> - 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3> - 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6> - 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5> - 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1> - 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7> - 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1> - 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS - 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u> - 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2> - 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS - 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS - 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS - 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6> - 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS - 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS - 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4> - 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS - 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4> - 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4> - 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5> - 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1> - 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2> - 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1> - 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1> - 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2> - 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4> - 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0> - 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6> - 3693085726U, // <1,4,1,4>: Cost 4 vext2 
<0,3,1,4>, <1,4,0,1> - 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS - 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS - 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0> - 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS - 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2> - 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3> - 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2> - 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1> - 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4> - 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS - 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7> - 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2> - 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS - 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS - 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4> - 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3> - 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3> - 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6> - 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5> - 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6> - 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3> - 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u> - 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1> - 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4> - 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4> - 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4> - 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4> - 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS - 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6> - 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1> - 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS - 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS - 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5> - 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1> - 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2> - 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS - 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS - 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS - 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS - 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS - 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS - 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1> - 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2> - 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2> - 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS - 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7> - 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7> - 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> - 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS - 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4> - 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1> - 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4> - 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4> - 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS - 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0> - 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1> - 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2> - 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4> - 2557132902U, // 
<1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS - 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS - 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u> - 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1> - 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6> - 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1> - 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS - 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> - 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS - 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0> - 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS - 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5> - 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4> - 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5> - 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5> - 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1> - 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1> - 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS - 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2> - 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1> - 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0> - 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7> - 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5> - 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1> - 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7> - 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS - 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3> - 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1> - 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5> - 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2> - 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1> - 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5> - 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7> - 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7> - 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS - 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1> - 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2> - 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7> - 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2> - 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3> - 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6> - 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5> - 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6> - 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS - 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS - 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1> - 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0> - 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3> - 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4> - 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4> - 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS - 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS - 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6> - 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS - 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS - 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5> - 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5> - 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3> - 2557185334U, // 
<1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS - 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5> - 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0> - 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7> - 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS - 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1> - 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5> - 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3> - 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4> - 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6> - 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6> - 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6> - 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0> - 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0> - 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS - 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1> - 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1> - 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7> - 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS - 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7> - 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0> - 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1> - 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS - 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2> - 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS - 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3> - 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1> - 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5> - 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS - 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7> - 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS - 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS - 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0> - 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS - 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6> - 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1> - 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5> - 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6> - 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6> - 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS - 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS - 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2> - 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1> - 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0> - 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3> - 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6> - 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7> - 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1> - 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS - 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS - 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1> - 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0> - 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2> - 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1> - 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS - 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7> - 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7> - 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS - 2970799415U, // <1,6,2,u>: Cost 3 
vzipr <3,0,1,2>, RHS - 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS - 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1> - 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3> - 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3> - 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS - 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5> - 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3> - 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7> - 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u> - 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1> - 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0> - 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4> - 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4> - 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5> - 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS - 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS - 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS - 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS - 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2> - 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5> - 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5> - 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5> - 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6> - 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5> - 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0> - 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS - 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS - 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1> - 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6> - 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7> - 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0> - 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS - 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7> - 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6> - 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7> - 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7> - 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1> - 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1> - 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3> - 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7> - 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5> - 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1> - 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7> - 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS - 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1> - 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1> - 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS - 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2> - 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1> - 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5> - 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS - 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7> - 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7> - 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1> - 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1> - 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS - 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1> - 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, 
<0,3,2,1> - 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1> - 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0> - 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7> - 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7> - 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS - 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS - 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1> - 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0> - 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1> - 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS - 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7> - 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7> - 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1> - 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7> - 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS - 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7> - 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2> - 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1> - 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS - 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7> - 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2> - 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2> - 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS - 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS - 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7> - 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2> - 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3> - 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS - 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3> - 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3> - 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2> - 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS - 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1> - 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1> - 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4> - 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4> - 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4> - 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS - 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0> - 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6> - 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS - 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS - 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7> - 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2> - 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5> - 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS - 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6> - 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7> - 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS - 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS - 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1> - 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0> - 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7> - 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6> - 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS - 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6> - 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6> - 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0> - 2725508637U, // 
<1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0> - 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1> - 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1> - 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1> - 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0> - 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS - 2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7> - 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7> - 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7> - 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1> - 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS - 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS - 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2> - 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u> - 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS - 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u> - 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3> - 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2> - 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS - 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u> - 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS - 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2> - 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u> - 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u> - 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1> - 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2> - 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1> - 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS - 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS - 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS - 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS - 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS - 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS - 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7> - 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS - 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS - 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS - 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> - 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS - 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> - 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> - 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> - 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS - 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> - 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS - 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> - 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS - 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2> - 115726126U, // <1,u,3,2>: Cost 1 vrev LHS - 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS - 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS - 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3> - 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3> - 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS - 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS - 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1> - 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0> - 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4> - 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4> - 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS - 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS - 2819449180U, // 
<1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6> - 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6> - 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS - 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS - 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7> - 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS - 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS - 2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS - 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS - 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS - 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS - 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS - 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u> - 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0> - 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3> - 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7> - 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4> - 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7> - 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u> - 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> - 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> - 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u> - 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1> - 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1> - 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7> - 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS - 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u> - 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7> - 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7> - 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u> - 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS - 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS - 115767091U, // <1,u,u,2>: Cost 1 vrev LHS - 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS - 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS - 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS - 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS - 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS - 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS - 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0> - 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1> - 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2> - 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0> - 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS - 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5> - 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0> - 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7> - 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2> - 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1> - 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0> - 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS - 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS - 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS - 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7> - 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1> - 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2> - 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS - 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS - 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2> - 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0> - 2551482518U, // <2,0,2,3>: Cost 3 
vext1 <0,2,0,2>, <3,0,1,2> - 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS - 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3> - 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2> - 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2> - 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS - 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0> - 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1> - 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2> - 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3> - 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS - 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5> - 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6> - 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7> - 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u> - 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS - 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5> - 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6> - 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2> - 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS - 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS - 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS - 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5> - 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6> - 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS - 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5> - 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7> - 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5> - 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5> - 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5> - 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0> - 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS - 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5> - 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0> - 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS - 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7> - 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6> - 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6> - 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0> - 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6> - 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7> - 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6> - 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> - 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3> - 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1> - 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0> - 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6> - 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2> - 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0> - 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7> - 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> - 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS - 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1> - 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS - 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2> - 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS - 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS - 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS - 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS - 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS - 2569453670U, // 
<2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS - 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS - 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0> - 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2> - 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS - 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2> - 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0> - 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1> - 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2> - 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS - 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1> - 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0> - 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3> - 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS - 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7> - 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1> - 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2> - 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3> - 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS - 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1> - 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2> - 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0> - 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS - 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3> - 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7> - 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0> - 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0> - 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS - 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1> - 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2> - 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3> - 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS - 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5> - 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3> - 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7> - 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u> - 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5> - 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6> - 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5> - 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4> - 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS - 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS - 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS - 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6> - 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4> - 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS - 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7> - 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1> - 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7> - 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS - 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5> - 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5> - 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS - 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7> - 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS - 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2> - 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7> - 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS - 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS - 4029382994U, // 
<2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5> - 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6> - 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1> - 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS - 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2> - 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1> - 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0> - 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7> - 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6> - 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7> - 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1> - 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u> - 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2> - 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS - 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1> - 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2> - 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u> - 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS - 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5> - 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3> - 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1> - 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u> - 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2> - 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS - 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0> - 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2> - 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS - 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7> - 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6> - 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2> - 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2> - 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2> - 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1> - 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0> - 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS - 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS - 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7> - 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3> - 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1> - 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS - 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS - 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2> - 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS - 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3> - 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS - 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7> - 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7> - 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2> - 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS - 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1> - 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0> - 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2> - 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS - 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5> - 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5> - 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6> - 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3> - 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS - 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS - 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, 
<1,0,3,2> - 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5> - 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5> - 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS - 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS - 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS - 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4> - 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS - 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2> - 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3> - 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7> - 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS - 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5> - 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5> - 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0> - 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS - 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS - 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6> - 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3> - 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3> - 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7> - 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS - 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7> - 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6> - 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2> - 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7> - 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1> - 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5> - 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2> - 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS - 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6> - 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7> - 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7> - 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7> - 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1> - 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS - 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS - 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS - 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS - 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS - 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS - 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6> - 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS - 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS - 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0> - 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS - 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2> - 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3> - 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5> - 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7> - 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7> - 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0> - 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS - 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2> - 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1> - 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0> - 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3> - 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS - 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7> - 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7> - 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0> - 1548985709U, // <2,3,1,u>: 
Cost 2 vext2 LHS, <1,u,1,3> - 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2> - 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1> - 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2> - 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1> - 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5> - 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7> - 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7> - 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1> - 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1> - 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2> - 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3> - 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3> - 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3> - 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6> - 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7> - 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3> - 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7> - 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2> - 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS - 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4> - 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0> - 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2> - 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS - 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS - 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6> - 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4> - 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS - 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS - 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3> - 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4> - 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6> - 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6> - 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5> - 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0> - 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7> - 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7> - 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2> - 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6> - 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3> - 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5> - 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6> - 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7> - 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6> - 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1> - 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1> - 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2> - 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3> - 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3> - 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1> - 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6> - 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2> - 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1> - 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7> - 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2> - 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2> - 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS - 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0> - 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1> - 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6> - 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS - 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7> - 1592121600U, // <2,3,u,7>: Cost 2 
vext2 LHS, <u,7,0,1> - 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS - 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4> - 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS - 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4> - 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4> - 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2> - 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1> - 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> - 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1> - 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> - 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2> - 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1> - 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4> - 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS - 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4> - 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7> - 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3> - 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1> - 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS - 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4> - 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3> - 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4> - 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5> - 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4> - 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS - 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS - 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0> - 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS - 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4> - 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1> - 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4> - 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3> - 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4> - 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5> - 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6> - 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7> - 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u> - 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2> - 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1> - 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2> - 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3> - 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4> - 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS - 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS - 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7> - 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS - 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5> - 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2> - 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5> - 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5> - 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS - 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5> - 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS - 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS - 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS - 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS - 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2> - 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4> - 
2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2> - 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS - 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS - 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6> - 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1> - 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS - 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> - 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2> - 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4> - 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4> - 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6> - 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0> - 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1> - 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7> - 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> - 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS - 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS - 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2> - 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2> - 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS - 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5> - 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS - 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS - 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS - 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0> - 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS - 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2> - 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5> - 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5> - 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5> - 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7> - 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS - 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS - 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2> - 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1> - 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0> - 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7> - 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5> - 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7> - 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2> - 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3> - 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5> - 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2> - 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1> - 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2> - 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1> - 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5> - 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1> - 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7> - 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS - 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS - 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS - 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3> - 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5> - 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3> - 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6> - 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3> - 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6> - 
2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7> - 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u> - 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS - 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4> - 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5> - 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4> - 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS - 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS - 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6> - 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS - 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS - 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS - 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7> - 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5> - 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5> - 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS - 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5> - 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6> - 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7> - 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7> - 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS - 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6> - 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3> - 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6> - 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5> - 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7> - 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6> - 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS - 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS - 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS - 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2> - 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7> - 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2> - 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS - 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7> - 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2> - 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS - 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS - 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS - 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS - 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u> - 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u> - 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6> - 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS - 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6> - 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7> - 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS - 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0> - 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS - 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2> - 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0> - 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6> - 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6> - 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1> - 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2> - 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS - 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2> - 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1> - 2620097430U, // <2,6,1,2>: Cost 3 vext2 
<0,4,2,6>, <1,2,3,0> - 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS - 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6> - 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7> - 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7> - 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2> - 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS - 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1> - 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3> - 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2> - 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1> - 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6> - 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7> - 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6> - 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS - 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1> - 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2> - 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1> - 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7> - 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3> - 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6> - 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5> - 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6> - 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS - 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS - 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2> - 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3> - 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0> - 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4> - 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4> - 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS - 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6> - 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS - 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS - 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2> - 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3> - 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7> - 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5> - 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5> - 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5> - 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0> - 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS - 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS - 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS - 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1> - 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6> - 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3> - 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS - 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5> - 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6> - 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS - 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS - 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1> - 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2> - 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2> - 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0> - 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5> - 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7> - 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, 
<6,7,6,2> - 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0> - 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1> - 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2> - 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS - 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3> - 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS - 1594136612U, // <2,6,u,4>: Cost 2 vext2 <u,4,2,6>, <u,4,2,6> - 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS - 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7> - 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS - 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS - 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2> - 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2> - 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2> - 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0> - 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2> - 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7> - 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7> - 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2> - 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2> - 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> - 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1> - 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0> - 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7> - 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5> - 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7> - 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7> - 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0> - 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> - 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2> - 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7> - 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2> - 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1> - 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6> - 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7> - 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7> - 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1> - 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7> - 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS - 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7> - 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2> - 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3> - 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS - 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3> - 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3> - 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2> - 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS - 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6> - 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4> - 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7> - 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4> - 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4> - 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS - 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u> - 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0> - 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS - 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2> - 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, 
<1,3,5,7> - 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7> - 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5> - 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS - 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5> - 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7> - 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6> - 2599802670U, // <2,7,5,u>: Cost 3 vext1 <u,2,7,5>, LHS - 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS - 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7> - 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3> - 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6> - 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS - 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6> - 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6> - 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0> - 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS - 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1> - 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2> - 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7> - 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7> - 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS - 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5> - 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7> - 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7> - 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1> - 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS - 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2> - 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2> - 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u> - 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS - 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS - 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u> - 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2> - 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS - 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0> - 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS - 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2> - 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2> - 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5> - 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7> - 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2> - 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS - 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS - 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2> - 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1> - 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0> - 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3> - 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5> - 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7> - 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7> - 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3> - 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3> - 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS - 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1> - 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS - 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1> - 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS - 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6> - 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7> - 3088354857U, // <2,u,2,7>: Cost 3 vtrnr 
<0,2,0,2>, RHS
-  269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS
-  1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2>
-  2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1>
-  1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3>
-  1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS
-  1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6>
-  2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5>
-  1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3>
-  1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS
-  1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS
[Placeholder: the remaining removed lines of this machine-generated table run, masks <2,u,4,0> through <4,5,7,0> of the ARM perfect-shuffle cost table in lib/Target/ARM/ARMPerfectShuffle.h, follow the same one-entry-per-line format shown above ("-  <entry>U, // <a,b,c,d>: Cost <n> <op>, <operands>") and are elided here. Each line records, for one four-lane shuffle mask <a,b,c,d>, the encoded unsigned table entry, its instruction cost, and the shuffle operation (vdup, vext, vzip, vuzp, vtrn, vrev, copy) that realizes it.]
-  2565161882U, // <4,5,7,1>: Cost 3 vext1
<2,4,5,7>, <1,2,3,4> - 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7> - 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5> - 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS - 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7> - 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4> - 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4> - 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS - 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS - 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS - 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0> - 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u> - 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS - 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS - 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7> - 27705344U, // <4,5,u,7>: Cost 0 copy RHS - 27705344U, // <4,5,u,u>: Cost 0 copy RHS - 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0> - 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS - 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6> - 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0> - 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5> - 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7> - 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0> - 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS - 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS - 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2> - 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1> - 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0> - 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3> - 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5> - 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7> - 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7> - 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS - 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3> - 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4> - 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3> - 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2> - 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1> - 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6> - 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7> - 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7> - 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3> - 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1> - 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2> - 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6> - 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2> - 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3> - 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6> - 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6> - 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6> - 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4> - 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2> - 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS - 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3> - 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4> - 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4> - 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS - 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS - 1705430326U, 
// <4,6,4,6>: Cost 2 vuzpl RHS, RHS - 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4> - 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS - 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS - 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3> - 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3> - 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3> - 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6> - 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5> - 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6> - 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS - 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS - 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS - 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2> - 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3> - 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2> - 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6> - 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6> - 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6> - 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS - 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS - 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2> - 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2> - 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7> - 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4> - 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6> - 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6> - 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3> - 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7> - 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2> - 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS - 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS - 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS - 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1> - 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u> - 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS - 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS - 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS - 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS - 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0> - 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS - 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4> - 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4> - 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5> - 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0> - 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7> - 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4> - 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS - 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1> - 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1> - 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4> - 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5> - 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS - 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7> - 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7> - 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3> - 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1> - 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS - 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3> - 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2> - 2727744688U, // 
<4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4> - 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7> - 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7> - 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7> - 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3> - 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7> - 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2> - 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4> - 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4> - 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3> - 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6> - 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7> - 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7> - 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4> - 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7> - 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1> - 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3> - 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7> - 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5> - 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4> - 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS - 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4> - 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7> - 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS - 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2> - 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7> - 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5> - 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5> - 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6> - 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5> - 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5> - 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7> - 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2> - 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS - 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2> - 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2> - 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2> - 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS - 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6> - 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3> - 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7> - 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS - 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS - 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4> - 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7> - 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4> - 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7> - 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7> - 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7> - 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7> - 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7> - 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS - 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS - 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2> - 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2> - 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS - 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u> - 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3> - 3047757420U, // <4,7,u,7>: 
Cost 3 vtrnl RHS, <7,7,7,7> - 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS - 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0> - 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS - 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u> - 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2> - 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5> - 2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0> - 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u> - 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0> - 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS - 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2> - 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1> - 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS - 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3> - 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3> - 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7> - 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7> - 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3> - 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS - 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS - 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u> - 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2> - 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u> - 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u> - 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4> - 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7> - 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3> - 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u> - 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2> - 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2> - 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u> - 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3> - 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6> - 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7> - 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> - 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u> - 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2> - 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS - 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2> - 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4> - 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4> - 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS - 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS - 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS - 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6> - 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS - 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS - 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS - 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5> - 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS - 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS - 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS - 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS - 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS - 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS - 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS - 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2> - 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS - 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6> - 
1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS - 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6> - 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS - 27705344U, // <4,u,6,7>: Cost 0 copy RHS - 27705344U, // <4,u,6,u>: Cost 0 copy RHS - 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS - 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4> - 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7> - 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u> - 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS - 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6> - 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7> - 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7> - 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS - 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS - 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS - 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS - 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u> - 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS - 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS - 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS - 27705344U, // <4,u,u,7>: Cost 0 copy RHS - 27705344U, // <4,u,u,u>: Cost 0 copy RHS - 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0> - 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1> - 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2> - 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5> - 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5> - 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0> - 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0> - 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0> - 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2> - 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS - 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1> - 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS - 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7> - 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS - 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1> - 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7> - 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2> - 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS - 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2> - 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5> - 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4> - 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5> - 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5> - 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5> - 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4> - 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6> - 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5> - 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5> - 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4> - 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5> - 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5> - 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0> - 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4> - 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0> - 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7> - 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5> - 2559459430U, // <5,0,4,0>: Cost 3 vext1 
<1,5,0,4>, LHS - 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5> - 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6> - 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5> - 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS - 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS - 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5> - 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0> - 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5> - 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1> - 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS - 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS - 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0> - 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5> - 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0> - 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0> - 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS - 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS - 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0> - 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS - 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7> - 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6> - 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5> - 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7> - 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6> - 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5> - 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS - 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS - 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0> - 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7> - 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2> - 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS - 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0> - 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7> - 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7> - 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS - 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2> - 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5> - 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS - 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5> - 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6> - 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS - 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u> - 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6> - 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS - 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0> - 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS - 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2> - 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2> - 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5> - 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0> - 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7> - 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0> - 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS - 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1> - 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1> - 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0> - 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3> - 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5> - 
2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5> - 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5> - 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5> - 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3> - 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2> - 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3> - 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2> - 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0> - 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5> - 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3> - 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7> - 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0> - 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0> - 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS - 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3> - 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5> - 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5> - 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5> - 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7> - 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7> - 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5> - 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3> - 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1> - 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5> - 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5> - 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5> - 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4> - 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS - 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6> - 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4> - 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1> - 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1> - 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1> - 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1> - 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7> - 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5> - 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5> - 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0> - 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7> - 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1> - 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS - 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7> - 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3> - 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7> - 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS - 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7> - 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6> - 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1> - 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1> - 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS - 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1> - 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1> - 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS - 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS - 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3> - 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6> - 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7> - 2040971371U, // <5,1,7,u>: Cost 2 vtrnr 
RHS, LHS - 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1> - 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS - 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5> - 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS - 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5> - 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS - 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7> - 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1> - 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS - 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0> - 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS - 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2> - 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2> - 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1> - 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1> - 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4> - 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0> - 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS - 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2> - 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2> - 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5> - 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5> - 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS - 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0> - 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3> - 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1> - 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5> - 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS - 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3> - 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2> - 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3> - 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5> - 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7> - 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6> - 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5> - 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3> - 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1> - 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5> - 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5> - 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5> - 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5> - 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5> - 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5> - 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7> - 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5> - 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2> - 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3> - 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5> - 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5> - 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6> - 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS - 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2> - 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4> - 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS - 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS - 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3> - 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7> - 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS - 
2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS - 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5> - 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7> - 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1> - 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS - 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS - 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3> - 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3> - 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7> - 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5> - 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7> - 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7> - 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1> - 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7> - 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS - 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2> - 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7> - 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS - 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS - 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5> - 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6> - 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7> - 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS - 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1> - 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS - 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u> - 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3> - 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5> - 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS - 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5> - 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u> - 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5> - 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0> - 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2> - 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0> - 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2> - 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1> - 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2> - 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0> - 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0> - 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2> - 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3> - 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1> - 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3> - 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5> - 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3> - 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7> - 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7> - 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5> - 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3> - 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1> - 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5> - 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2> - 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4> - 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5> - 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4> - 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3> - 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, 
<3,2,7,3> - 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4> - 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1> - 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3> - 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2> - 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3> - 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5> - 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5> - 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7> - 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5> - 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5> - 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5> - 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0> - 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3> - 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5> - 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5> - 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6> - 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5> - 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4> - 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6> - 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS - 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5> - 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5> - 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5> - 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS - 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5> - 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0> - 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5> - 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS - 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS - 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6> - 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6> - 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6> - 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS - 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0> - 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6> - 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4> - 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS - 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS - 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7> - 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2> - 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2> - 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS - 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3> - 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3> - 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7> - 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS - 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS - 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u> - 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2> - 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2> - 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS - 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6> - 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3> - 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u> - 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS - 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS - 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS - 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2> - 3782249348U, 
// <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5> - 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5> - 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1> - 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0> - 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0> - 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS - 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1> - 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4> - 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4> - 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7> - 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4> - 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0> - 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5> - 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1> - 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1> - 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4> - 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4> - 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4> - 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5> - 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4> - 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3> - 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3> - 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5> - 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5> - 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS - 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4> - 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4> - 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3> - 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4> - 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0> - 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5> - 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7> - 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4> - 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS - 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4> - 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3> - 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4> - 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4> - 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5> - 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4> - 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4> - 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5> - 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS - 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5> - 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3> - 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2> - 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS - 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5> - 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS - 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS - 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS - 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS - 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7> - 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2> - 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6> - 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS - 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5> - 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7> - 2712784270U, // <5,4,6,7>: Cost 3 vext3 
<4,6,7,5>, <4,6,7,5> - 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS - 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS - 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4> - 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7> - 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7> - 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS - 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5> - 94817590U, // <5,4,7,6>: Cost 1 vrev RHS - 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7> - 94965064U, // <5,4,7,u>: Cost 1 vrev RHS - 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS - 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u> - 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u> - 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4> - 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS - 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5> - 94825783U, // <5,4,u,6>: Cost 1 vrev RHS - 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5> - 94973257U, // <5,4,u,u>: Cost 1 vrev RHS - 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0> - 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS - 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2> - 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2> - 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1> - 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0> - 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7> - 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS - 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS - 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2> - 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5> - 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0> - 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3> - 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5> - 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7> - 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7> - 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3> - 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5> - 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS - 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3> - 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2> - 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4> - 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3> - 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3> - 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7> - 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7> - 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4> - 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2> - 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5> - 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3> - 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5> - 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6> - 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5> - 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7> - 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1> - 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2> - 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1> - 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5> - 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3> - 3645516369U, // <5,5,4,3>: Cost 4 vext1 
<3,5,5,4>, <3,5,5,4> - 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5> - 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS - 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5> - 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6> - 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5> - 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS - 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3> - 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2> - 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2> - 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS - 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS - 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0> - 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7> - 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS - 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS - 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6> - 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3> - 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6> - 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5> - 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5> - 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6> - 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1> - 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1> - 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS - 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7> - 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5> - 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7> - 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS - 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5> - 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6> - 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS - 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS - 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS - 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS - 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5> - 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u> - 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS - 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS - 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7> - 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS - 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS - 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0> - 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS - 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2> - 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4> - 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5> - 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6> - 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7> - 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS - 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS - 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2> - 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1> - 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0> - 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3> - 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6> - 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7> - 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7> - 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS - 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6> - 
3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2> - 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3> - 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2> - 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1> - 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6> - 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6> - 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7> - 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3> - 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6> - 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2> - 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3> - 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6> - 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3> - 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6> - 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6> - 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7> - 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS - 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6> - 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS - 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5> - 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5> - 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5> - 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6> - 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS - 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6> - 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5> - 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS - 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS - 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3> - 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6> - 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4> - 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6> - 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5> - 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1> - 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS - 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS - 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS - 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4> - 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3> - 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6> - 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS - 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6> - 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6> - 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS - 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS - 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS - 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2> - 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2> - 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS - 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6> - 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6> - 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7> - 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS - 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS - 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS - 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2> - 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS - 1564498074U, // 
<5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS - 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3> - 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2> - 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS - 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0> - 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS - 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2> - 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0> - 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5> - 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7> - 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7> - 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0> - 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS - 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2> - 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1> - 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0> - 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7> - 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS - 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7> - 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7> - 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7> - 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7> - 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7> - 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3> - 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2> - 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1> - 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7> - 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7> - 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7> - 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7> - 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1> - 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2> - 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5> - 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1> - 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3> - 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6> - 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0> - 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7> - 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7> - 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2> - 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS - 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7> - 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0> - 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4> - 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS - 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS - 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6> - 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7> - 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS - 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS - 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3> - 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3> - 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7> - 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS - 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5> - 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7> - 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS - 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS - 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, 
<5,6,7,0> - 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5> - 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2> - 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6> - 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4> - 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u> - 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6> - 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7> - 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u> - 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS - 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1> - 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2> - 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3> - 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS - 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7> - 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3> - 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7> - 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS - 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS - 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS - 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0> - 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS - 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS - 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS - 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7> - 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS - 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS - 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0> - 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS - 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2> - 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2> - 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1> - 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1> - 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0> - 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0> - 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS - 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2> - 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1> - 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS - 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u> - 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u> - 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0> - 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u> - 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS - 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS - 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0> - 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5> - 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2> - 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3> - 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u> - 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u> - 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7> - 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3> - 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1> - 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1> - 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u> - 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u> - 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3> - 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u> - 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, 
<3,5,u,0> - 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5> - 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u> - 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u> - 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u> - 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5> - 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6> - 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5> - 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5> - 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS - 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6> - 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u> - 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS - 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS - 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5> - 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3> - 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7> - 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS - 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS - 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS - 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS - 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS - 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS - 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS - 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6> - 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7> - 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS - 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS - 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6> - 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS - 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7> - 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS - 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7> - 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2> - 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS - 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS - 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3> - 118708378U, // <5,u,7,6>: Cost 1 vrev RHS - 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS - 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS - 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS - 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS - 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS - 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS - 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS - 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS - 118716571U, // <5,u,u,6>: Cost 1 vrev RHS - 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS - 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS - 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0> - 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1> - 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2> - 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5> - 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6> - 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0> - 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6> - 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7> - 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2> - 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS - 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0> - 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS - 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6> - 2566073654U, // <6,0,1,4>: Cost 3 
vext1 <2,6,0,1>, RHS - 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1> - 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1> - 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1> - 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS - 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2> - 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6> - 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6> - 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5> - 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6> - 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7> - 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6> - 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2> - 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6> - 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2> - 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4> - 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5> - 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3> - 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6> - 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6> - 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6> - 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7> - 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5> - 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6> - 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5> - 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6> - 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6> - 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6> - 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS - 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0> - 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0> - 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6> - 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS - 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6> - 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6> - 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0> - 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6> - 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6> - 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0> - 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS - 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS - 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0> - 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS - 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS - 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5> - 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0> - 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7> - 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0> - 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1> - 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS - 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS - 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0> - 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2> - 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7> - 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS - 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5> - 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0> - 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7> - 2572097326U, // 
<6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS - 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2> - 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1> - 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS - 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5> - 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6> - 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS - 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u> - 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS - 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS - 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS - 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS - 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6> - 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2> - 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS - 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2> - 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1> - 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0> - 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2> - 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1> - 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1> - 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6> - 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3> - 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6> - 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5> - 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6> - 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1> - 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3> - 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS - 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3> - 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2> - 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0> - 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS - 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3> - 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3> - 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0> - 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0> - 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS - 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3> - 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6> - 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1> - 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6> - 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7> - 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3> - 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2> - 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3> - 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1> - 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6> - 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4> - 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6> - 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS - 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6> - 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0> - 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1> - 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6> - 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1> - 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7> - 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, 
<1,5,2,6> - 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7> - 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6> - 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6> - 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0> - 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS - 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7> - 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS - 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7> - 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6> - 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS - 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS - 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7> - 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6> - 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1> - 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS - 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS - 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7> - 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2> - 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS - 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS - 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5> - 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0> - 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7> - 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS - 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS - 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3> - 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6> - 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0> - 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6> - 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7> - 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u> - 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u> - 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0> - 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0> - 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS - 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2> - 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0> - 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6> - 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3> - 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4> - 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0> - 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS - 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1> - 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1> - 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0> - 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS - 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6> - 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7> - 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3> - 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1> - 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1> - 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1> - 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3> - 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2> - 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3> - 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6> - 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7> - 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6> - 
3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7> - 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3> - 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1> - 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0> - 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6> - 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4> - 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5> - 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6> - 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6> - 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4> - 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1> - 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2> - 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u> - 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6> - 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6> - 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6> - 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS - 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0> - 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2> - 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2> - 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3> - 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3> - 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7> - 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6> - 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5> - 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5> - 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0> - 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS - 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6> - 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1> - 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3> - 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6> - 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7> - 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5> - 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7> - 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6> - 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1> - 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7> - 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS - 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2> - 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7> - 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS - 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS - 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7> - 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6> - 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7> - 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS - 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2> - 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS - 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6> - 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS - 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5> - 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS - 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0> - 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS - 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS - 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0> - 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2> - 2686027937U, // 
<6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4> - 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2> - 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2> - 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2> - 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0> - 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0> - 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2> - 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3> - 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1> - 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3> - 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1> - 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6> - 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3> - 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0> - 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3> - 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6> - 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4> - 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3> - 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2> - 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0> - 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6> - 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7> - 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7> - 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6> - 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3> - 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1> - 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3> - 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3> - 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3> - 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6> - 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5> - 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7> - 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7> - 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5> - 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS - 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3> - 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3> - 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6> - 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS - 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6> - 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6> - 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4> - 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6> - 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS - 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7> - 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5> - 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5> - 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6> - 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7> - 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6> - 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0> - 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6> - 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS - 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3> - 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6> - 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1> - 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6> - 3710702321U, // <6,3,6,5>: 
Cost 4 vext2 <3,2,6,3>, <6,5,7,7> - 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6> - 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7> - 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS - 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS - 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7> - 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7> - 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2> - 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS - 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3> - 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3> - 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7> - 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS - 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS - 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2> - 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u> - 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2> - 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS - 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6> - 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0> - 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7> - 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS - 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0> - 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS - 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6> - 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1> - 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6> - 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1> - 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2> - 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0> - 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS - 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1> - 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1> - 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0> - 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS - 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS - 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0> - 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3> - 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1> - 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1> - 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4> - 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3> - 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2> - 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1> - 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4> - 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS - 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0> - 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0> - 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4> - 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2> - 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3> - 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6> - 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3> - 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6> - 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6> - 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5> - 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7> - 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6> - 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS - 3716017135U, 
// <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4> - 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4> - 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4> - 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4> - 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS - 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6> - 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4> - 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6> - 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS - 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3> - 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5> - 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6> - 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS - 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5> - 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS - 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS - 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS - 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS - 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2> - 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2> - 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2> - 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS - 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3> - 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3> - 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2> - 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS - 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS - 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4> - 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5> - 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7> - 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS - 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5> - 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6> - 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7> - 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS - 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS - 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS - 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u> - 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6> - 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6> - 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS - 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS - 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS - 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS - 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0> - 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS - 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6> - 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2> - 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1> - 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5> - 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7> - 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0> - 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0> - 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1> - 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5> - 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0> - 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS - 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6> - 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7> - 3767733960U, // 
<6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4> - 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3> - 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3> - 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS - 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5> - 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2> - 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5> - 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS - 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6> - 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7> - 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS - 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5> - 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2> - 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5> - 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3> - 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3> - 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6> - 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6> - 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0> - 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3> - 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3> - 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS - 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5> - 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5> - 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5> - 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS - 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS - 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0> - 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6> - 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6> - 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS - 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2> - 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2> - 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2> - 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS - 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5> - 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6> - 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7> - 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7> - 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1> - 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4> - 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4> - 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4> - 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5> - 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5> - 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6> - 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0> - 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1> - 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS - 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7> - 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7> - 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2> - 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS - 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5> - 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6> - 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS - 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS - 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS - 2560525556U, // <6,5,u,1>: 
Cost 3 vext1 <1,6,5,u>, <1,6,5,u> - 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u> - 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7> - 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS - 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS - 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6> - 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u> - 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u> - 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS - 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS - 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4> - 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1> - 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2> - 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3> - 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0> - 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS - 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS - 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2> - 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1> - 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0> - 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS - 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3> - 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7> - 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3> - 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3> - 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS - 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6> - 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3> - 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6> - 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1> - 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3> - 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7> - 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7> - 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3> - 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3> - 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2> - 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4> - 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6> - 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3> - 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5> - 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4> - 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6> - 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS - 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5> - 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS - 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2> - 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6> - 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6> - 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6> - 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS - 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0> - 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6> - 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6> - 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS - 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3> - 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5> - 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0> - 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6> - 2646937604U, // <6,6,5,5>: Cost 3 vext2 
<4,u,6,6>, <5,5,5,5> - 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0> - 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS - 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS - 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS - 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2> - 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3> - 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2> - 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS - 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3> - 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS - 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7> - 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS - 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS - 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7> - 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7> - 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6> - 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS - 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4> - 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6> - 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS - 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS - 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS - 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS - 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u> - 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS - 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS - 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS - 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS - 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS - 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS - 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0> - 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS - 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2> - 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0> - 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5> - 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0> - 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7> - 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2> - 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS - 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2> - 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1> - 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0> - 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3> - 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5> - 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7> - 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7> - 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2> - 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3> - 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2> - 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3> - 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2> - 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1> - 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6> - 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7> - 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7> - 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1> - 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1> - 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2> - 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3> - 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1> - 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3> - 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, 
<3,4,5,6> - 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7> - 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7> - 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1> - 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2> - 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1> - 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3> - 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3> - 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5> - 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4> - 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS - 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7> - 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS - 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2> - 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4> - 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0> - 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6> - 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5> - 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0> - 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7> - 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7> - 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2> - 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3> - 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5> - 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6> - 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7> - 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6> - 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1> - 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1> - 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2> - 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7> - 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3> - 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7> - 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6> - 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7> - 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7> - 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7> - 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2> - 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2> - 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS - 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0> - 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1> - 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6> - 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS - 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7> - 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1> - 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS - 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0> - 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS - 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2> - 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2> - 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5> - 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0> - 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7> - 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0> - 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS - 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2> - 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1> - 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS - 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3> - 2566663478U, // <6,u,1,4>: 
Cost 3 vext1 <2,6,u,1>, RHS - 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7> - 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7> - 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3> - 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS - 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS - 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3> - 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2> - 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1> - 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS - 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u> - 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7> - 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3> - 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1> - 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2> - 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3> - 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3> - 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3> - 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6> - 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7> - 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5> - 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1> - 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2> - 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1> - 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3> - 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6> - 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6> - 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4> - 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS - 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6> - 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS - 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS - 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5> - 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7> - 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6> - 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5> - 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS - 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7> - 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS - 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS - 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2> - 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7> - 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS - 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5> - 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS - 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1> - 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS - 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS - 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7> - 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7> - 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS - 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS - 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5> - 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7> - 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS - 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS - 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS - 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS - 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS - 1571223484U, 
// <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1> - 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS - 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS - 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS - 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS - 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS - 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0> - 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1> - 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2> - 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0> - 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1> - 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6> - 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0> - 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7> - 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1> - 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS - 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5> - 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS - 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7> - 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS - 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7> - 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1> - 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0> - 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS - 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2> - 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5> - 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0> - 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1> - 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6> - 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7> - 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7> - 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7> - 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2> - 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2> - 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0> - 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0> - 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3> - 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6> - 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0> - 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0> - 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7> - 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0> - 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4> - 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5> - 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6> - 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4> - 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6> - 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS - 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6> - 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5> - 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5> - 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS - 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3> - 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7> - 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0> - 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6> - 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5> - 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0> - 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7> - 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0> - 2688393709U, // <7,0,6,0>: Cost 
3 vext3 <0,6,0,7>, <0,6,0,7>
-  2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
-  2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
-  2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
-  3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
-  2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
-  2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
-  2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
-  2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
-  2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
-  2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
-  3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
-  3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
-  2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
-  2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
-  2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
-  2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
-  2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
-  1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
-  1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
-   564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
-  2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
-  1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
-  1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
-  1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
-  2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1>
-   564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
[... 1,179 further '-' lines in the same format, table entries <7,1,0,0> through <u,6,4,u> ...]
-  2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS
-  2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3>
-  2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7>
-  2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
-  1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6>
-  2645757956U, // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5>
-  2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0>
-  2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS
-  1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5>
-  1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS
-  2579825398U,
// <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2> - 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3> - 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2> - 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS - 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3> - 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS - 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7> - 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS - 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS - 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2> - 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2> - 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS - 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3> - 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3> - 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS - 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS - 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS - 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS - 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2> - 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS - 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS - 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS - 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS - 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS - 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0> - 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS - 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2> - 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0> - 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5> - 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0> - 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7> - 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2> - 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS - 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2> - 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1> - 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0> - 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7> - 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS - 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7> - 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7> - 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u> - 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7> - 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7> - 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2> - 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2> - 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1> - 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7> - 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7> - 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7> - 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u> - 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7> - 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2> - 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u> - 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u> - 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3> - 1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6> - 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3> - 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3> - 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7> - 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2> - 
1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1> - 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7> - 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3> - 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4> - 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4> - 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS - 1571360116U, // <u,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0> - 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS - 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2> - 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4> - 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0> - 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6> - 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5> - 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0> - 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7> - 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7> - 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS - 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2> - 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5> - 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS - 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6> - 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6> - 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1> - 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1> - 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2> - 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7> - 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2> - 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7> - 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6> - 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7> - 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7> - 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS - 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS - 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2> - 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS - 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0> - 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1> - 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6> - 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS - 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7> - 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS - 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS - 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS - 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS - 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2> - 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2> - 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5> - 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0> - 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2> - 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0> - 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS - 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2> - 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS - 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS - 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3> - 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS - 1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7> - 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1> - 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3> - 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS - 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, 
LHS - 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2> - 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS - 835584U, // <u,u,2,3>: Cost 0 copy LHS - 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS - 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6> - 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7> - 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3> - 835584U, // <u,u,2,u>: Cost 0 copy LHS - 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS - 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2> - 120371557U, // <u,u,3,2>: Cost 1 vrev LHS - 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS - 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS - 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7> - 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3> - 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS - 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS - 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS - 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4> - 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6> - 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4> - 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS - 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS - 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6> - 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6> - 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS - 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS - 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3> - 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5> - 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7> - 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS - 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS - 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS - 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS - 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS - 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS - 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2> - 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3> - 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7> - 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS - 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6> - 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS - 27705344U, // <u,u,6,7>: Cost 0 copy RHS - 27705344U, // <u,u,6,u>: Cost 0 copy RHS - 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS - 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7> - 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7> - 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS - 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS - 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3> - 120699277U, // <u,u,7,6>: Cost 1 vrev RHS - 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS - 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS - 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS - 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS - 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS - 835584U, // <u,u,u,3>: Cost 0 copy LHS - 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS - 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS - 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS - 27705344U, // <u,u,u,7>: Cost 0 copy RHS - 835584U, // <u,u,u,u>: Cost 0 copy LHS + 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS + 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS + 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0> + 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0> + 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS + 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, 
<5,1,7,3> + 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3> + 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0> + 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS + 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0> + 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS + 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS + 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0> + 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5> + 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7> + 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1> + 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1> + 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS + 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0> + 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1> + 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS + 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0> + 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6> + 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6> + 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7> + 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2> + 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS + 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> + 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0> + 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0> + 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3> + 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6> + 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6> + 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7> + 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0> + 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> + 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1> + 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS + 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS + 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4> + 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6> + 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS + 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS + 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5> + 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS + 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7> + 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3> + 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7> + 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5> + 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6> + 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5> + 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7> + 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7> + 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7> + 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7> + 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS + 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3> + 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7> + 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS + 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0> + 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6> + 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0> + 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0> + 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> + 3635880854U, // <0,0,7,1>: Cost 
4 vext1 <2,0,0,7>, <1,2,3,0> + 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7> + 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0> + 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6> + 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0> + 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7> + 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7> + 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> + 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS + 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS + 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS + 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u> + 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS + 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS + 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS + 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u> + 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS + 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1> + 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS + 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1> + 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0> + 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5> + 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7> + 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1> + 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0> + 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS + 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1> + 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1> + 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0> + 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3> + 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS + 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7> + 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3> + 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1> + 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS + 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS + 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1> + 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2> + 835584U, // <0,1,2,3>: Cost 0 copy LHS + 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS + 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7> + 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7> + 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2> + 835584U, // <0,1,2,u>: Cost 0 copy LHS + 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0> + 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3> + 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0> + 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0> + 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS + 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7> + 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0> + 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1> + 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3> + 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS + 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1> + 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1> + 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4> + 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS + 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS + 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS + 2592052220U, // <0,1,4,7>: 
Cost 3 vext1 <7,0,1,4>, <7,0,1,4> + 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS + 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1> + 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1> + 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0> + 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7> + 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6> + 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1> + 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1> + 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1> + 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7> + 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS + 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7> + 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1> + 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7> + 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS + 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7> + 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1> + 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1> + 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1> + 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0> + 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1> + 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0> + 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1> + 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6> + 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0> + 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0> + 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7> + 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2> + 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS + 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS + 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS + 835584U, // <0,1,u,3>: Cost 0 copy LHS + 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS + 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS + 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS + 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u> + 835584U, // <0,1,u,u>: Cost 0 copy LHS + 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0> + 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS + 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS + 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0> + 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6> + 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7> + 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7> + 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0> + 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS + 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2> + 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1> + 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2> + 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> + 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS + 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7> + 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7> + 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7> + 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2> + 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2> + 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2> + 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2> + 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3> + 
1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS + 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3> + 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7> + 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2> + 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS + 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2> + 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> + 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3> + 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> + 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6> + 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> + 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3> + 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0> + 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> + 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS + 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3> + 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4> + 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4> + 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS + 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS + 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS + 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4> + 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS + 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7> + 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3> + 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7> + 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6> + 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6> + 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5> + 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0> + 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS + 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS + 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1> + 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2> + 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3> + 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7> + 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5> + 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6> + 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6> + 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2> + 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7> + 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> + 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2> + 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2> + 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0> + 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6> + 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2> + 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2> + 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7> + 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> + 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u> + 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS + 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS + 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3> + 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS + 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS + 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS + 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS + 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS + 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, 
<0,0,0,0> + 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2> + 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0> + 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3> + 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS + 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6> + 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7> + 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0> + 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS + 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2> + 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1> + 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3> + 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3> + 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6> + 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6> + 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1> + 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3> + 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2> + 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS + 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2> + 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2> + 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3> + 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS + 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6> + 2598154746U, // <0,3,2,6>: Cost 3 vext1 <u,0,3,2>, <6,2,7,3> + 2598155258U, // <0,3,2,7>: Cost 3 vext1 <u,0,3,2>, <7,0,1,2> + 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS + 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2> + 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3> + 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3> + 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3> + 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6> + 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6> + 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7> + 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7> + 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3> + 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2> + 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4> + 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4> + 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3> + 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6> + 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6> + 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS + 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4> + 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6> + 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS + 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2> + 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2> + 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7> + 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5> + 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7> + 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7> + 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0> + 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5> + 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7> + 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3> + 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7> + 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7> + 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7> + 3306670270U, 
// <0,3,6,5>: Cost 4 vrev <3,0,5,6> + 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6> + 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0> + 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0> + 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1> + 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3> + 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7> + 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7> + 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5> + 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7> + 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7> + 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0> + 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3> + 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS + 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u> + 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2> + 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3> + 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS + 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6> + 2598203898U, // <0,3,u,6>: Cost 3 vext1 <u,0,3,u>, <6,2,7,3> + 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0> + 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS + 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4> + 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS + 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4> + 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0> + 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6> + 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1> + 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS + 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0> + 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS + 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS + 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1> + 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0> + 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1> + 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS + 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS + 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS + 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1> + 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS + 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS + 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2> + 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2> + 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4> + 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS + 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS + 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS + 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2> + 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS + 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2> + 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2> + 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4> + 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3> + 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6> + 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6> + 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS + 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4> + 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2> + 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4> + 3962194914U, 
// <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0> + 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3> + 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4> + 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4> + 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS + 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS + 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4> + 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS + 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS + 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0> + 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5> + 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5> + 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS + 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6> + 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS + 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5> + 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS + 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6> + 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2> + 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6> + 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0> + 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6> + 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6> + 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0> + 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4> + 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2> + 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS + 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1> + 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4> + 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4> + 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS + 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> + 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0> + 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2> + 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> + 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS + 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS + 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS + 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u> + 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS + 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS + 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS + 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u> + 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS + 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0> + 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS + 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2> + 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5> + 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS + 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1> + 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1> + 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS + 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS + 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS + 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3> + 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2> + 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2> + 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1> + 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5> + 
2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0> + 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3> + 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS + 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS + 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2> + 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7> + 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2> + 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS + 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5> + 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6> + 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS + 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS + 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2> + 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3> + 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1> + 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3> + 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5> + 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0> + 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7> + 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0> + 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0> + 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1> + 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4> + 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4> + 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5> + 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6> + 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS + 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5> + 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6> + 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS + 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0> + 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0> + 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5> + 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0> + 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5> + 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5> + 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0> + 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7> + 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7> + 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS + 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0> + 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3> + 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4> + 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS + 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0> + 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7> + 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0> + 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0> + 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS + 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0> + 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7> + 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2> + 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS + 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7> + 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0> + 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0> + 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS + 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS + 2625918766U, // <0,5,u,1>: 
Cost 3 vext2 <1,4,0,5>, LHS + 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0> + 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u> + 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u> + 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS + 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u> + 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0> + 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0> + 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS + 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS + 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2> + 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4> + 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6> + 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0> + 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0> + 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS + 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS + 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS + 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1> + 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3> + 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3> + 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS + 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1> + 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6> + 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS + 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS + 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS + 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2> + 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2> + 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1> + 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2> + 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3> + 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6> + 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS + 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS + 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2> + 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7> + 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0> + 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3> + 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6> + 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7> + 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0> + 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0> + 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0> + 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS + 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2> + 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4> + 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2> + 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS + 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS + 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0> + 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS + 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS + 2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> + 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0> + 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7> + 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0> + 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5> + 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0> + 
3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7> + 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS + 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> + 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0> + 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3> + 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6> + 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0> + 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4> + 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6> + 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6> + 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7> + 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7> + 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1> + 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0> + 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7> + 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0> + 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5> + 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6> + 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2> + 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0> + 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1> + 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS + 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS + 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u> + 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0> + 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u> + 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS + 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0> + 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS + 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS + 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0> + 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS + 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0> + 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0> + 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5> + 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6> + 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7> + 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7> + 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS + 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1> + 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1> + 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0> + 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5> + 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1> + 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3> + 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7> + 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7> + 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1> + 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS + 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2> + 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2> + 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0> + 2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS + 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7> + 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2> + 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7> + 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2> + 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2> + 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, 
<3,1,2,3> + 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3> + 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3> + 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6> + 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7> + 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> + 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0> + 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7> + 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS + 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4> + 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4> + 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7> + 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6> + 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS + 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7> + 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5> + 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS + 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0> + 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7> + 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5> + 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7> + 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5> + 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7> + 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7> + 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0> + 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7> + 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0> + 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6> + 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7> + 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7> + 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS + 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7> + 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6> + 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0> + 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7> + 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1> + 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0> + 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7> + 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0> + 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS + 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7> + 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7> + 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7> + 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7> + 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u> + 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u> + 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u> + 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0> + 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u> + 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS + 2669066421U, // <0,7,u,6>: Cost 3 vext2 <u,6,0,7>, <u,6,0,7> + 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0> + 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u> + 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS + 1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS + 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS + 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, <u,0,3,2> + 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS + 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, <u,0,5,6> + 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS + 2592535607U, // <0,u,0,7>: Cost 3 vext1 
<7,0,u,0>, <7,0,u,0> + 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS + 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1> + 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS + 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS + 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> + 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS + 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS + 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, <u,6,3,7> + 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS + 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS + 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS + 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2> + 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS + 835584U, // <0,u,2,3>: Cost 0 copy LHS + 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS + 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, <u,4,5,6> + 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS + 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2> + 835584U, // <0,u,2,u>: Cost 0 copy LHS + 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2> + 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> + 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u> + 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> + 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6> + 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> + 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> + 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u> + 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> + 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS + 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS + 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS + 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4> + 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS + 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS + 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS + 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, <u,4,7,6> + 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS + 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS + 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0> + 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, <u,5,2,7> + 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, <u,5,3,7> + 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS + 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u> + 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS + 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS + 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS + 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS + 2262496983U, // <0,u,6,1>: Cost 3 vrev <u,0,1,6> + 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u> + 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, <u,6,3,7> + 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS + 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u> + 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u> + 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u> + 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u> + 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS + 2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0> + 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7> + 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u> + 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS + 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6> + 
2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7> + 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7> + 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS + 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS + 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS + 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS + 835584U, // <0,u,u,3>: Cost 0 copy LHS + 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS + 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS + 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS + 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u> + 835584U, // <0,u,u,u>: Cost 0 copy LHS + 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0> + 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1> + 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2> + 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0> + 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1> + 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0> + 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7> + 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0> + 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1> + 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS + 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1> + 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS + 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3> + 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS + 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1> + 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7> + 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2> + 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS + 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1> + 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1> + 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0> + 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1> + 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6> + 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7> + 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0> + 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2> + 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1> + 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0> + 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1> + 67944550U, // <1,0,3,2>: Cost 1 vrev LHS + 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3> + 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS + 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7> + 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7> + 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3> + 68386972U, // <1,0,3,u>: Cost 1 vrev LHS + 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1> + 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5> + 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6> + 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1> + 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1> + 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS + 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1> + 3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4> + 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS + 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0> + 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS + 3028287590U, // <1,0,5,2>: Cost 3 vtrnl 
<1,3,5,7>, LHS + 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5> + 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5> + 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0> + 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0> + 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS + 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS + 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1> + 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7> + 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7> + 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6> + 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1> + 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0> + 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0> + 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0> + 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0> + 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0> + 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1> + 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7> + 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0> + 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6> + 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0> + 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0> + 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7> + 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0> + 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0> + 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1> + 67985515U, // <1,0,u,2>: Cost 1 vrev LHS + 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1> + 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6> + 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS + 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0> + 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u> + 68427937U, // <1,0,u,u>: Cost 1 vrev LHS + 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1> + 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS + 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1> + 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2> + 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5> + 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1> + 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7> + 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0> + 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1> + 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS + 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS + 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0> + 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3> + 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS + 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7> + 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7> + 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1> + 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS + 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2> + 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1> + 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2> + 2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1> + 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS + 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7> + 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7> + 3804554170U, // <1,1,2,7>: Cost 
4 vext3 <7,7,0,1>, <1,2,7,0> + 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1> + 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2> + 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1> + 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2> + 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS + 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6> + 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7> + 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7> + 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3> + 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS + 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS + 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4> + 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0> + 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5> + 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS + 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS + 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS + 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4> + 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS + 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1> + 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3> + 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2> + 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7> + 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5> + 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5> + 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0> + 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS + 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7> + 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2> + 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7> + 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3> + 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7> + 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6> + 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5> + 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6> + 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0> + 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0> + 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1> + 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> + 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3> + 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS + 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6> + 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7> + 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0> + 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7> + 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> + 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS + 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS + 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3> + 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS + 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS + 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS + 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7> + 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS + 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS + 2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0> + 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS + 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2> + 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1> 
+ 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5> + 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7> + 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2> + 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1> + 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS + 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2> + 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1> + 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0> + 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS + 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS + 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7> + 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7> + 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0> + 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS + 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2> + 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2> + 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2> + 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3> + 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5> + 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7> + 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7> + 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1> + 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3> + 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS + 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2> + 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2> + 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3> + 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS + 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3> + 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3> + 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2> + 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS + 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2> + 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6> + 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4> + 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2> + 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4> + 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS + 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6> + 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0> + 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS + 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS + 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7> + 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2> + 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS + 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS + 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5> + 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0> + 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7> + 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS + 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1> + 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2> + 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3> + 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7> + 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5> + 3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7> + 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6> + 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2> + 2689746919U, // <1,2,6,u>: Cost 3 vext3 
<0,u,1,1>, <2,6,u,7> + 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2> + 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2> + 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3> + 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1> + 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6> + 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0> + 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1> + 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1> + 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2> + 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS + 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2> + 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2> + 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2> + 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS + 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS + 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3> + 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2> + 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS + 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0> + 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS + 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2> + 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3> + 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5> + 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6> + 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7> + 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1> + 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS + 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2> + 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1> + 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3> + 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS + 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS + 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7> + 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7> + 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1> + 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS + 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> + 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3> + 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> + 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> + 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> + 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5> + 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> + 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3> + 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> + 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS + 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3> + 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2> + 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3> + 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS + 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5> + 2598826490U, // <1,3,3,6>: Cost 3 vext1 <u,1,3,3>, <6,2,7,3> + 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7> + 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS + 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS + 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3> + 3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3> + 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4> + 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS + 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, 
RHS + 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6> + 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4> + 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS + 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS + 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7> + 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5> + 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5> + 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS + 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5> + 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4> + 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS + 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS + 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0> + 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1> + 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3> + 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7> + 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3> + 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5> + 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3> + 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> + 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> + 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1> + 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3> + 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7> + 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3> + 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6> + 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5> + 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1> + 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7> + 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1> + 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS + 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u> + 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2> + 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS + 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS + 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS + 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6> + 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS + 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS + 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4> + 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS + 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4> + 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4> + 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5> + 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1> + 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2> + 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1> + 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1> + 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2> + 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4> + 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0> + 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6> + 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1> + 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS + 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS + 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0> + 2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS + 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2> + 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3> + 3693086312U, // 
<1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2> + 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1> + 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4> + 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS + 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7> + 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2> + 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS + 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS + 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4> + 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3> + 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3> + 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6> + 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5> + 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6> + 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3> + 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u> + 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1> + 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4> + 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4> + 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4> + 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4> + 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS + 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6> + 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1> + 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS + 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS + 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5> + 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1> + 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2> + 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS + 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS + 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS + 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS + 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS + 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS + 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1> + 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2> + 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2> + 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS + 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7> + 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7> + 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> + 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS + 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4> + 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1> + 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4> + 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4> + 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS + 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0> + 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1> + 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2> + 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4> + 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS + 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS + 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u> + 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1> + 2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6> + 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1> + 1616006697U, // <1,4,u,6>: Cost 2 vext3 
<0,u,1,1>, RHS + 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> + 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS + 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0> + 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS + 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5> + 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4> + 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5> + 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5> + 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1> + 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1> + 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS + 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2> + 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1> + 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0> + 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7> + 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5> + 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1> + 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7> + 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS + 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3> + 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1> + 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5> + 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2> + 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1> + 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5> + 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7> + 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7> + 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS + 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1> + 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2> + 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7> + 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2> + 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3> + 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6> + 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5> + 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6> + 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS + 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS + 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1> + 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0> + 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3> + 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4> + 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4> + 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS + 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS + 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6> + 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS + 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS + 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5> + 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5> + 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3> + 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS + 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5> + 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0> + 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7> + 2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS + 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1> + 2620019115U, // <1,5,6,1>: Cost 3 vext2 
<0,4,1,5>, <6,1,7,5> + 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3> + 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4> + 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6> + 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6> + 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6> + 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0> + 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0> + 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS + 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1> + 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1> + 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7> + 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS + 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7> + 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0> + 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1> + 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS + 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2> + 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS + 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3> + 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1> + 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5> + 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS + 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7> + 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS + 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS + 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0> + 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS + 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6> + 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1> + 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5> + 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6> + 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6> + 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS + 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS + 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2> + 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1> + 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0> + 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3> + 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6> + 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7> + 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1> + 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS + 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS + 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1> + 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0> + 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2> + 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1> + 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS + 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7> + 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7> + 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS + 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS + 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS + 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1> + 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3> + 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3> + 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS + 4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5> + 
2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3> + 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7> + 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u> + 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1> + 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0> + 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4> + 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4> + 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5> + 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS + 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS + 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS + 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS + 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2> + 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5> + 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5> + 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5> + 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6> + 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5> + 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0> + 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS + 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS + 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1> + 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6> + 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7> + 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0> + 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS + 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7> + 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6> + 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7> + 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7> + 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1> + 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1> + 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3> + 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7> + 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5> + 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1> + 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7> + 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS + 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1> + 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1> + 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS + 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2> + 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1> + 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5> + 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS + 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7> + 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7> + 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1> + 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1> + 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS + 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1> + 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1> + 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1> + 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0> + 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7> + 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7> + 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS + 2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS + 2626667316U, 
// <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1> + 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0> + 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1> + 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS + 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7> + 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7> + 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1> + 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7> + 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS + 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7> + 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2> + 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1> + 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS + 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7> + 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2> + 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2> + 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS + 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS + 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7> + 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2> + 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3> + 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS + 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3> + 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3> + 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2> + 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS + 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1> + 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1> + 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4> + 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4> + 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4> + 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS + 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0> + 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6> + 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS + 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS + 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7> + 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2> + 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5> + 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS + 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6> + 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7> + 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS + 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS + 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1> + 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0> + 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7> + 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6> + 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS + 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6> + 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6> + 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0> + 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0> + 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1> + 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1> + 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1> + 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0> + 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS + 2662503828U, // <1,7,7,5>: Cost 3 vext2 
<7,5,1,7>, <7,5,1,7> + 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7> + 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7> + 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1> + 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS + 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS + 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2> + 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u> + 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS + 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u> + 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3> + 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2> + 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS + 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u> + 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS + 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2> + 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u> + 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u> + 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1> + 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2> + 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1> + 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS + 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS + 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS + 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS + 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS + 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS + 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7> + 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS + 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS + 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS + 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> + 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS + 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> + 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> + 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> + 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS + 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> + 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS + 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> + 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS + 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2> + 115726126U, // <1,u,3,2>: Cost 1 vrev LHS + 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS + 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS + 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3> + 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3> + 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS + 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS + 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1> + 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0> + 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4> + 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4> + 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS + 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS + 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6> + 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6> + 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS + 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS + 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7> + 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS + 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS + 
2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS + 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS + 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS + 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS + 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS + 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u> + 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0> + 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3> + 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7> + 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4> + 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7> + 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u> + 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> + 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> + 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u> + 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1> + 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1> + 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7> + 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS + 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u> + 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7> + 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7> + 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u> + 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS + 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS + 115767091U, // <1,u,u,2>: Cost 1 vrev LHS + 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS + 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS + 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS + 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS + 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS + 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS + 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0> + 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1> + 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2> + 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0> + 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS + 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5> + 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0> + 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7> + 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2> + 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1> + 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0> + 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS + 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS + 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS + 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7> + 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1> + 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2> + 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS + 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS + 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2> + 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0> + 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2> + 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS + 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3> + 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2> + 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2> + 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS + 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0> + 
2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1> + 2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2> + 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3> + 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS + 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5> + 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6> + 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7> + 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u> + 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS + 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5> + 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6> + 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2> + 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS + 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS + 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS + 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5> + 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6> + 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS + 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5> + 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7> + 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5> + 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5> + 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5> + 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0> + 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS + 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5> + 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0> + 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS + 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7> + 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6> + 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6> + 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0> + 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6> + 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7> + 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6> + 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> + 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3> + 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1> + 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0> + 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6> + 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2> + 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0> + 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7> + 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> + 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS + 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1> + 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS + 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2> + 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS + 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS + 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS + 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS + 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS + 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS + 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS + 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0> + 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2> + 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS + 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2> + 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, 
<6,2,1,0> + 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1> + 1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2> + 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS + 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1> + 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0> + 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3> + 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS + 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7> + 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1> + 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2> + 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3> + 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS + 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1> + 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2> + 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0> + 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS + 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3> + 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7> + 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0> + 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0> + 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS + 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1> + 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2> + 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3> + 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS + 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5> + 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3> + 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7> + 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u> + 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5> + 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6> + 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5> + 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4> + 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS + 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS + 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS + 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6> + 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4> + 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS + 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7> + 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1> + 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7> + 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS + 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5> + 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5> + 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS + 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7> + 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS + 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2> + 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7> + 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS + 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS + 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5> + 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6> + 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1> + 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS + 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2> + 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1> + 3795125538U, // <2,1,7,2>: Cost 4 vext3 
<6,1,7,2>, <1,7,2,0> + 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7> + 3740915046U, // <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6> + 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7> + 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1> + 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u> + 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2> + 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS + 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1> + 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2> + 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u> + 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS + 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5> + 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3> + 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1> + 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u> + 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2> + 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS + 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0> + 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2> + 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS + 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7> + 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6> + 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2> + 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2> + 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2> + 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1> + 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0> + 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS + 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS + 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7> + 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3> + 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1> + 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS + 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS + 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2> + 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS + 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3> + 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS + 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7> + 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7> + 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2> + 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS + 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1> + 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0> + 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2> + 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS + 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5> + 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5> + 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6> + 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3> + 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS + 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS + 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2> + 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5> + 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5> + 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS + 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS + 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS + 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4> + 1548979753U, // <2,2,4,u>: 
Cost 2 vext2 <0,u,2,2>, RHS + 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2> + 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3> + 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7> + 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS + 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5> + 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5> + 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0> + 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS + 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS + 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6> + 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3> + 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3> + 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7> + 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS + 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7> + 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6> + 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2> + 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7> + 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1> + 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5> + 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2> + 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS + 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6> + 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7> + 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7> + 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7> + 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1> + 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS + 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS + 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS + 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS + 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS + 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS + 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6> + 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS + 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS + 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0> + 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS + 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2> + 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3> + 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5> + 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7> + 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7> + 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0> + 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS + 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2> + 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1> + 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0> + 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3> + 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS + 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7> + 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7> + 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0> + 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3> + 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2> + 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1> + 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2> + 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1> + 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5> + 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7> + 1548986298U, // <2,3,2,6>: 
Cost 2 vext2 LHS, <2,6,3,7> + 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1> + 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1> + 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2> + 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3> + 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3> + 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3> + 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6> + 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7> + 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3> + 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7> + 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2> + 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS + 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4> + 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0> + 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2> + 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS + 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS + 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6> + 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4> + 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS + 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS + 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3> + 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4> + 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6> + 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6> + 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5> + 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0> + 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7> + 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7> + 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2> + 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6> + 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3> + 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5> + 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6> + 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7> + 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6> + 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1> + 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1> + 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2> + 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3> + 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3> + 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1> + 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6> + 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2> + 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1> + 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7> + 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2> + 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2> + 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS + 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0> + 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, <u,3,0,1> + 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6> + 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS + 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7> + 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, <u,7,0,1> + 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS + 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4> + 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS + 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4> + 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4> + 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2> + 2712832914U, // 
<2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1> + 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> + 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1> + 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> + 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2> + 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1> + 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4> + 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS + 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4> + 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7> + 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3> + 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1> + 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS + 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4> + 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3> + 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4> + 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5> + 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4> + 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS + 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS + 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0> + 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS + 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4> + 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1> + 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4> + 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3> + 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4> + 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5> + 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6> + 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7> + 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u> + 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2> + 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1> + 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2> + 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3> + 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4> + 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS + 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS + 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7> + 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS + 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5> + 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2> + 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5> + 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5> + 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS + 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5> + 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS + 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS + 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS + 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS + 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2> + 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4> + 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2> + 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS + 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS + 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6> + 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1> + 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS + 2659234821U, // <2,4,7,0>: Cost 3 vext2 
<7,0,2,4>, <7,0,2,4> + 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2> + 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4> + 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4> + 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6> + 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0> + 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1> + 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7> + 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> + 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS + 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS + 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2> + 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2> + 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS + 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5> + 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS + 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS + 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS + 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0> + 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS + 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2> + 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5> + 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5> + 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5> + 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7> + 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS + 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS + 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2> + 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1> + 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0> + 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7> + 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5> + 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7> + 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2> + 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3> + 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5> + 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2> + 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1> + 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2> + 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1> + 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5> + 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1> + 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7> + 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS + 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS + 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS + 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3> + 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5> + 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3> + 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6> + 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3> + 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6> + 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7> + 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u> + 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS + 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4> + 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5> + 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4> + 2569784630U, // <2,5,4,4>: Cost 3 vext1 
<3,2,5,4>, RHS + 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS + 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6> + 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS + 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS + 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS + 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7> + 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5> + 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5> + 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS + 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5> + 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6> + 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7> + 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7> + 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS + 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6> + 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3> + 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6> + 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5> + 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7> + 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6> + 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS + 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS + 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS + 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2> + 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7> + 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2> + 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS + 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7> + 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2> + 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS + 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS + 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS + 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS + 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u> + 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u> + 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, <u,4,5,6> + 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS + 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6> + 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7> + 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS + 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0> + 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS + 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2> + 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0> + 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6> + 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6> + 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1> + 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2> + 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS + 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2> + 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1> + 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0> + 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS + 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6> + 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7> + 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7> + 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2> + 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS + 2714014137U, // 
<2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1> + 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3> + 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2> + 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1> + 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6> + 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7> + 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6> + 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS + 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1> + 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2> + 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1> + 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7> + 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3> + 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6> + 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5> + 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6> + 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS + 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS + 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2> + 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3> + 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0> + 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4> + 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4> + 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS + 2667875700U, // <2,6,4,6>: Cost 3 vext2 <u,4,2,6>, <4,6,4,6> + 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS + 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS + 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2> + 2667876048U, // <2,6,5,1>: Cost 3 vext2 <u,4,2,6>, <5,1,7,3> + 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7> + 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5> + 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5> + 2667876356U, // <2,6,5,5>: Cost 3 vext2 <u,4,2,6>, <5,5,5,5> + 2667876450U, // <2,6,5,6>: Cost 3 vext2 <u,4,2,6>, <5,6,7,0> + 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS + 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS + 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS + 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1> + 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6> + 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3> + 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS + 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5> + 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6> + 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS + 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS + 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1> + 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2> + 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2> + 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0> + 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5> + 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7> + 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2> + 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0> + 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1> + 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, <u,0,1,2> + 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS + 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, <u,2,3,3> + 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS + 1594136612U, // <2,6,u,4>: 
Cost 2 vext2 <u,4,2,6>, <u,4,2,6> + 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS + 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, <u,6,3,7> + 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS + 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS + 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2> + 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2> + 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2> + 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0> + 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2> + 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7> + 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7> + 2599760953U, // <2,7,0,7>: Cost 3 vext1 <u,2,7,0>, <7,0,u,2> + 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2> + 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> + 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1> + 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0> + 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7> + 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5> + 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7> + 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7> + 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0> + 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> + 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2> + 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7> + 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2> + 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1> + 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6> + 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7> + 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7> + 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1> + 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7> + 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS + 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7> + 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2> + 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3> + 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS + 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3> + 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3> + 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2> + 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS + 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6> + 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4> + 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7> + 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4> + 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4> + 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS + 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u> + 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0> + 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS + 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2> + 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7> + 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7> + 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5> + 2599800118U, // <2,7,5,4>: Cost 3 vext1 <u,2,7,5>, RHS + 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5> + 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7> + 2599802214U, // <2,7,5,7>: Cost 3 vext1 <u,2,7,5>, <7,4,5,6> + 2599802670U, // <2,7,5,u>: 
Cost 3 vext1 <u,2,7,5>, LHS + 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS + 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7> + 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3> + 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6> + 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS + 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6> + 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6> + 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0> + 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS + 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1> + 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2> + 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7> + 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7> + 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS + 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5> + 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7> + 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7> + 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1> + 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS + 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2> + 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2> + 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u> + 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS + 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS + 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u> + 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2> + 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS + 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0> + 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS + 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2> + 1658631909U, // <2,u,0,3>: Cost 2 vext3 <u,0,3,2>, <u,0,3,2> + 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5> + 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7> + 1658853120U, // <2,u,0,6>: Cost 2 vext3 <u,0,6,2>, <u,0,6,2> + 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS + 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS + 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2> + 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1> + 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0> + 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3> + 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5> + 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7> + 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7> + 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, <u,1,7,3> + 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3> + 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS + 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1> + 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS + 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1> + 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS + 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6> + 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7> + 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS + 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS + 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2> + 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1> + 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3> + 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS + 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6> + 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5> + 
1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3> + 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS + 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS + 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS + 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4> + 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5> + 1190213513U, // <2,u,4,3>: Cost 2 vrev <u,2,3,4> + 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS + 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS + 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6> + 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS + 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS + 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5> + 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3> + 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5> + 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, <u,5,3,7> + 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6> + 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5> + 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS + 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7> + 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS + 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS + 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2> + 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3> + 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, <u,6,3,7> + 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS + 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS + 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6> + 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1> + 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS + 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2> + 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u> + 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7> + 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1> + 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6> + 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2> + 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7> + 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7> + 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2> + 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS + 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS + 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS + 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS + 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS + 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS + 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS + 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS + 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS + 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0> + 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1> + 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2> + 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1> + 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1> + 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7> + 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1> + 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0> + 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2> + 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS + 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0> + 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS + 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3> + 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS + 2624767120U, // 
<3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7> + 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7> + 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1> + 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS + 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2> + 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1> + 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0> + 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1> + 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6> + 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7> + 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7> + 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7> + 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2> + 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2> + 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0> + 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1> + 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3> + 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6> + 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7> + 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7> + 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1> + 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3> + 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4> + 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5> + 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6> + 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1> + 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6> + 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS + 2666573172U, // <3,0,4,6>: Cost 3 vext2 <u,2,3,0>, <4,6,4,6> + 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4> + 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6> + 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7> + 2666573520U, // <3,0,5,1>: Cost 3 vext2 <u,2,3,0>, <5,1,7,3> + 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS + 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6> + 2666573766U, // <3,0,5,4>: Cost 3 vext2 <u,2,3,0>, <5,4,7,6> + 2666573828U, // <3,0,5,5>: Cost 3 vext2 <u,2,3,0>, <5,5,5,5> + 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7> + 2666573992U, // <3,0,5,7>: Cost 3 vext2 <u,2,3,0>, <5,7,5,7> + 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS + 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7> + 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7> + 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7> + 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2> + 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1> + 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0> + 2666574648U, // <3,0,6,6>: Cost 3 vext2 <u,2,3,0>, <6,6,6,6> + 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0> + 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7> + 2666574842U, // <3,0,7,0>: Cost 3 vext2 <u,2,3,0>, <7,0,1,2> + 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7> + 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0> + 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7> + 2666575206U, // <3,0,7,4>: Cost 3 vext2 <u,2,3,0>, <7,4,5,6> + 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7> + 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3> + 2666575468U, // <3,0,7,7>: Cost 3 vext2 <u,2,3,0>, <7,7,7,7> + 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0> + 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2> + 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, 
<0,u,1,1> + 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS + 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1> + 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6> + 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS + 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, <u,6,3,7> + 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u> + 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS + 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS + 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS + 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1> + 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2> + 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS + 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1> + 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6> + 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0> + 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2> + 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1> + 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1> + 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1> + 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3> + 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5> + 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5> + 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5> + 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3> + 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3> + 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1> + 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3> + 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2> + 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0> + 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS + 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3> + 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7> + 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0> + 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0> + 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS + 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3> + 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0> + 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1> + 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS + 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7> + 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7> + 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3> + 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3> + 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS + 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5> + 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5> + 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5> + 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS + 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS + 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS + 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4> + 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5> + 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1> + 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7> + 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5> + 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7> + 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5> + 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7> + 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0> + 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS + 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, 
<1,5,u,7> + 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1> + 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7> + 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7> + 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7> + 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5> + 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7> + 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7> + 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0> + 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7> + 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS + 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1> + 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2> + 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS + 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS + 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7> + 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7> + 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7> + 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS + 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS + 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3> + 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0> + 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0> + 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS + 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7> + 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7> + 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS + 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3> + 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0> + 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS + 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0> + 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0> + 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5> + 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7> + 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4> + 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0> + 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS + 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2> + 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1> + 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0> + 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1> + 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS + 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7> + 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3> + 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1> + 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1> + 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1> + 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3> + 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2> + 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3> + 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5> + 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7> + 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6> + 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3> + 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3> + 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1> + 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0> + 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2> + 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3> + 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5> + 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1> + 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2> + 2732967652U, // <3,2,3,7>: 
Cost 3 vext3 LHS, <2,3,7,0> + 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1> + 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS + 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4> + 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4> + 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5> + 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS + 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS + 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0> + 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4> + 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS + 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS + 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5> + 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7> + 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6> + 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5> + 2665263108U, // <3,2,5,5>: Cost 3 vext2 <u,0,3,2>, <5,5,5,5> + 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7> + 2665263272U, // <3,2,5,7>: Cost 3 vext2 <u,0,3,2>, <5,7,5,7> + 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5> + 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1> + 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3> + 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6> + 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7> + 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5> + 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7> + 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7> + 2665263950U, // <3,2,6,7>: Cost 3 vext2 <u,0,3,2>, <6,7,0,1> + 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7> + 2665264122U, // <3,2,7,0>: Cost 3 vext2 <u,0,3,2>, <7,0,1,2> + 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3> + 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2> + 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS + 2665264486U, // <3,2,7,4>: Cost 3 vext2 <u,0,3,2>, <7,4,5,6> + 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7> + 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7> + 2665264748U, // <3,2,7,7>: Cost 3 vext2 <u,0,3,2>, <7,7,7,7> + 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS + 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1> + 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS + 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2> + 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3> + 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5> + 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS + 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0> + 2665265408U, // <3,2,u,7>: Cost 3 vext2 <u,0,3,2>, <u,7,0,1> + 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1> + 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0> + 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2> + 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0> + 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2> + 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1> + 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2> + 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2> + 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7> + 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2> + 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3> + 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3> + 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3> + 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1> + 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS + 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3> + 
3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3> + 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3> + 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3> + 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS + 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3> + 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3> + 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0> + 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS + 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4> + 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3> + 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3> + 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3> + 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS + 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3> + 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3> + 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS + 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS + 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5> + 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7> + 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3> + 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS + 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS + 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4> + 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4> + 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6> + 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4> + 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6> + 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS + 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7> + 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6> + 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS + 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5> + 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5> + 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5> + 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS + 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5> + 2665934946U, // <3,3,5,6>: Cost 3 vext2 <u,1,3,3>, <5,6,7,0> + 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS + 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS + 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7> + 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7> + 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7> + 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7> + 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7> + 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7> + 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6> + 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3> + 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3> + 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS + 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7> + 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7> + 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3> + 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS + 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7> + 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3> + 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7> + 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS + 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS + 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2> + 1592858504U, // <3,3,u,2>: Cost 2 vext2 <u,2,3,3>, 
<u,2,3,3> + 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS + 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS + 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6> + 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3> + 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS + 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS + 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0> + 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS + 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2> + 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4> + 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5> + 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1> + 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2> + 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0> + 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS + 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2> + 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1> + 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4> + 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3> + 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS + 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0> + 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3> + 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4> + 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4> + 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS + 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3> + 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2> + 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1> + 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3> + 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3> + 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0> + 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4> + 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0> + 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2> + 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4> + 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4> + 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3> + 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1> + 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS + 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS + 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1> + 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2> + 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS + 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4> + 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4> + 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4> + 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4> + 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS + 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6> + 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4> + 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS + 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS + 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3> + 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5> + 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2> + 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS + 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7> + 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS + 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5> + 537709896U, // 
<3,4,5,u>: Cost 1 vext3 LHS, RHS + 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1> + 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6> + 2666607098U, // <3,4,6,2>: Cost 3 vext2 <u,2,3,4>, <6,2,7,3> + 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6> + 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6> + 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7> + 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7> + 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4> + 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2> + 2666607610U, // <3,4,7,0>: Cost 3 vext2 <u,2,3,4>, <7,0,1,2> + 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5> + 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4> + 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7> + 2666607974U, // <3,4,7,4>: Cost 3 vext2 <u,2,3,4>, <7,4,5,6> + 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0> + 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0> + 2666608236U, // <3,4,7,7>: Cost 3 vext2 <u,2,3,4>, <7,7,7,7> + 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4> + 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS + 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS + 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u> + 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, <u,3,0,1> + 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS + 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS + 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS + 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u> + 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS + 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0> + 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS + 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5> + 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4> + 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1> + 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1> + 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1> + 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0> + 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS + 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS + 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1> + 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5> + 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5> + 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5> + 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3> + 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7> + 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3> + 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3> + 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3> + 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5> + 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2> + 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5> + 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5> + 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3> + 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7> + 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3> + 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5> + 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2> + 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5> + 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4> + 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3> + 
2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6> + 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5> + 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6> + 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS + 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS + 2600304742U, // <3,5,4,0>: Cost 3 vext1 <u,3,5,4>, LHS + 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5> + 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4> + 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0> + 2600308022U, // <3,5,4,4>: Cost 3 vext1 <u,3,5,4>, RHS + 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS + 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS + 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6> + 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6> + 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS + 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3> + 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5> + 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5> + 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS + 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5> + 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6> + 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7> + 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7> + 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1> + 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7> + 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6> + 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4> + 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5> + 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7> + 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7> + 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0> + 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0> + 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS + 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7> + 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2> + 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2> + 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS + 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7> + 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0> + 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7> + 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS + 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS + 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u> + 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2> + 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2> + 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS + 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7> + 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS + 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3> + 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS + 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS + 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2> + 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4> + 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4> + 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2> + 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7> + 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0> + 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS + 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2> + 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3> + 3763581297U, // <3,6,1,1>: 
Cost 4 vext3 LHS, <6,1,1,1> + 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6> + 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1> + 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6> + 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3> + 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3> + 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3> + 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6> + 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS + 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3> + 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6> + 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0> + 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6> + 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6> + 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3> + 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3> + 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3> + 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2> + 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3> + 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3> + 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3> + 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6> + 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6> + 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2> + 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS + 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS + 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6> + 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3> + 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5> + 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6> + 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6> + 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6> + 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0> + 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS + 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6> + 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS + 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2> + 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7> + 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6> + 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5> + 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6> + 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6> + 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5> + 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS + 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1> + 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3> + 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3> + 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6> + 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4> + 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7> + 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6> + 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7> + 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7> + 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1> + 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7> + 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7> + 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3> + 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5> + 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1> + 2722059141U, // 
<3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2> + 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS + 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1> + 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1> + 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2> + 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u> + 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6> + 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5> + 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6> + 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6> + 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3> + 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1> + 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0> + 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS + 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2> + 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0> + 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5> + 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0> + 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0> + 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1> + 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS + 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2> + 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1> + 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0> + 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7> + 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS + 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7> + 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7> + 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3> + 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7> + 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS + 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3> + 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2> + 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1> + 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS + 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7> + 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7> + 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3> + 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7> + 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2> + 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3> + 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3> + 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3> + 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6> + 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7> + 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7> + 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7> + 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2> + 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS + 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7> + 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7> + 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7> + 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS + 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS + 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4> + 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6> + 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS + 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2> + 2626817744U, // <3,7,5,1>: 
Cost 3 vext2 <1,5,3,7>, <5,1,7,3> + 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3> + 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0> + 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5> + 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5> + 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7> + 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS + 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS + 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1> + 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0> + 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3> + 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0> + 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5> + 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4> + 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6> + 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0> + 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7> + 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS + 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7> + 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7> + 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7> + 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS + 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7> + 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7> + 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7> + 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7> + 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, <u,0,1,2> + 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS + 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, <u,2,3,0> + 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS + 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, <u,4,5,6> + 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS + 1595545808U, // <3,7,u,6>: Cost 2 vext2 <u,6,3,7>, <u,6,3,7> + 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7> + 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS + 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0> + 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, <u,0,1,2> + 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, <u,0,2,2> + 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2> + 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, <u,0,4,1> + 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, <u,0,5,1> + 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, <u,0,6,2> + 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS + 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, <u,0,u,2> + 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u> + 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u> + 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS + 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, <u,1,3,3> + 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS + 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u> + 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u> + 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, <u,1,7,3> + 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS + 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2> + 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3> + 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u> + 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, <u,2,3,0> + 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6> + 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, <u,2,5,7> + 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u> + 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, 
<u,2,7,3> + 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, <u,2,u,0> + 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, <u,3,0,1> + 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3> + 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, <u,3,2,2> + 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS + 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, <u,3,4,5> + 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7> + 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, <u,3,6,7> + 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS + 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS + 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS + 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, <u,4,1,5> + 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6> + 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, <u,4,3,5> + 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4> + 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, <u,4,5,6> + 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, <u,4,6,6> + 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, <u,4,7,6> + 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, <u,4,u,6> + 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS + 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, <u,5,1,7> + 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5> + 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7> + 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS + 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5> + 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS + 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, <u,5,7,7> + 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS + 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, <u,6,0,1> + 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6> + 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, <u,6,2,7> + 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7> + 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6> + 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, <u,6,5,7> + 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6> + 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, <u,6,7,0> + 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, <u,6,u,7> + 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS + 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7> + 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2> + 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS + 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS + 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7> + 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, <u,7,6,2> + 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7> + 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS + 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, <u,u,0,1> + 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, <u,u,1,2> + 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS + 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS + 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, <u,u,4,5> + 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, <u,u,5,6> + 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS + 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, <u,u,7,0> + 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS + 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0> + 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1> + 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2> + 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4> + 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4> + 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0> + 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0> + 3668342067U, 
// <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0> + 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1> + 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS + 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4> + 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS + 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1> + 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS + 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4> + 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1> + 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1> + 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS + 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4> + 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4> + 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4> + 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0> + 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6> + 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7> + 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4> + 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2> + 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4> + 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS + 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4> + 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4> + 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4> + 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS + 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5> + 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7> + 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0> + 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4> + 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS + 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5> + 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6> + 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2> + 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS + 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS + 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2> + 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4> + 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS + 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS + 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS + 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2> + 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5> + 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS + 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7> + 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7> + 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5> + 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS + 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS + 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1> + 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS + 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6> + 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS + 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6> + 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6> + 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0> + 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS + 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2> + 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS + 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS + 3735082246U, // <4,0,7,3>: 
Cost 4 vext2 <7,3,4,0>, <7,3,4,0> + 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5> + 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5> + 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0> + 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7> + 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS + 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS + 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS + 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS + 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u> + 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS + 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS + 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4> + 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u> + 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS + 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1> + 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS + 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6> + 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2> + 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5> + 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4> + 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1> + 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4> + 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS + 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2> + 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4> + 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4> + 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3> + 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5> + 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1> + 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7> + 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1> + 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3> + 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS + 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4> + 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2> + 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4> + 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS + 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3> + 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7> + 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2> + 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4> + 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS + 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3> + 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4> + 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4> + 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS + 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7> + 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> + 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2> + 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3> + 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1> + 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0> + 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5> + 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS + 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5> + 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS + 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4> + 3800007772U, // <4,1,4,7>: Cost 4 
vext3 <7,0,1,4>, <1,4,7,0> + 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS + 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS + 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2> + 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5> + 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2> + 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS + 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7> + 2600686074U, // <4,1,5,6>: Cost 3 vext1 <u,4,1,5>, <6,2,7,3> + 2600686586U, // <4,1,5,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2> + 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS + 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS + 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1> + 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2> + 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS + 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS + 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6> + 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7> + 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1> + 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS + 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1> + 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1> + 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1> + 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4> + 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6> + 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4> + 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7> + 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7> + 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1> + 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS + 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2> + 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2> + 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4> + 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS + 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS + 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> + 2600686586U, // <4,1,u,7>: Cost 3 vext1 <u,4,1,5>, <7,0,1,2> + 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS + 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2> + 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS + 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6> + 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2> + 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6> + 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7> + 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4> + 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2> + 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS + 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2> + 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1> + 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0> + 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS + 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3> + 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7> + 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3> + 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3> + 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3> + 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4> + 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3> + 2691909224U, // <4,2,2,2>: Cost 3 vext3 
<1,2,3,4>, <2,2,2,2> + 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3> + 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0> + 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7> + 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6> + 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2> + 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3> + 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1> + 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1> + 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2> + 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4> + 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5> + 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4> + 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4> + 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4> + 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1> + 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS + 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4> + 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4> + 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4> + 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS + 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS + 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4> + 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0> + 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS + 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS + 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0> + 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2> + 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS + 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS + 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7> + 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7> + 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS + 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS + 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS + 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2> + 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2> + 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS + 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS + 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6> + 2600767994U, // <4,2,6,6>: Cost 3 vext1 <u,4,2,6>, <6,2,7,3> + 2600768506U, // <4,2,6,7>: Cost 3 vext1 <u,4,2,6>, <7,0,1,2> + 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS + 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2> + 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2> + 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2> + 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4> + 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4> + 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7> + 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4> + 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7> + 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2> + 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS + 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2> + 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2> + 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS + 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS + 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS + 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u> + 2600784890U, // <4,2,u,7>: Cost 3 vext1 
<u,4,2,u>, <7,0,1,2> + 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS + 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0> + 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2> + 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4> + 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3> + 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1> + 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0> + 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0> + 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0> + 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2> + 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1> + 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1> + 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4> + 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4> + 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0> + 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3> + 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1> + 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3> + 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4> + 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS + 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3> + 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2> + 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4> + 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS + 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4> + 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3> + 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3> + 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3> + 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1> + 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1> + 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3> + 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3> + 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4> + 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7> + 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7> + 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7> + 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3> + 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1> + 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2> + 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4> + 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0> + 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5> + 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6> + 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4> + 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2> + 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1> + 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS + 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5> + 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5> + 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3> + 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS + 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5> + 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5> + 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4> + 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS + 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS + 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6> + 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, 
<2,4,3,6> + 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3> + 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS + 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6> + 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6> + 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4> + 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6> + 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1> + 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5> + 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7> + 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7> + 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5> + 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7> + 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7> + 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4> + 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1> + 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS + 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2> + 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u> + 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4> + 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS + 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u> + 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u> + 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4> + 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u> + 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4> + 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS + 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS + 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1> + 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0> + 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1> + 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2> + 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0> + 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS + 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2> + 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1> + 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3> + 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3> + 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3> + 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4> + 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3> + 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3> + 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3> + 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4> + 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4> + 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2> + 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4> + 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4> + 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7> + 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4> + 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4> + 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4> + 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2> + 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4> + 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3> + 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4> + 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4> + 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5> + 3765652625U, // <4,4,3,6>: Cost 4 vext3 
<1,2,3,4>, <4,3,6,4> + 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4> + 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4> + 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS + 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4> + 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2> + 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4> + 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS + 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS + 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS + 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4> + 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS + 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS + 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4> + 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5> + 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5> + 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS + 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS + 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS + 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5> + 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS + 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS + 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2> + 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5> + 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6> + 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS + 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6> + 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS + 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4> + 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS + 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2> + 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4> + 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7> + 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4> + 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4> + 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4> + 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4> + 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7> + 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4> + 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS + 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS + 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS + 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u> + 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS + 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS + 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS + 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u> + 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS + 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0> + 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS + 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5> + 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0> + 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5> + 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7> + 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5> + 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0> + 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS + 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2> + 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5> + 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0> + 2821341286U, // <4,5,1,3>: Cost 3 
vuzpr <0,4,1,5>, LHS + 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4> + 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5> + 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6> + 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3> + 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS + 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS + 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3> + 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5> + 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5> + 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5> + 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7> + 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7> + 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS + 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5> + 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2> + 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1> + 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4> + 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3> + 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0> + 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5> + 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4> + 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5> + 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2> + 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS + 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4> + 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5> + 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4> + 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS + 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS + 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5> + 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6> + 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS + 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS + 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3> + 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4> + 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2> + 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5> + 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5> + 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0> + 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS + 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS + 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS + 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6> + 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6> + 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6> + 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS + 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5> + 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6> + 27705344U, // <4,5,6,7>: Cost 0 copy RHS + 27705344U, // <4,5,6,u>: Cost 0 copy RHS + 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS + 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4> + 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7> + 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5> + 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS + 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7> + 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4> + 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4> + 2565166894U, // 
<4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS + 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS + 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS + 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, <u,2,3,0> + 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u> + 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS + 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS + 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, <u,6,3,7> + 27705344U, // <4,5,u,7>: Cost 0 copy RHS + 27705344U, // <4,5,u,u>: Cost 0 copy RHS + 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0> + 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS + 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6> + 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0> + 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5> + 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7> + 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0> + 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS + 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS + 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2> + 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1> + 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0> + 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3> + 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5> + 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7> + 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7> + 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS + 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3> + 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4> + 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3> + 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2> + 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1> + 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6> + 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7> + 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7> + 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3> + 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1> + 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2> + 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6> + 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2> + 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3> + 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6> + 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6> + 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6> + 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4> + 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2> + 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS + 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3> + 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4> + 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4> + 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS + 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS + 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS + 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4> + 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS + 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS + 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3> + 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3> + 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3> + 2645462982U, 
// <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6> + 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5> + 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6> + 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS + 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS + 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS + 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2> + 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3> + 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2> + 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6> + 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6> + 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6> + 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS + 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS + 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2> + 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2> + 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7> + 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4> + 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6> + 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6> + 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3> + 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7> + 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2> + 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS + 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS + 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS + 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, <u,3,0,1> + 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u> + 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS + 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS + 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS + 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS + 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0> + 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS + 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4> + 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4> + 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5> + 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0> + 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7> + 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4> + 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS + 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1> + 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1> + 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4> + 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5> + 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS + 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7> + 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7> + 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3> + 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1> + 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS + 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3> + 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2> + 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4> + 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7> + 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7> + 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7> + 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3> + 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7> + 3706603670U, // <4,7,3,0>: Cost 4 vext2 
<2,5,4,7>, <3,0,1,2> + 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4> + 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4> + 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3> + 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6> + 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7> + 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7> + 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4> + 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7> + 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1> + 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3> + 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7> + 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5> + 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4> + 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS + 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4> + 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7> + 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS + 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2> + 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7> + 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5> + 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5> + 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6> + 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5> + 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5> + 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7> + 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2> + 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS + 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2> + 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2> + 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2> + 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS + 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6> + 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3> + 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7> + 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS + 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS + 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4> + 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7> + 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4> + 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7> + 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7> + 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7> + 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7> + 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7> + 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS + 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS + 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2> + 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2> + 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS + 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u> + 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3> + 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7> + 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS + 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0> + 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS + 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u> + 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, <u,0,3,2> + 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5> + 
2265397305U, // <4,u,0,5>: Cost 3 vrev <u,4,5,0> + 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u> + 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0> + 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS + 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2> + 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1> + 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS + 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3> + 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, <u,1,4,3> + 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7> + 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7> + 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, <u,1,7,3> + 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS + 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS + 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u> + 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2> + 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u> + 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u> + 2733864859U, // <4,u,2,5>: Cost 3 vext3 <u,2,5,4>, <u,2,5,4> + 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7> + 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, <u,2,7,3> + 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u> + 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2> + 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2> + 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u> + 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3> + 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6> + 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, <u,3,5,7> + 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> + 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u> + 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2> + 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS + 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, <u,4,1,2> + 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4> + 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4> + 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS + 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS + 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS + 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, <u,4,7,6> + 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS + 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS + 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS + 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5> + 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS + 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS + 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS + 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS + 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS + 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS + 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS + 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2> + 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS + 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6> + 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS + 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6> + 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS + 27705344U, // <4,u,6,7>: Cost 0 copy RHS + 27705344U, // <4,u,6,u>: Cost 0 copy RHS + 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS + 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4> + 2565384005U, // 
<4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7> + 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u> + 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS + 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6> + 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7> + 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7> + 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS + 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS + 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS + 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS + 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u> + 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS + 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS + 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS + 27705344U, // <4,u,u,7>: Cost 0 copy RHS + 27705344U, // <4,u,u,u>: Cost 0 copy RHS + 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0> + 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1> + 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2> + 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5> + 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5> + 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0> + 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0> + 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0> + 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2> + 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS + 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1> + 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS + 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7> + 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS + 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1> + 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7> + 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2> + 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS + 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2> + 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5> + 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4> + 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5> + 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5> + 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5> + 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4> + 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6> + 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5> + 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5> + 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4> + 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5> + 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5> + 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0> + 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4> + 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0> + 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7> + 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5> + 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS + 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5> + 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6> + 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5> + 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS + 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS + 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5> + 2646150600U, // 
<5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
+ 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
+ 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
+ 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
+ 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
+ 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
+ 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
+ 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
+ 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
+ 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
+ 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
+ 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
+ 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
+ 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
+ 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
+ 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
+ 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
+ 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
+ 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
+ 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
+ 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
+ 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
+ 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
+ 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
+ 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
+ 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
+ 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
+ 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
+ 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
+ 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
+ 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
+ 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
+ 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
+ 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
+ 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
+ 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
+ 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
+ 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
+ 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
+ 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
+ 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
+ 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
+ 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
+ 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
+ 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
+ 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
+ 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
+ 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
+ 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
+ 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
+ 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
+ 3808199610U, // <5,1,2,7>: Cost 4 vext3 <u,3,4,5>, <1,2,7,0>
+ 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
+ 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
+ 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
+ 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
+ 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
+ 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
+ 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
+ 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
+ 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
+ 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
+ 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
+ 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
+ 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
+ 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
+ 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
+ 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
+ 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
+ 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
+ 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
+ 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
+ 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
+ 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
+ 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
+ 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
+ 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
+ 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
+ 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
+ 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
+ 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
+ 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
+ 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
+ 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
+ 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
+ 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
+ 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
+ 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
+ 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
+ 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
+ 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
+ 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
+ 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
+ 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
+ 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
+ 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1591662326U, // <5,1,u,0>: Cost 2 vext2 <u,0,5,1>, <u,0,5,1>
+ 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
+ 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
+ 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
+ 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, <u,6,3,7>
+ 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, <u,7,0,1>
+ 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
+ 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
+ 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
+ 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
+ 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
+ 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
+ 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
+ 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
+ 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
+ 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
+ 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
+ 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
+ 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
+ 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
+ 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
+ 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
+ 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
+ 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
+ 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
+ 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
+ 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
+ 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
+ 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
+ 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
+ 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
+ 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
+ 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
+ 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
+ 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
+ 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
+ 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
+ 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
+ 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
+ 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
+ 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
+ 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
+ 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
+ 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
+ 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
+ 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
+ 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
+ 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
+ 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
+ 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
+ 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
+ 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
+ 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
+ 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
+ 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
+ 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
+ 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
+ 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
+ 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
+ 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
+ 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
+ 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
+ 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
+ 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
+ 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
+ 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
+ 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
+ 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
+ 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
+ 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
+ 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
+ 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
+ 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
+ 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
+ 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
+ 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
+ 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
+ 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
+ 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
+ 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
+ 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
+ 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
+ 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
+ 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
+ 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
+ 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
+ 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
+ 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
+ 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
+ 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
+ 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
+ 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
+ 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
+ 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
+ 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
+ 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
+ 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
+ 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
+ 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
+ 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
+ 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
+ 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
+ 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
+ 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
+ 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
+ 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
+ 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
+ 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
+ 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
+ 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
+ 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
+ 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
+ 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
+ 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
+ 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
+ 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
+ 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
+ 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
+ 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
+ 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
+ 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
+ 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
+ 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
+ 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
+ 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
+ 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
+ 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
+ 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
+ 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
+ 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
+ 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
+ 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
+ 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
+ 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
+ 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
+ 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
+ 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
+ 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
+ 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
+ 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
+ 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
+ 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
+ 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
+ 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
+ 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
+ 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
+ 2601513466U, // <5,3,7,6>: Cost 3 vext1 <u,5,3,7>, <6,2,7,3>
+ 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
+ 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
+ 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
+ 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
+ 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
+ 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
+ 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
+ 2601521658U, // <5,3,u,6>: Cost 3 vext1 <u,5,3,u>, <6,2,7,3>
+ 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
+ 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
+ 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
+ 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
+ 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
+ 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
+ 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
+ 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
+ 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
+ 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
+ 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
+ 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
+ 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
+ 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
+ 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
+ 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
+ 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
+ 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
+ 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
+ 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
+ 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
+ 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
+ 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
+ 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
+ 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
+ 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
+ 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
+ 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
+ 2666752099U, // <5,4,2,u>: Cost 3 vext2 <u,2,5,4>, <2,u,4,5>
+ 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
+ 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
+ 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
+ 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
+ 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
+ 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
+ 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
+ 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
+ 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
+ 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
+ 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
+ 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
+ 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
+ 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
+ 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
+ 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
+ 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
+ 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
+ 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
+ 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
+ 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
+ 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
+ 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
+ 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
+ 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
+ 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
+ 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
+ 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
+ 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
+ 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
+ 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
+ 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
+ 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
+ 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
+ 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
+ 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
+ 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
+ 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
+ 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
+ 94817590U, // <5,4,7,6>: Cost 1 vrev RHS
+ 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
+ 94965064U, // <5,4,7,u>: Cost 1 vrev RHS
+ 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
+ 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
+ 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
+ 2667419628U, // <5,4,u,3>: Cost 3 vext2 <u,3,5,4>, <u,3,5,4>
+ 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
+ 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
+ 94825783U, // <5,4,u,6>: Cost 1 vrev RHS
+ 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
+ 94973257U, // <5,4,u,u>: Cost 1 vrev RHS
+ 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
+ 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
+ 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
+ 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
+ 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
+ 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
+ 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
+ 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
+ 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
+ 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
+ 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
+ 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
+ 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
+ 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
+ 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
+ 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
+ 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
+ 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
+ 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
+ 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
+ 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
+ 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
+ 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
+ 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
+ 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
+ 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
+ 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
+ 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
+ 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
+ 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
+ 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
+ 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
+ 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
+ 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
+ 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
+ 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
+ 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
+ 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
+ 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
+ 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
+ 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
+ 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
+ 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
+ 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
+ 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
+ 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
+ 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
+ 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
+ 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
+ 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
+ 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
+ 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
+ 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
+ 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
+ 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
+ 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
+ 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
+ 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
+ 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
+ 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
+ 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
+ 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
+ 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
+ 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
+ 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
+ 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
+ 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
+ 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
+ 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
+ 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, <u,6,3,7>
+ 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
+ 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS
+ 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
+ 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
+ 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
+ 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
+ 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
+ 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
+ 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
+ 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
+ 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
+ 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
+ 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
+ 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
+ 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
+ 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
+ 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
+ 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
+ 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
+ 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
+ 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
+ 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
+ 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
+ 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
+ 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
+ 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
+ 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
+ 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
+ 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
+ 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
+ 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
+ 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
+ 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
+ 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
+ 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
+ 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
+ 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
+ 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
+ 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
+ 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
+ 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
+ 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
+ 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
+ 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
+ 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
+ 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
+ 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
+ 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
+ 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
+ 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
+ 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
+ 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
+ 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
+ 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
+ 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
+ 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
+ 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
+ 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
+ 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
+ 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
+ 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
+ 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
+ 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
+ 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
+ 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
+ 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
+ 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
+ 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
+ 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
+ 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
+ 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
+ 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
+ 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
+ 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
+ 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
+ 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
+ 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
+ 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
+ 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
+ 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
+ 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
+ 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
+ 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
+ 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
+ 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
+ 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
+ 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
+ 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
+ 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
+ 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
+ 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
+ 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
+ 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
+ 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
+ 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
+ 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
+ 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
+ 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
+ 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
+ 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
+ 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
+ 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
+ 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
+ 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
+ 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
+ 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
+ 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
+ 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
+ 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
+ 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
+ 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
+ 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
+ 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
+ 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
+ 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
+ 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
+ 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
+ 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
+ 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
+ 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
+ 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
+ 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
+ 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
+ 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
+ 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
+ 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
+ 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
+ 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
+ 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
+ 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
+ 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, <u,2,3,0>
+ 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
+ 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
+ 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, <u,6,3,7>
+ 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
+ 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
+ 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
+ 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, <u,0,3,2>
+ 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, <u,0,4,1>
+ 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, <u,0,5,1>
+ 2266134675U, // <5,u,0,6>: Cost 3 vrev <u,5,6,0>
+ 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
+ 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
+ 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
+ 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
+ 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
+ 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, <u,1,5,0>
+ 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
+ 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
+ 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
+ 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, <u,2,3,3>
+ 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
+ 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
+ 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
+ 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, <u,2,7,3>
+ 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
+ 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, <u,3,0,1>
+ 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
+ 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
+ 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
+ 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
+ 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
+ 2734610422U, // <5,u,3,6>: Cost 3 vext3 <u,3,6,5>, <u,3,6,5>
+ 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
+ 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
+ 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
+ 1661163546U, // <5,u,4,1>: Cost 2 vext3 <u,4,1,5>, <u,4,1,5>
+ 2734463012U, // <5,u,4,2>: Cost 3 vext3 <u,3,4,5>, <u,4,2,6>
+ 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, <u,4,3,5>
+ 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
+ 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
+ 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
+ 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
+ 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, <u,5,2,3>
+ 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, <u,5,3,7>
+ 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
+ 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
+ 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS
+ 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
+ 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
+ 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, <u,6,3,7>
+ 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
+ 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
+ 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, <u,6,u,7>
+ 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
+ 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
+ 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 118708378U, // <5,u,7,6>: Cost 1 vrev RHS
+ 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
+ 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS
+ 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
+ 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
+ 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
+ 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
+ 118716571U, // <5,u,u,6>: Cost 1 vrev RHS
+ 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
+ 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS
+ 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
+ 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
+ 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
+ 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
+ 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
+ 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
+ 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
+ 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
+ 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
+ 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
+ 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
+ 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
+ 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
+ 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
+ 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
+ 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
+ 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
+ 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
+ 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
+ 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
+ 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
+ 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
+ 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
+ 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
+ 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
+ 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
+ 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
+ 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
+ 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
+ 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
+ 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
+ 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
+ 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
+ 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
+ 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
+ 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
+ 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
+ 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
+ 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
+ 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
+ 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
+ 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
+ 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
+ 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
+ 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
+ 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
+ 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
+ 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
+ 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
+ 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
+ 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
+ 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
+ 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
+ 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
+ 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
+ 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
+ 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
+ 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
+ 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
+ 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
+ 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
+ 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
+ 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
+ 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
+ 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
+ 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
+ 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
+ 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
+ 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
+ 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
+ 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
+ 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
+ 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
+ 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
+ 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
+ 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
+ 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
+ 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
+ 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
+ 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
+ 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
+ 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
+ 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
+ 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
+ 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
+ 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
+ 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
+ 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
+ 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
+ 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
+ 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
+ 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
+ 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
+ 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
+ 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
+ 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
+ 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
+ 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
+ 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
+ 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
+ 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
+ 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
+ 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
+ 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
+ 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
+ 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
+ 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
+ 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
+ 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
+ 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
+ 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
+ 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
+ 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
+ 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
+ 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
+ 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
+ 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
+ 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
+ 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
+ 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
+ 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
+ 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
+ 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
+ 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
+ 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
+ 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
+ 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
+ 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
+ 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
+ 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
+ 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
+ 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
+ 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
+ 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
+ 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
+ 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
+ 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
+ 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
+ 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
+ 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
+ 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
+ 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
+ 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
+ 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
+ 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
+ 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
+ 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
+ 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
+ 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
+ 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
+ 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
+ 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
+ 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
+ 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
+ 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
+ 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
+ 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
+ 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
+ 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
+ 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
+ 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
+ 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
+ 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
+ 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
+ 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
+ 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
+ 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
+ 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
+ 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
+ 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
+ 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
+ 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
+ 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
+ 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
+ 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
+ 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
+ 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
+ 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
+ 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
+ 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
+ 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
+ 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
+ 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
+ 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
+ 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
+ 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
+ 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
+ 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
+ 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
+ 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
+ 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
+ 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
+ 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
+ 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
+ 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
+ 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
+ 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
+ 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
+ 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
+ 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
+ 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
+ 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
+ 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
+ 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
+ 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
+ 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
+ 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
+ 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
+ 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
+ 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
+ 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
+ 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS
+ 1591744256U, // <6,2,u,0>: Cost 2 vext2 <u,0,6,2>, <u,0,6,2>
+ 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
+ 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS
+ 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
+ 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
+ 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS
+ 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
+ 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
+ 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
+ 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
+ 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
+ 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
+ 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
+ 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
+ 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
+ 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
+ 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
+ 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
+ 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
+ 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
+ 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
+ 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
+ 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
+ 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
+ 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
+ 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
+ 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
+ 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
+ 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
+ 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
+ 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
+ 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
+ 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
+ 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
+ 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
+ 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
+ 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
+ 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
+ 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
+ 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
+ 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
+ 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
+ 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
+ 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
+ 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
+ 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
+ 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
+ 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
+ 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
+ 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
+ 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
+ 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
+ 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
+ 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
+ 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
+ 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
+ 2602164326U, // <6,3,6,0>: Cost 3 vext1 <u,6,3,6>, LHS
+ 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
+ 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
+ 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
+ 2602167524U, // <6,3,6,4>: Cost 3 vext1 <u,6,3,6>, <4,4,6,6>
+ 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
+ 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
+ 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
+ 2602170158U, // <6,3,6,u>: Cost 3 vext1 <u,6,3,6>, LHS
+ 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
+ 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
+ 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
+ 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
+ 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
+ 2602176208U, // <6,3,7,5>: Cost 3 vext1 <u,6,3,7>, <5,1,7,3>
+ 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
+ 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
+ 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
+ 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
+ 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
+ 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
+ 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
+ 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
+ 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
+ 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
+ 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
+ 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
+ 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
+ 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
+ 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
+ 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
+ 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
+ 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
+ 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
+ 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
+ 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
+ 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
+ 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
+ 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
+ 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
+ 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
+ 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
+ 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
+ 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
+ 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
+ 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
+ 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
+ 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
+ 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
+ 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
+ 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
+ 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
+ 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
+ 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
+ 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
+ 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
+ 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
+ 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
+ 2668817222U, // <6,4,3,u>: Cost 3 vext2 <u,5,6,4>, <3,u,5,6>
+ 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
+ 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
+ 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
+ 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
+ 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
+ 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
+ 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
+ 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
+ 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
+ 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
+ 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
+ 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
+ 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
+ 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
+ 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
+ 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
+ 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
+ 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
+ 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
+ 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
+ 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
+ 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
+ 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
+ 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
+ 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
+ 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
+ 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
+ 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
+ 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
+ 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
+ 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
+ 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
+ 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
+ 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
+ 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
+ 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
+ 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
+ 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
+ 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
+ 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
+ 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
+ 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
+ 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
+ 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
+ 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
+ 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
+ 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
+ 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
+ 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
+ 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
+ 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
+ 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
+ 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
+ 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
+ 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
+ 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
+ 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
+ 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
+ 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
+ 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
+ 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
+ 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
+ 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
+ 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
+ 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
+ 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
+ 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
+ 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
+ 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
+ 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
+ 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
+ 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
+ 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
+ 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
+ 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
+ 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
+ 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
+ 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
+ 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
+ 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
+ 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
+ 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
+ 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
+ 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
+ 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
+ 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
+ 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
+ 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
+ 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
+ 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
+ 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
+ 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
+ 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
+ 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
+ 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
+ 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
+ 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
+ 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
+ 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
+ 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
+ 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
+ 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
+ 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
+ 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
+ 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
+ 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
+ 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
+ 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
+ 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
+ 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, <u,3,5,7>
+ 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
+ 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
+ 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
+ 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
+ 1175351411U, // <6,5,u,u>: Cost 2 vrev
<5,6,u,u> + 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS + 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS + 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4> + 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1> + 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2> + 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3> + 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0> + 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS + 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS + 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2> + 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1> + 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0> + 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS + 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3> + 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7> + 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3> + 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3> + 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS + 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6> + 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3> + 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6> + 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1> + 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3> + 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7> + 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7> + 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3> + 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3> + 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2> + 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4> + 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6> + 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3> + 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5> + 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4> + 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6> + 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS + 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5> + 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS + 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2> + 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6> + 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6> + 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6> + 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS + 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0> + 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6> + 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6> + 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS + 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3> + 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5> + 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0> + 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6> + 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5> + 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0> + 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS + 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS + 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS + 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2> + 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3> + 2590443670U, // <6,6,6,3>: Cost 3 vext1 
<6,6,6,6>, <3,0,1,2> + 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS + 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3> + 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS + 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7> + 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS + 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS + 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7> + 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7> + 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6> + 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS + 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4> + 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6> + 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS + 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS + 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS + 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS + 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u> + 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS + 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS + 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS + 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS + 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS + 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS + 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0> + 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS + 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2> + 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0> + 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5> + 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0> + 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7> + 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2> + 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS + 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2> + 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1> + 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0> + 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3> + 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5> + 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7> + 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7> + 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2> + 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3> + 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2> + 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3> + 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2> + 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1> + 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6> + 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7> + 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7> + 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1> + 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1> + 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2> + 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3> + 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1> + 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3> + 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6> + 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7> + 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7> + 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1> + 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2> + 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1> + 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3> + 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3> + 
2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5> + 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4> + 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS + 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6> + 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7> + 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS + 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2> + 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3> + 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4> + 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0> + 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6> + 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5> + 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0> + 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7> + 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7> + 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2> + 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3> + 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3> + 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5> + 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6> + 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7> + 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6> + 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1> + 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1> + 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2> + 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7> + 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3> + 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7> + 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6> + 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7> + 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7> + 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7> + 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2> + 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2> + 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS + 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0> + 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1> + 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6> + 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS + 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7> + 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, <u,7,0,1> + 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS + 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0> + 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS + 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2> + 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, <u,0,3,2> + 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5> + 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0> + 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7> + 1193130221U, // <6,u,0,7>: Cost 2 vrev <u,6,7,0> + 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS + 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2> + 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1> + 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS + 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3> + 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS + 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7> + 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7> + 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, <u,1,7,3> + 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS + 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS + 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3> + 1571219048U, // <6,u,2,2>: Cost 2 
vext2 RHS, <2,2,2,2> + 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1> + 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS + 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u> + 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7> + 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, <u,2,7,3> + 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1> + 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2> + 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3> + 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3> + 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3> + 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6> + 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, <u,3,5,7> + 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, <u,3,6,5> + 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1> + 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2> + 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1> + 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3> + 1661245476U, // <6,u,4,2>: Cost 2 vext3 <u,4,2,6>, <u,4,2,6> + 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, <u,4,3,6> + 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4> + 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS + 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6> + 1661614161U, // <6,u,4,7>: Cost 2 vext3 <u,4,7,6>, <u,4,7,6> + 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS + 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS + 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3> + 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5> + 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, <u,5,3,7> + 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6> + 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5> + 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS + 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7> + 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS + 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS + 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2> + 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3> + 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, <u,6,3,7> + 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS + 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5> + 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS + 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1> + 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS + 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS + 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7> + 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7> + 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS + 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS + 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5> + 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7> + 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS + 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS + 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS + 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS + 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS + 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, <u,3,0,1> + 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS + 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS + 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS + 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS + 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS + 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0> + 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1> + 
1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2> + 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0> + 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1> + 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6> + 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0> + 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7> + 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1> + 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS + 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5> + 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS + 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7> + 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS + 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7> + 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1> + 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0> + 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS + 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2> + 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5> + 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0> + 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1> + 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6> + 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7> + 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7> + 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7> + 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2> + 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2> + 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0> + 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0> + 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3> + 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6> + 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0> + 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0> + 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7> + 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0> + 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4> + 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5> + 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6> + 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4> + 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6> + 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS + 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6> + 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5> + 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5> + 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS + 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3> + 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7> + 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0> + 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6> + 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5> + 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0> + 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7> + 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0> + 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7> + 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7> + 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7> + 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0> + 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7> + 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7> + 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6> + 
2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1> + 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7> + 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2> + 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS + 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS + 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7> + 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6> + 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7> + 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0> + 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7> + 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2> + 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2> + 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1> + 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS + 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u> + 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6> + 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS + 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u> + 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, <u,7,0,1> + 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS + 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS + 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS + 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS + 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2> + 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS + 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1> + 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0> + 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1> + 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2> + 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1> + 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1> + 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6> + 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3> + 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5> + 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7> + 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1> + 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5> + 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3> + 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1> + 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3> + 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2> + 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0> + 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5> + 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3> + 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2> + 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0> + 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0> + 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0> + 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3> + 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0> + 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7> + 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5> + 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7> + 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7> + 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3> + 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7> + 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5> + 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5> + 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5> + 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5> + 
3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5> + 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS + 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1> + 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0> + 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS + 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS + 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7> + 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6> + 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7> + 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS + 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7> + 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1> + 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1> + 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7> + 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7> + 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7> + 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7> + 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7> + 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7> + 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7> + 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7> + 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1> + 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7> + 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2> + 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1> + 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3> + 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS + 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS + 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7> + 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0> + 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7> + 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS + 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS + 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3> + 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0> + 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7> + 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS + 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7> + 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7> + 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS + 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7> + 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2> + 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2> + 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0> + 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0> + 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6> + 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7> + 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1> + 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2> + 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7> + 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3> + 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0> + 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0> + 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1> + 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3> + 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0> + 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3> + 2602718850U, // <7,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2> + 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1> + 
2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1> + 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3> + 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2> + 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3> + 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5> + 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7> + 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6> + 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5> + 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3> + 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1> + 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5> + 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6> + 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7> + 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5> + 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7> + 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6> + 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0> + 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1> + 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6> + 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3> + 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4> + 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5> + 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6> + 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7> + 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0> + 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0> + 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7> + 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7> + 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3> + 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7> + 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7> + 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7> + 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7> + 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7> + 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0> + 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7> + 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS + 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3> + 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7> + 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7> + 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS + 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7> + 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7> + 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7> + 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7> + 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1> + 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0> + 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5> + 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS + 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS + 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7> + 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6> + 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7> + 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS + 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1> + 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5> + 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2> + 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7> + 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5> + 
2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7> + 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7> + 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0> + 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7> + 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0> + 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2> + 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0> + 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2> + 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1> + 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2> + 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0> + 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0> + 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2> + 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1> + 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1> + 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3> + 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5> + 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6> + 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3> + 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1> + 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5> + 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5> + 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1> + 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0> + 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2> + 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0> + 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5> + 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4> + 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3> + 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3> + 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0> + 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1> + 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3> + 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3> + 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3> + 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4> + 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7> + 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3> + 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7> + 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3> + 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1> + 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2> + 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4> + 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5> + 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5> + 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6> + 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6> + 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4> + 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6> + 2602819686U, // <7,3,5,0>: Cost 3 vext1 <u,7,3,5>, LHS + 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3> + 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3> + 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7> + 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5> + 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7> + 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0> + 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0> + 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3> + 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7> + 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, 
<6,1,7,3> + 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3> + 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7> + 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7> + 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7> + 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6> + 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7> + 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3> + 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1> + 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5> + 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6> + 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7> + 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5> + 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7> + 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7> + 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7> + 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1> + 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u> + 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2> + 1593153452U, // <7,3,u,2>: Cost 2 vext2 <u,2,7,3>, <u,2,7,3> + 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3> + 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u> + 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6> + 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3> + 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0> + 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2> + 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0> + 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS + 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2> + 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1> + 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5> + 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1> + 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2> + 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1> + 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1> + 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2> + 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1> + 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3> + 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7> + 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3> + 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0> + 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3> + 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3> + 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0> + 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1> + 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3> + 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2> + 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1> + 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7> + 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3> + 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0> + 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5> + 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0> + 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2> + 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5> + 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1> + 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3> + 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6> + 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4> + 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5> + 
3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7> + 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4> + 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2> + 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1> + 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4> + 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3> + 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4> + 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5> + 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6> + 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7> + 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5> + 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS + 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7> + 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3> + 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5> + 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS + 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7> + 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS + 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7> + 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS + 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1> + 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5> + 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3> + 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4> + 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6> + 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7> + 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7> + 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1> + 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2> + 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2> + 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2> + 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5> + 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6> + 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1> + 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0> + 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1> + 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7> + 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7> + 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS + 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS + 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3> + 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u> + 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6> + 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1> + 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS + 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5> + 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS + 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS + 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS + 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS + 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0> + 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1> + 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1> + 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1> + 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2> + 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS + 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1> + 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1> + 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0> + 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7> + 2712211127U, // 
<7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5> + 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3> + 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1> + 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3> + 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3> + 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1> + 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0> + 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7> + 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4> + 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7> + 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3> + 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3> + 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3> + 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4> + 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS + 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5> + 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5> + 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3> + 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS + 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5> + 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5> + 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0> + 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0> + 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS + 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7> + 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5> + 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4> + 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS + 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS + 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5> + 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6> + 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6> + 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1> + 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3> + 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3> + 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3> + 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4> + 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5> + 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6> + 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7> + 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7> + 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1> + 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7> + 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2> + 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4> + 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5> + 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7> + 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7> + 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0> + 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0> + 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS + 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3> + 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2> + 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7> + 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS + 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7> + 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0> + 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1> + 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3> + 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS + 2636388142U, // <7,5,u,1>: 
Cost 3 vext2 <3,1,7,5>, LHS + 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0> + 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u> + 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS + 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7> + 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7> + 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0> + 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3> + 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0> + 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS + 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4> + 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0> + 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2> + 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0> + 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0> + 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2> + 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS + 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1> + 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1> + 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0> + 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3> + 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS + 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7> + 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3> + 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3> + 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1> + 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1> + 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3> + 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2> + 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1> + 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5> + 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7> + 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7> + 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3> + 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3> + 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2> + 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3> + 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3> + 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3> + 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5> + 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7> + 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6> + 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0> + 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0> + 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1> + 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3> + 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5> + 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4> + 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6> + 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS + 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0> + 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5> + 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS + 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS + 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3> + 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7> + 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4> + 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6> + 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5> + 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0> + 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7> + 1181963162U, // 
<7,6,5,u>: Cost 2 vrev <6,7,u,5> + 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1> + 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3> + 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3> + 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4> + 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4> + 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7> + 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6> + 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7> + 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7> + 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1> + 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0> + 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2> + 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0> + 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5> + 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4> + 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6> + 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7> + 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1> + 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1> + 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS + 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2> + 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0> + 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5> + 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS + 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6> + 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3> + 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1> + 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0> + 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2> + 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0> + 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0> + 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1> + 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0> + 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0> + 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2> + 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2> + 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3> + 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1> + 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3> + 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5> + 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS + 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3> + 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1> + 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1> + 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3> + 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5> + 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0> + 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2> + 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0> + 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5> + 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3> + 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3> + 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3> + 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3> + 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1> + 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3> + 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6> + 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7> + 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5> + 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, 
<5,7,7,3> + 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7> + 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7> + 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7> + 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5> + 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7> + 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3> + 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4> + 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4> + 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6> + 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4> + 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6> + 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6> + 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS + 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7> + 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3> + 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7> + 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7> + 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7> + 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7> + 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5> + 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7> + 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS + 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0> + 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7> + 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7> + 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS + 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7> + 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7> + 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0> + 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7> + 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS + 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2> + 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2> + 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2> + 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS + 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7> + 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7> + 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS + 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS + 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS + 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2> + 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3> + 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0> + 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS + 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6> + 1595840756U, // <7,7,u,6>: Cost 2 vext2 <u,6,7,7>, <u,6,7,7> + 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS + 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS + 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0> + 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, <u,0,1,2> + 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, <u,0,2,2> + 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, <u,0,3,2> + 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, <u,0,4,1> + 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, <u,0,5,1> + 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2> + 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS + 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, <u,0,u,2> + 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS + 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1> + 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS + 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, <u,1,3,3> + 
1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS + 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, <u,1,5,3> + 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1> + 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3> + 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS + 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2> + 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, <u,2,1,0> + 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2> + 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, <u,2,3,0> + 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6> + 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, <u,2,5,4> + 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, <u,2,6,3> + 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3> + 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, <u,2,u,0> + 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, <u,3,0,1> + 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3> + 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, <u,3,2,6> + 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3> + 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, <u,3,4,5> + 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, <u,3,5,7> + 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, <u,3,6,7> + 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, <u,3,7,0> + 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, <u,3,u,1> + 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, <u,4,0,1> + 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, <u,4,1,5> + 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, <u,4,2,6> + 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, <u,4,3,5> + 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4> + 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, <u,4,5,6> + 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, <u,4,6,6> + 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6> + 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, <u,4,u,6> + 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS + 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u> + 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, <u,5,2,3> + 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, <u,5,3,7> + 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u> + 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u> + 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS + 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, <u,5,7,7> + 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS + 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, <u,6,0,1> + 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, <u,6,1,7> + 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u> + 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, <u,6,3,7> + 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6> + 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, <u,6,5,7> + 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u> + 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, <u,6,7,0> + 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, <u,6,u,7> + 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, <u,7,0,1> + 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, <u,7,1,3> + 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, <u,7,2,6> + 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7> + 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, <u,7,4,5> + 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7> + 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, <u,7,6,2> + 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS + 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS + 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, <u,u,0,1> + 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, <u,u,1,2> + 564582757U, // <7,u,u,2>: Cost 1 
vext3 RHS, LHS + 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, <u,u,3,0> + 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, <u,u,4,5> + 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, <u,u,5,6> + 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS + 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS + 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS + 135053414U, // <u,0,0,0>: Cost 1 vdup0 LHS + 1611489290U, // <u,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1> + 1611489300U, // <u,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2> + 2568054923U, // <u,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0> + 1481706806U, // <u,0,0,4>: Cost 2 vext1 <0,u,0,0>, RHS + 2555449040U, // <u,0,0,5>: Cost 3 vext1 <0,u,0,0>, <5,1,7,3> + 2591282078U, // <u,0,0,6>: Cost 3 vext1 <6,u,0,0>, <6,u,0,0> + 2591945711U, // <u,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0> + 135053414U, // <u,0,0,u>: Cost 1 vdup0 LHS + 1493655654U, // <u,0,1,0>: Cost 2 vext1 <2,u,0,1>, LHS + 1860550758U, // <u,0,1,1>: Cost 2 vzipl LHS, LHS + 537747563U, // <u,0,1,2>: Cost 1 vext3 LHS, LHS + 2625135576U, // <u,0,1,3>: Cost 3 vext2 <1,2,u,0>, <1,3,1,3> + 1493658934U, // <u,0,1,4>: Cost 2 vext1 <2,u,0,1>, RHS + 2625135760U, // <u,0,1,5>: Cost 3 vext2 <1,2,u,0>, <1,5,3,7> + 1517548447U, // <u,0,1,6>: Cost 2 vext1 <6,u,0,1>, <6,u,0,1> + 2591290362U, // <u,0,1,7>: Cost 3 vext1 <6,u,0,1>, <7,0,1,2> + 537747612U, // <u,0,1,u>: Cost 1 vext3 LHS, LHS + 1611489444U, // <u,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2> + 2685231276U, // <u,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1> + 1994768486U, // <u,0,2,2>: Cost 2 vtrnl LHS, LHS + 2685231294U, // <u,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1> + 1611489484U, // <u,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6> + 2712068310U, // <u,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7> + 2625136570U, // <u,0,2,6>: Cost 3 vext2 <1,2,u,0>, <2,6,3,7> + 2591962097U, // <u,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2> + 1611489516U, // <u,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2> + 2954067968U, // <u,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0> + 2685231356U, // <u,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0> + 72589981U, // <u,0,3,2>: Cost 1 vrev LHS + 2625137052U, // <u,0,3,3>: Cost 3 vext2 <1,2,u,0>, <3,3,3,3> + 2625137154U, // <u,0,3,4>: Cost 3 vext2 <1,2,u,0>, <3,4,5,6> + 2639071848U, // <u,0,3,5>: Cost 3 vext2 <3,5,u,0>, <3,5,u,0> + 2639735481U, // <u,0,3,6>: Cost 3 vext2 <3,6,u,0>, <3,6,u,0> + 2597279354U, // <u,0,3,7>: Cost 3 vext1 <7,u,0,3>, <7,u,0,3> + 73032403U, // <u,0,3,u>: Cost 1 vrev LHS + 2687074636U, // <u,0,4,0>: Cost 3 vext3 <0,4,0,u>, <0,4,0,u> + 1611489618U, // <u,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5> + 1611489628U, // <u,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6> + 3629222038U, // <u,0,4,3>: Cost 4 vext1 <0,u,0,4>, <3,0,1,2> + 2555481398U, // <u,0,4,4>: Cost 3 vext1 <0,u,0,4>, RHS + 1551396150U, // <u,0,4,5>: Cost 2 vext2 <1,2,u,0>, RHS + 2651680116U, // <u,0,4,6>: Cost 3 vext2 <5,6,u,0>, <4,6,4,6> + 2646150600U, // <u,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0> + 1611932050U, // <u,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6> + 2561458278U, // <u,0,5,0>: Cost 3 vext1 <1,u,0,5>, LHS + 1863532646U, // <u,0,5,1>: Cost 2 vzipl RHS, LHS + 2712068526U, // <u,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7> + 2649689976U, // <u,0,5,3>: Cost 3 vext2 <5,3,u,0>, <5,3,u,0> + 2220237489U, // <u,0,5,4>: Cost 3 vrev <0,u,4,5> + 2651680772U, // <u,0,5,5>: Cost 3 vext2 <5,6,u,0>, <5,5,5,5> + 1577939051U, // <u,0,5,6>: Cost 2 vext2 <5,6,u,0>, <5,6,u,0> + 2830077238U, // <u,0,5,7>: Cost 3 vuzpr <1,u,3,0>, RHS + 1579266317U, // <u,0,5,u>: Cost 2 vext2 <5,u,u,0>, <5,u,u,0> + 2555494502U, // <u,0,6,0>: Cost 3 vext1 <0,u,0,6>, LHS + 2712068598U, // <u,0,6,1>: Cost 3 
vext3 RHS, <0,6,1,7> + 1997750374U, // <u,0,6,2>: Cost 2 vtrnl RHS, LHS + 2655662673U, // <u,0,6,3>: Cost 3 vext2 <6,3,u,0>, <6,3,u,0> + 2555497782U, // <u,0,6,4>: Cost 3 vext1 <0,u,0,6>, RHS + 2651681459U, // <u,0,6,5>: Cost 3 vext2 <5,6,u,0>, <6,5,0,u> + 2651681592U, // <u,0,6,6>: Cost 3 vext2 <5,6,u,0>, <6,6,6,6> + 2651681614U, // <u,0,6,7>: Cost 3 vext2 <5,6,u,0>, <6,7,0,1> + 1997750428U, // <u,0,6,u>: Cost 2 vtrnl RHS, LHS + 2567446630U, // <u,0,7,0>: Cost 3 vext1 <2,u,0,7>, LHS + 2567447446U, // <u,0,7,1>: Cost 3 vext1 <2,u,0,7>, <1,2,3,0> + 2567448641U, // <u,0,7,2>: Cost 3 vext1 <2,u,0,7>, <2,u,0,7> + 2573421338U, // <u,0,7,3>: Cost 3 vext1 <3,u,0,7>, <3,u,0,7> + 2567449910U, // <u,0,7,4>: Cost 3 vext1 <2,u,0,7>, RHS + 2651682242U, // <u,0,7,5>: Cost 3 vext2 <5,6,u,0>, <7,5,6,u> + 2591339429U, // <u,0,7,6>: Cost 3 vext1 <6,u,0,7>, <6,u,0,7> + 2651682412U, // <u,0,7,7>: Cost 3 vext2 <5,6,u,0>, <7,7,7,7> + 2567452462U, // <u,0,7,u>: Cost 3 vext1 <2,u,0,7>, LHS + 135053414U, // <u,0,u,0>: Cost 1 vdup0 LHS + 1611489938U, // <u,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1> + 537748125U, // <u,0,u,2>: Cost 1 vext3 LHS, LHS + 2685674148U, // <u,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1> + 1611932338U, // <u,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6> + 1551399066U, // <u,0,u,5>: Cost 2 vext2 <1,2,u,0>, RHS + 1517605798U, // <u,0,u,6>: Cost 2 vext1 <6,u,0,u>, <6,u,0,u> + 2830077481U, // <u,0,u,7>: Cost 3 vuzpr <1,u,3,0>, RHS + 537748179U, // <u,0,u,u>: Cost 1 vext3 LHS, LHS + 1544101961U, // <u,1,0,0>: Cost 2 vext2 <0,0,u,1>, <0,0,u,1> + 1558036582U, // <u,1,0,1>: Cost 2 vext2 <2,3,u,1>, LHS + 2619171051U, // <u,1,0,2>: Cost 3 vext2 <0,2,u,1>, <0,2,u,1> + 1611490038U, // <u,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2> + 2555522358U, // <u,1,0,4>: Cost 3 vext1 <0,u,1,0>, RHS + 2712068871U, // <u,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1> + 2591355815U, // <u,1,0,6>: Cost 3 vext1 <6,u,1,0>, <6,u,1,0> + 2597328512U, // <u,1,0,7>: Cost 3 vext1 <7,u,1,0>, <7,u,1,0> + 1611490083U, // <u,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2> + 1481785446U, // <u,1,1,0>: Cost 2 vext1 <0,u,1,1>, LHS + 202162278U, // <u,1,1,1>: Cost 1 vdup1 LHS + 2555528808U, // <u,1,1,2>: Cost 3 vext1 <0,u,1,1>, <2,2,2,2> + 1611490120U, // <u,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3> + 1481788726U, // <u,1,1,4>: Cost 2 vext1 <0,u,1,1>, RHS + 2689876828U, // <u,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5> + 2591364008U, // <u,1,1,6>: Cost 3 vext1 <6,u,1,1>, <6,u,1,1> + 2592691274U, // <u,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1> + 202162278U, // <u,1,1,u>: Cost 1 vdup1 LHS + 1499709542U, // <u,1,2,0>: Cost 2 vext1 <3,u,1,2>, LHS + 2689876871U, // <u,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3> + 2631116445U, // <u,1,2,2>: Cost 3 vext2 <2,2,u,1>, <2,2,u,1> + 835584U, // <u,1,2,3>: Cost 0 copy LHS + 1499712822U, // <u,1,2,4>: Cost 2 vext1 <3,u,1,2>, RHS + 2689876907U, // <u,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3> + 2631780282U, // <u,1,2,6>: Cost 3 vext2 <2,3,u,1>, <2,6,3,7> + 1523603074U, // <u,1,2,7>: Cost 2 vext1 <7,u,1,2>, <7,u,1,2> + 835584U, // <u,1,2,u>: Cost 0 copy LHS + 1487773798U, // <u,1,3,0>: Cost 2 vext1 <1,u,1,3>, LHS + 1611490264U, // <u,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3> + 2685232094U, // <u,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0> + 2018746470U, // <u,1,3,3>: Cost 2 vtrnr LHS, LHS + 1487777078U, // <u,1,3,4>: Cost 2 vext1 <1,u,1,3>, RHS + 1611490304U, // <u,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7> + 2685674505U, // <u,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7> + 2640407307U, // <u,1,3,7>: Cost 3 vext2 <3,7,u,1>, <3,7,u,1> + 1611490327U, // <u,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3> + 1567992749U, // 
<u,1,4,0>: Cost 2 vext2 <4,0,u,1>, <4,0,u,1> + 2693121070U, // <u,1,4,1>: Cost 3 vext3 <1,4,1,u>, <1,4,1,u> + 2693194807U, // <u,1,4,2>: Cost 3 vext3 <1,4,2,u>, <1,4,2,u> + 1152386432U, // <u,1,4,3>: Cost 2 vrev <1,u,3,4> + 2555555126U, // <u,1,4,4>: Cost 3 vext1 <0,u,1,4>, RHS + 1558039862U, // <u,1,4,5>: Cost 2 vext2 <2,3,u,1>, RHS + 2645716371U, // <u,1,4,6>: Cost 3 vext2 <4,6,u,1>, <4,6,u,1> + 2597361284U, // <u,1,4,7>: Cost 3 vext1 <7,u,1,4>, <7,u,1,4> + 1152755117U, // <u,1,4,u>: Cost 2 vrev <1,u,u,4> + 1481818214U, // <u,1,5,0>: Cost 2 vext1 <0,u,1,5>, LHS + 2555560694U, // <u,1,5,1>: Cost 3 vext1 <0,u,1,5>, <1,0,3,2> + 2555561576U, // <u,1,5,2>: Cost 3 vext1 <0,u,1,5>, <2,2,2,2> + 1611490448U, // <u,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7> + 1481821494U, // <u,1,5,4>: Cost 2 vext1 <0,u,1,5>, RHS + 2651025435U, // <u,1,5,5>: Cost 3 vext2 <5,5,u,1>, <5,5,u,1> + 2651689068U, // <u,1,5,6>: Cost 3 vext2 <5,6,u,1>, <5,6,u,1> + 2823966006U, // <u,1,5,7>: Cost 3 vuzpr <0,u,1,1>, RHS + 1611932861U, // <u,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7> + 2555568230U, // <u,1,6,0>: Cost 3 vext1 <0,u,1,6>, LHS + 2689877199U, // <u,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7> + 2712069336U, // <u,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7> + 2685232353U, // <u,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7> + 2555571510U, // <u,1,6,4>: Cost 3 vext1 <0,u,1,6>, RHS + 2689877235U, // <u,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7> + 2657661765U, // <u,1,6,6>: Cost 3 vext2 <6,6,u,1>, <6,6,u,1> + 1584583574U, // <u,1,6,7>: Cost 2 vext2 <6,7,u,1>, <6,7,u,1> + 1585247207U, // <u,1,6,u>: Cost 2 vext2 <6,u,u,1>, <6,u,u,1> + 2561548390U, // <u,1,7,0>: Cost 3 vext1 <1,u,1,7>, LHS + 2561549681U, // <u,1,7,1>: Cost 3 vext1 <1,u,1,7>, <1,u,1,7> + 2573493926U, // <u,1,7,2>: Cost 3 vext1 <3,u,1,7>, <2,3,0,1> + 2042962022U, // <u,1,7,3>: Cost 2 vtrnr RHS, LHS + 2561551670U, // <u,1,7,4>: Cost 3 vext1 <1,u,1,7>, RHS + 2226300309U, // <u,1,7,5>: Cost 3 vrev <1,u,5,7> + 2658325990U, // <u,1,7,6>: Cost 3 vext2 <6,7,u,1>, <7,6,1,u> + 2658326124U, // <u,1,7,7>: Cost 3 vext2 <6,7,u,1>, <7,7,7,7> + 2042962027U, // <u,1,7,u>: Cost 2 vtrnr RHS, LHS + 1481842790U, // <u,1,u,0>: Cost 2 vext1 <0,u,1,u>, LHS + 202162278U, // <u,1,u,1>: Cost 1 vdup1 LHS + 2685674867U, // <u,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0> + 835584U, // <u,1,u,3>: Cost 0 copy LHS + 1481846070U, // <u,1,u,4>: Cost 2 vext1 <0,u,1,u>, RHS + 1611933077U, // <u,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7> + 2685674910U, // <u,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7> + 1523652232U, // <u,1,u,7>: Cost 2 vext1 <7,u,1,u>, <7,u,1,u> + 835584U, // <u,1,u,u>: Cost 0 copy LHS + 1544110154U, // <u,2,0,0>: Cost 2 vext2 <0,0,u,2>, <0,0,u,2> + 1545437286U, // <u,2,0,1>: Cost 2 vext2 <0,2,u,2>, LHS + 1545437420U, // <u,2,0,2>: Cost 2 vext2 <0,2,u,2>, <0,2,u,2> + 2685232589U, // <u,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0> + 2619179346U, // <u,2,0,4>: Cost 3 vext2 <0,2,u,2>, <0,4,1,5> + 2712069606U, // <u,2,0,5>: Cost 3 vext3 RHS, <2,0,5,7> + 2689877484U, // <u,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4> + 2659656273U, // <u,2,0,7>: Cost 3 vext2 <7,0,u,2>, <0,7,2,u> + 1545437853U, // <u,2,0,u>: Cost 2 vext2 <0,2,u,2>, LHS + 1550082851U, // <u,2,1,0>: Cost 2 vext2 <1,0,u,2>, <1,0,u,2> + 2619179828U, // <u,2,1,1>: Cost 3 vext2 <0,2,u,2>, <1,1,1,1> + 2619179926U, // <u,2,1,2>: Cost 3 vext2 <0,2,u,2>, <1,2,3,0> + 2685232671U, // <u,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1> + 2555604278U, // <u,2,1,4>: Cost 3 vext1 <0,u,2,1>, RHS + 2619180176U, // <u,2,1,5>: Cost 3 vext2 <0,2,u,2>, <1,5,3,7> + 2689877564U, // <u,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3> + 2602718850U, // 
<u,2,1,7>: Cost 3 vext1 <u,7,2,1>, <7,u,1,2> + 1158703235U, // <u,2,1,u>: Cost 2 vrev <2,u,u,1> + 1481867366U, // <u,2,2,0>: Cost 2 vext1 <0,u,2,2>, LHS + 2555609846U, // <u,2,2,1>: Cost 3 vext1 <0,u,2,2>, <1,0,3,2> + 269271142U, // <u,2,2,2>: Cost 1 vdup2 LHS + 1611490930U, // <u,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3> + 1481870646U, // <u,2,2,4>: Cost 2 vext1 <0,u,2,2>, RHS + 2689877640U, // <u,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7> + 2619180986U, // <u,2,2,6>: Cost 3 vext2 <0,2,u,2>, <2,6,3,7> + 2593436837U, // <u,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2> + 269271142U, // <u,2,2,u>: Cost 1 vdup2 LHS + 408134301U, // <u,2,3,0>: Cost 1 vext1 LHS, LHS + 1481876214U, // <u,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2> + 1481877096U, // <u,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2> + 1880326246U, // <u,2,3,3>: Cost 2 vzipr LHS, LHS + 408137014U, // <u,2,3,4>: Cost 1 vext1 LHS, RHS + 1529654992U, // <u,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3> + 1529655802U, // <u,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3> + 1529656314U, // <u,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2> + 408139566U, // <u,2,3,u>: Cost 1 vext1 LHS, LHS + 1567853468U, // <u,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2> + 2561598362U, // <u,2,4,1>: Cost 3 vext1 <1,u,2,4>, <1,2,3,4> + 2555627214U, // <u,2,4,2>: Cost 3 vext1 <0,u,2,4>, <2,3,4,5> + 2685232918U, // <u,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5> + 2555628854U, // <u,2,4,4>: Cost 3 vext1 <0,u,2,4>, RHS + 1545440566U, // <u,2,4,5>: Cost 2 vext2 <0,2,u,2>, RHS + 1571982740U, // <u,2,4,6>: Cost 2 vext2 <4,6,u,2>, <4,6,u,2> + 2592125957U, // <u,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4> + 1545440809U, // <u,2,4,u>: Cost 2 vext2 <0,2,u,2>, RHS + 2555633766U, // <u,2,5,0>: Cost 3 vext1 <0,u,2,5>, LHS + 2561606550U, // <u,2,5,1>: Cost 3 vext1 <1,u,2,5>, <1,2,3,0> + 2689877856U, // <u,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7> + 2685233000U, // <u,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6> + 1158441059U, // <u,2,5,4>: Cost 2 vrev <2,u,4,5> + 2645725188U, // <u,2,5,5>: Cost 3 vext2 <4,6,u,2>, <5,5,5,5> + 2689877892U, // <u,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7> + 2823900470U, // <u,2,5,7>: Cost 3 vuzpr <0,u,0,2>, RHS + 1158736007U, // <u,2,5,u>: Cost 2 vrev <2,u,u,5> + 1481900134U, // <u,2,6,0>: Cost 2 vext1 <0,u,2,6>, LHS + 2555642614U, // <u,2,6,1>: Cost 3 vext1 <0,u,2,6>, <1,0,3,2> + 2555643496U, // <u,2,6,2>: Cost 3 vext1 <0,u,2,6>, <2,2,2,2> + 1611491258U, // <u,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7> + 1481903414U, // <u,2,6,4>: Cost 2 vext1 <0,u,2,6>, RHS + 2689877964U, // <u,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7> + 2689877973U, // <u,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7> + 2645726030U, // <u,2,6,7>: Cost 3 vext2 <4,6,u,2>, <6,7,0,1> + 1611933671U, // <u,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7> + 1585919033U, // <u,2,7,0>: Cost 2 vext2 <7,0,u,2>, <7,0,u,2> + 2573566710U, // <u,2,7,1>: Cost 3 vext1 <3,u,2,7>, <1,0,3,2> + 2567596115U, // <u,2,7,2>: Cost 3 vext1 <2,u,2,7>, <2,u,2,7> + 1906901094U, // <u,2,7,3>: Cost 2 vzipr RHS, LHS + 2555653430U, // <u,2,7,4>: Cost 3 vext1 <0,u,2,7>, RHS + 2800080230U, // <u,2,7,5>: Cost 3 vuzpl LHS, <7,4,5,6> + 2980643164U, // <u,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6> + 2645726828U, // <u,2,7,7>: Cost 3 vext2 <4,6,u,2>, <7,7,7,7> + 1906901099U, // <u,2,7,u>: Cost 2 vzipr RHS, LHS + 408175266U, // <u,2,u,0>: Cost 1 vext1 LHS, LHS + 1545443118U, // <u,2,u,1>: Cost 2 vext2 <0,2,u,2>, LHS + 269271142U, // <u,2,u,2>: Cost 1 vdup2 LHS + 1611491416U, // <u,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3> + 408177974U, // <u,2,u,4>: Cost 1 vext1 LHS, RHS + 1545443482U, // <u,2,u,5>: Cost 2 vext2 <0,2,u,2>, RHS + 1726339226U, // <u,2,u,6>: Cost 2 
vuzpl LHS, RHS + 1529697274U, // <u,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2> + 408180526U, // <u,2,u,u>: Cost 1 vext1 LHS, LHS + 1544781824U, // <u,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0> + 471040156U, // <u,3,0,1>: Cost 1 vext2 LHS, LHS + 1544781988U, // <u,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2> + 2618523900U, // <u,3,0,3>: Cost 3 vext2 LHS, <0,3,1,0> + 1544782162U, // <u,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5> + 2238188352U, // <u,3,0,5>: Cost 3 vrev <3,u,5,0> + 2623169023U, // <u,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7> + 2238335826U, // <u,3,0,7>: Cost 3 vrev <3,u,7,0> + 471040669U, // <u,3,0,u>: Cost 1 vext2 LHS, LHS + 1544782582U, // <u,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2> + 1544782644U, // <u,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1> + 1544782742U, // <u,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0> + 1544782808U, // <u,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3> + 2618524733U, // <u,3,1,4>: Cost 3 vext2 LHS, <1,4,3,5> + 1544782992U, // <u,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7> + 2618524897U, // <u,3,1,6>: Cost 3 vext2 LHS, <1,6,3,7> + 2703517987U, // <u,3,1,7>: Cost 3 vext3 <3,1,7,u>, <3,1,7,u> + 1544783213U, // <u,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3> + 1529716838U, // <u,3,2,0>: Cost 2 vext1 <u,u,3,2>, LHS + 1164167966U, // <u,3,2,1>: Cost 2 vrev <3,u,1,2> + 1544783464U, // <u,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2> + 1544783526U, // <u,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1> + 1529720118U, // <u,3,2,4>: Cost 2 vext1 <u,u,3,2>, RHS + 2618525544U, // <u,3,2,5>: Cost 3 vext2 LHS, <2,5,3,6> + 1544783802U, // <u,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7> + 2704181620U, // <u,3,2,7>: Cost 3 vext3 <3,2,7,u>, <3,2,7,u> + 1544783931U, // <u,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1> + 1544784022U, // <u,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2> + 1487922559U, // <u,3,3,1>: Cost 2 vext1 <1,u,3,3>, <1,u,3,3> + 1493895256U, // <u,3,3,2>: Cost 2 vext1 <2,u,3,3>, <2,u,3,3> + 336380006U, // <u,3,3,3>: Cost 1 vdup3 LHS + 1544784386U, // <u,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6> + 2824054478U, // <u,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5> + 2238286668U, // <u,3,3,6>: Cost 3 vrev <3,u,6,3> + 2954069136U, // <u,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7> + 336380006U, // <u,3,3,u>: Cost 1 vdup3 LHS + 1487929446U, // <u,3,4,0>: Cost 2 vext1 <1,u,3,4>, LHS + 1487930752U, // <u,3,4,1>: Cost 2 vext1 <1,u,3,4>, <1,u,3,4> + 2623171644U, // <u,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0> + 2561673366U, // <u,3,4,3>: Cost 3 vext1 <1,u,3,4>, <3,0,1,2> + 1487932726U, // <u,3,4,4>: Cost 2 vext1 <1,u,3,4>, RHS + 471043382U, // <u,3,4,5>: Cost 1 vext2 LHS, RHS + 1592561012U, // <u,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6> + 2238368598U, // <u,3,4,7>: Cost 3 vrev <3,u,7,4> + 471043625U, // <u,3,4,u>: Cost 1 vext2 LHS, RHS + 2555707494U, // <u,3,5,0>: Cost 3 vext1 <0,u,3,5>, LHS + 1574645465U, // <u,3,5,1>: Cost 2 vext2 <5,1,u,3>, <5,1,u,3> + 2567653106U, // <u,3,5,2>: Cost 3 vext1 <2,u,3,5>, <2,3,u,5> + 2555709954U, // <u,3,5,3>: Cost 3 vext1 <0,u,3,5>, <3,4,5,6> + 1592561606U, // <u,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6> + 1592561668U, // <u,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5> + 1592561762U, // <u,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0> + 1750314294U, // <u,3,5,7>: Cost 2 vuzpr LHS, RHS + 1750314295U, // <u,3,5,u>: Cost 2 vuzpr LHS, RHS + 2623172897U, // <u,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2> + 2561688962U, // <u,3,6,1>: Cost 3 vext1 <1,u,3,6>, <1,u,3,6> + 1581281795U, // <u,3,6,2>: Cost 2 vext2 <6,2,u,3>, <6,2,u,3> + 2706541204U, // <u,3,6,3>: Cost 3 vext3 <3,6,3,u>, <3,6,3,u> + 2623173261U, // <u,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6> + 1164495686U, // <u,3,6,5>: Cost 2 vrev <3,u,5,6> + 1592562488U, // <u,3,6,6>: Cost 2 
vext2 LHS, <6,6,6,6> + 1592562510U, // <u,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1> + 1164716897U, // <u,3,6,u>: Cost 2 vrev <3,u,u,6> + 1487954022U, // <u,3,7,0>: Cost 2 vext1 <1,u,3,7>, LHS + 1487955331U, // <u,3,7,1>: Cost 2 vext1 <1,u,3,7>, <1,u,3,7> + 1493928028U, // <u,3,7,2>: Cost 2 vext1 <2,u,3,7>, <2,u,3,7> + 2561697942U, // <u,3,7,3>: Cost 3 vext1 <1,u,3,7>, <3,0,1,2> + 1487957302U, // <u,3,7,4>: Cost 2 vext1 <1,u,3,7>, RHS + 2707352311U, // <u,3,7,5>: Cost 3 vext3 <3,7,5,u>, <3,7,5,u> + 2655024623U, // <u,3,7,6>: Cost 3 vext2 <6,2,u,3>, <7,6,2,u> + 1592563308U, // <u,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7> + 1487959854U, // <u,3,7,u>: Cost 2 vext1 <1,u,3,7>, LHS + 1544787667U, // <u,3,u,0>: Cost 2 vext2 LHS, <u,0,1,2> + 471045934U, // <u,3,u,1>: Cost 1 vext2 LHS, LHS + 1549432709U, // <u,3,u,2>: Cost 2 vext2 LHS, <u,2,3,0> + 336380006U, // <u,3,u,3>: Cost 1 vdup3 LHS + 1544788031U, // <u,3,u,4>: Cost 2 vext2 LHS, <u,4,5,6> + 471046298U, // <u,3,u,5>: Cost 1 vext2 LHS, RHS + 1549433040U, // <u,3,u,6>: Cost 2 vext2 LHS, <u,6,3,7> + 1750314537U, // <u,3,u,7>: Cost 2 vuzpr LHS, RHS + 471046501U, // <u,3,u,u>: Cost 1 vext2 LHS, LHS + 2625167360U, // <u,4,0,0>: Cost 3 vext2 <1,2,u,4>, <0,0,0,0> + 1551425638U, // <u,4,0,1>: Cost 2 vext2 <1,2,u,4>, LHS + 2619195630U, // <u,4,0,2>: Cost 3 vext2 <0,2,u,4>, <0,2,u,4> + 2619343104U, // <u,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4> + 2625167698U, // <u,4,0,4>: Cost 3 vext2 <1,2,u,4>, <0,4,1,5> + 1638329234U, // <u,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1> + 1638329244U, // <u,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2> + 3787803556U, // <u,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1> + 1551426205U, // <u,4,0,u>: Cost 2 vext2 <1,2,u,4>, LHS + 2555748454U, // <u,4,1,0>: Cost 3 vext1 <0,u,4,1>, LHS + 2625168180U, // <u,4,1,1>: Cost 3 vext2 <1,2,u,4>, <1,1,1,1> + 1551426503U, // <u,4,1,2>: Cost 2 vext2 <1,2,u,4>, <1,2,u,4> + 2625168344U, // <u,4,1,3>: Cost 3 vext2 <1,2,u,4>, <1,3,1,3> + 2555751734U, // <u,4,1,4>: Cost 3 vext1 <0,u,4,1>, RHS + 1860554038U, // <u,4,1,5>: Cost 2 vzipl LHS, RHS + 2689879022U, // <u,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3> + 2592248852U, // <u,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1> + 1555408301U, // <u,4,1,u>: Cost 2 vext2 <1,u,u,4>, <1,u,u,4> + 2555756646U, // <u,4,2,0>: Cost 3 vext1 <0,u,4,2>, LHS + 2625168943U, // <u,4,2,1>: Cost 3 vext2 <1,2,u,4>, <2,1,4,u> + 2625169000U, // <u,4,2,2>: Cost 3 vext2 <1,2,u,4>, <2,2,2,2> + 2619197134U, // <u,4,2,3>: Cost 3 vext2 <0,2,u,4>, <2,3,4,5> + 2555759926U, // <u,4,2,4>: Cost 3 vext1 <0,u,4,2>, RHS + 2712071222U, // <u,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3> + 1994771766U, // <u,4,2,6>: Cost 2 vtrnl LHS, RHS + 2592257045U, // <u,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2> + 1994771784U, // <u,4,2,u>: Cost 2 vtrnl LHS, RHS + 2625169558U, // <u,4,3,0>: Cost 3 vext2 <1,2,u,4>, <3,0,1,2> + 2567709594U, // <u,4,3,1>: Cost 3 vext1 <2,u,4,3>, <1,2,3,4> + 2567710817U, // <u,4,3,2>: Cost 3 vext1 <2,u,4,3>, <2,u,4,3> + 2625169820U, // <u,4,3,3>: Cost 3 vext2 <1,2,u,4>, <3,3,3,3> + 2625169922U, // <u,4,3,4>: Cost 3 vext2 <1,2,u,4>, <3,4,5,6> + 2954069710U, // <u,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5> + 2954068172U, // <u,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6> + 3903849472U, // <u,4,3,7>: Cost 4 vuzpr <1,u,3,4>, <1,3,5,7> + 2954068174U, // <u,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u> + 1505919078U, // <u,4,4,0>: Cost 2 vext1 <4,u,4,4>, LHS + 2567717831U, // <u,4,4,1>: Cost 3 vext1 <2,u,4,4>, <1,2,u,4> + 2567719010U, // <u,4,4,2>: Cost 3 vext1 <2,u,4,4>, <2,u,4,4> + 2570373542U, // <u,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4> + 161926454U, 
// <u,4,4,4>: Cost 1 vdup0 RHS + 1551428918U, // <u,4,4,5>: Cost 2 vext2 <1,2,u,4>, RHS + 1638329572U, // <u,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6> + 2594927963U, // <u,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4> + 161926454U, // <u,4,4,u>: Cost 1 vdup0 RHS + 1493983334U, // <u,4,5,0>: Cost 2 vext1 <2,u,4,5>, LHS + 2689879301U, // <u,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3> + 1493985379U, // <u,4,5,2>: Cost 2 vext1 <2,u,4,5>, <2,u,4,5> + 2567727254U, // <u,4,5,3>: Cost 3 vext1 <2,u,4,5>, <3,0,1,2> + 1493986614U, // <u,4,5,4>: Cost 2 vext1 <2,u,4,5>, RHS + 1863535926U, // <u,4,5,5>: Cost 2 vzipl RHS, RHS + 537750838U, // <u,4,5,6>: Cost 1 vext3 LHS, RHS + 2830110006U, // <u,4,5,7>: Cost 3 vuzpr <1,u,3,4>, RHS + 537750856U, // <u,4,5,u>: Cost 1 vext3 LHS, RHS + 1482047590U, // <u,4,6,0>: Cost 2 vext1 <0,u,4,6>, LHS + 2555790070U, // <u,4,6,1>: Cost 3 vext1 <0,u,4,6>, <1,0,3,2> + 2555790952U, // <u,4,6,2>: Cost 3 vext1 <0,u,4,6>, <2,2,2,2> + 2555791510U, // <u,4,6,3>: Cost 3 vext1 <0,u,4,6>, <3,0,1,2> + 1482050870U, // <u,4,6,4>: Cost 2 vext1 <0,u,4,6>, RHS + 2689879422U, // <u,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7> + 1997753654U, // <u,4,6,6>: Cost 2 vtrnl RHS, RHS + 2712071562U, // <u,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1> + 1482053422U, // <u,4,6,u>: Cost 2 vext1 <0,u,4,6>, LHS + 2567741542U, // <u,4,7,0>: Cost 3 vext1 <2,u,4,7>, LHS + 2567742362U, // <u,4,7,1>: Cost 3 vext1 <2,u,4,7>, <1,2,3,4> + 2567743589U, // <u,4,7,2>: Cost 3 vext1 <2,u,4,7>, <2,u,4,7> + 2573716286U, // <u,4,7,3>: Cost 3 vext1 <3,u,4,7>, <3,u,4,7> + 2567744822U, // <u,4,7,4>: Cost 3 vext1 <2,u,4,7>, RHS + 2712071624U, // <u,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0> + 96808489U, // <u,4,7,6>: Cost 1 vrev RHS + 2651715180U, // <u,4,7,7>: Cost 3 vext2 <5,6,u,4>, <7,7,7,7> + 96955963U, // <u,4,7,u>: Cost 1 vrev RHS + 1482063974U, // <u,4,u,0>: Cost 2 vext1 <0,u,4,u>, LHS + 1551431470U, // <u,4,u,1>: Cost 2 vext2 <1,2,u,4>, LHS + 1494009958U, // <u,4,u,2>: Cost 2 vext1 <2,u,4,u>, <2,u,4,u> + 2555807894U, // <u,4,u,3>: Cost 3 vext1 <0,u,4,u>, <3,0,1,2> + 161926454U, // <u,4,u,4>: Cost 1 vdup0 RHS + 1551431834U, // <u,4,u,5>: Cost 2 vext2 <1,2,u,4>, RHS + 537751081U, // <u,4,u,6>: Cost 1 vext3 LHS, RHS + 2830110249U, // <u,4,u,7>: Cost 3 vuzpr <1,u,3,4>, RHS + 537751099U, // <u,4,u,u>: Cost 1 vext3 LHS, RHS + 2631811072U, // <u,5,0,0>: Cost 3 vext2 <2,3,u,5>, <0,0,0,0> + 1558069350U, // <u,5,0,1>: Cost 2 vext2 <2,3,u,5>, LHS + 2619203823U, // <u,5,0,2>: Cost 3 vext2 <0,2,u,5>, <0,2,u,5> + 2619867456U, // <u,5,0,3>: Cost 3 vext2 <0,3,u,5>, <0,3,u,5> + 1546273106U, // <u,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5> + 2733010539U, // <u,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1> + 2597622682U, // <u,5,0,6>: Cost 3 vext1 <7,u,5,0>, <6,7,u,5> + 1176539396U, // <u,5,0,7>: Cost 2 vrev <5,u,7,0> + 1558069917U, // <u,5,0,u>: Cost 2 vext2 <2,3,u,5>, LHS + 1505968230U, // <u,5,1,0>: Cost 2 vext1 <4,u,5,1>, LHS + 2624512887U, // <u,5,1,1>: Cost 3 vext2 <1,1,u,5>, <1,1,u,5> + 2631811990U, // <u,5,1,2>: Cost 3 vext2 <2,3,u,5>, <1,2,3,0> + 2618541056U, // <u,5,1,3>: Cost 3 vext2 <0,1,u,5>, <1,3,5,7> + 1505971510U, // <u,5,1,4>: Cost 2 vext1 <4,u,5,1>, RHS + 2627167419U, // <u,5,1,5>: Cost 3 vext2 <1,5,u,5>, <1,5,u,5> + 2579714554U, // <u,5,1,6>: Cost 3 vext1 <4,u,5,1>, <6,2,7,3> + 1638330064U, // <u,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3> + 1638477529U, // <u,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3> + 2561802342U, // <u,5,2,0>: Cost 3 vext1 <1,u,5,2>, LHS + 2561803264U, // <u,5,2,1>: Cost 3 vext1 <1,u,5,2>, <1,3,5,7> + 2631149217U, // <u,5,2,2>: Cost 3 vext2 <2,2,u,5>, <2,2,u,5> 
+ 1558071026U, // <u,5,2,3>: Cost 2 vext2 <2,3,u,5>, <2,3,u,5> + 2561805622U, // <u,5,2,4>: Cost 3 vext1 <1,u,5,2>, RHS + 2714062607U, // <u,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3> + 2631813050U, // <u,5,2,6>: Cost 3 vext2 <2,3,u,5>, <2,6,3,7> + 3092335926U, // <u,5,2,7>: Cost 3 vtrnr <0,u,0,2>, RHS + 1561389191U, // <u,5,2,u>: Cost 2 vext2 <2,u,u,5>, <2,u,u,5> + 2561810534U, // <u,5,3,0>: Cost 3 vext1 <1,u,5,3>, LHS + 2561811857U, // <u,5,3,1>: Cost 3 vext1 <1,u,5,3>, <1,u,5,3> + 2631813474U, // <u,5,3,2>: Cost 3 vext2 <2,3,u,5>, <3,2,5,u> + 2631813532U, // <u,5,3,3>: Cost 3 vext2 <2,3,u,5>, <3,3,3,3> + 2619869698U, // <u,5,3,4>: Cost 3 vext2 <0,3,u,5>, <3,4,5,6> + 3001847002U, // <u,5,3,5>: Cost 3 vzipr LHS, <4,4,5,5> + 2954070530U, // <u,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6> + 2018749750U, // <u,5,3,7>: Cost 2 vtrnr LHS, RHS + 2018749751U, // <u,5,3,u>: Cost 2 vtrnr LHS, RHS + 2573762662U, // <u,5,4,0>: Cost 3 vext1 <3,u,5,4>, LHS + 2620017634U, // <u,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0> + 2573764338U, // <u,5,4,2>: Cost 3 vext1 <3,u,5,4>, <2,3,u,5> + 2573765444U, // <u,5,4,3>: Cost 3 vext1 <3,u,5,4>, <3,u,5,4> + 1570680053U, // <u,5,4,4>: Cost 2 vext2 <4,4,u,5>, <4,4,u,5> + 1558072630U, // <u,5,4,5>: Cost 2 vext2 <2,3,u,5>, RHS + 2645749143U, // <u,5,4,6>: Cost 3 vext2 <4,6,u,5>, <4,6,u,5> + 1638330310U, // <u,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6> + 1558072873U, // <u,5,4,u>: Cost 2 vext2 <2,3,u,5>, RHS + 1506000998U, // <u,5,5,0>: Cost 2 vext1 <4,u,5,5>, LHS + 2561827984U, // <u,5,5,1>: Cost 3 vext1 <1,u,5,5>, <1,5,3,7> + 2579744360U, // <u,5,5,2>: Cost 3 vext1 <4,u,5,5>, <2,2,2,2> + 2579744918U, // <u,5,5,3>: Cost 3 vext1 <4,u,5,5>, <3,0,1,2> + 1506004278U, // <u,5,5,4>: Cost 2 vext1 <4,u,5,5>, RHS + 229035318U, // <u,5,5,5>: Cost 1 vdup1 RHS + 2712072206U, // <u,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6> + 1638330392U, // <u,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7> + 229035318U, // <u,5,5,u>: Cost 1 vdup1 RHS + 1500037222U, // <u,5,6,0>: Cost 2 vext1 <3,u,5,6>, LHS + 2561836436U, // <u,5,6,1>: Cost 3 vext1 <1,u,5,6>, <1,u,5,6> + 2567809133U, // <u,5,6,2>: Cost 3 vext1 <2,u,5,6>, <2,u,5,6> + 1500040006U, // <u,5,6,3>: Cost 2 vext1 <3,u,5,6>, <3,u,5,6> + 1500040502U, // <u,5,6,4>: Cost 2 vext1 <3,u,5,6>, RHS + 2714062935U, // <u,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7> + 2712072288U, // <u,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7> + 27705344U, // <u,5,6,7>: Cost 0 copy RHS + 27705344U, // <u,5,6,u>: Cost 0 copy RHS + 1488101478U, // <u,5,7,0>: Cost 2 vext1 <1,u,5,7>, LHS + 1488102805U, // <u,5,7,1>: Cost 2 vext1 <1,u,5,7>, <1,u,5,7> + 2561844840U, // <u,5,7,2>: Cost 3 vext1 <1,u,5,7>, <2,2,2,2> + 2561845398U, // <u,5,7,3>: Cost 3 vext1 <1,u,5,7>, <3,0,1,2> + 1488104758U, // <u,5,7,4>: Cost 2 vext1 <1,u,5,7>, RHS + 1638330536U, // <u,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7> + 2712072362U, // <u,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0> + 2042965302U, // <u,5,7,7>: Cost 2 vtrnr RHS, RHS + 1488107310U, // <u,5,7,u>: Cost 2 vext1 <1,u,5,7>, LHS + 1488109670U, // <u,5,u,0>: Cost 2 vext1 <1,u,5,u>, LHS + 1488110998U, // <u,5,u,1>: Cost 2 vext1 <1,u,5,u>, <1,u,5,u> + 2561853032U, // <u,5,u,2>: Cost 3 vext1 <1,u,5,u>, <2,2,2,2> + 1500056392U, // <u,5,u,3>: Cost 2 vext1 <3,u,5,u>, <3,u,5,u> + 1488112950U, // <u,5,u,4>: Cost 2 vext1 <1,u,5,u>, RHS + 229035318U, // <u,5,u,5>: Cost 1 vdup1 RHS + 2954111490U, // <u,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6> + 27705344U, // <u,5,u,7>: Cost 0 copy RHS + 27705344U, // <u,5,u,u>: Cost 0 copy RHS + 2619211776U, // <u,6,0,0>: Cost 3 vext2 <0,2,u,6>, <0,0,0,0> + 1545470054U, // <u,6,0,1>: Cost 2 
vext2 <0,2,u,6>, LHS + 1545470192U, // <u,6,0,2>: Cost 2 vext2 <0,2,u,6>, <0,2,u,6> + 2255958969U, // <u,6,0,3>: Cost 3 vrev <6,u,3,0> + 1546797458U, // <u,6,0,4>: Cost 2 vext2 <0,4,u,6>, <0,4,u,6> + 2720624971U, // <u,6,0,5>: Cost 3 vext3 <6,0,5,u>, <6,0,5,u> + 2256180180U, // <u,6,0,6>: Cost 3 vrev <6,u,6,0> + 2960682294U, // <u,6,0,7>: Cost 3 vzipr <1,2,u,0>, RHS + 1545470621U, // <u,6,0,u>: Cost 2 vext2 <0,2,u,6>, LHS + 1182004127U, // <u,6,1,0>: Cost 2 vrev <6,u,0,1> + 2619212596U, // <u,6,1,1>: Cost 3 vext2 <0,2,u,6>, <1,1,1,1> + 2619212694U, // <u,6,1,2>: Cost 3 vext2 <0,2,u,6>, <1,2,3,0> + 2619212760U, // <u,6,1,3>: Cost 3 vext2 <0,2,u,6>, <1,3,1,3> + 2626511979U, // <u,6,1,4>: Cost 3 vext2 <1,4,u,6>, <1,4,u,6> + 2619212944U, // <u,6,1,5>: Cost 3 vext2 <0,2,u,6>, <1,5,3,7> + 2714063264U, // <u,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3> + 2967326006U, // <u,6,1,7>: Cost 3 vzipr <2,3,u,1>, RHS + 1182594023U, // <u,6,1,u>: Cost 2 vrev <6,u,u,1> + 1506050150U, // <u,6,2,0>: Cost 2 vext1 <4,u,6,2>, LHS + 2579792630U, // <u,6,2,1>: Cost 3 vext1 <4,u,6,2>, <1,0,3,2> + 2619213416U, // <u,6,2,2>: Cost 3 vext2 <0,2,u,6>, <2,2,2,2> + 2619213478U, // <u,6,2,3>: Cost 3 vext2 <0,2,u,6>, <2,3,0,1> + 1506053430U, // <u,6,2,4>: Cost 2 vext1 <4,u,6,2>, RHS + 2633148309U, // <u,6,2,5>: Cost 3 vext2 <2,5,u,6>, <2,5,u,6> + 2619213754U, // <u,6,2,6>: Cost 3 vext2 <0,2,u,6>, <2,6,3,7> + 1638330874U, // <u,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3> + 1638478339U, // <u,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3> + 2619213974U, // <u,6,3,0>: Cost 3 vext2 <0,2,u,6>, <3,0,1,2> + 2255836074U, // <u,6,3,1>: Cost 3 vrev <6,u,1,3> + 2255909811U, // <u,6,3,2>: Cost 3 vrev <6,u,2,3> + 2619214236U, // <u,6,3,3>: Cost 3 vext2 <0,2,u,6>, <3,3,3,3> + 1564715549U, // <u,6,3,4>: Cost 2 vext2 <3,4,u,6>, <3,4,u,6> + 2639121006U, // <u,6,3,5>: Cost 3 vext2 <3,5,u,6>, <3,5,u,6> + 3001847012U, // <u,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6> + 1880329526U, // <u,6,3,7>: Cost 2 vzipr LHS, RHS + 1880329527U, // <u,6,3,u>: Cost 2 vzipr LHS, RHS + 2567864422U, // <u,6,4,0>: Cost 3 vext1 <2,u,6,4>, LHS + 2733011558U, // <u,6,4,1>: Cost 3 vext3 LHS, <6,4,1,3> + 2567866484U, // <u,6,4,2>: Cost 3 vext1 <2,u,6,4>, <2,u,6,4> + 2638458005U, // <u,6,4,3>: Cost 3 vext2 <3,4,u,6>, <4,3,6,u> + 1570540772U, // <u,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6> + 1545473334U, // <u,6,4,5>: Cost 2 vext2 <0,2,u,6>, RHS + 1572015512U, // <u,6,4,6>: Cost 2 vext2 <4,6,u,6>, <4,6,u,6> + 2960715062U, // <u,6,4,7>: Cost 3 vzipr <1,2,u,4>, RHS + 1545473577U, // <u,6,4,u>: Cost 2 vext2 <0,2,u,6>, RHS + 2567872614U, // <u,6,5,0>: Cost 3 vext1 <2,u,6,5>, LHS + 2645757648U, // <u,6,5,1>: Cost 3 vext2 <4,6,u,6>, <5,1,7,3> + 2567874490U, // <u,6,5,2>: Cost 3 vext1 <2,u,6,5>, <2,6,3,7> + 2576501250U, // <u,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6> + 1576660943U, // <u,6,5,4>: Cost 2 vext2 <5,4,u,6>, <5,4,u,6> + 2645757956U, // <u,6,5,5>: Cost 3 vext2 <4,6,u,6>, <5,5,5,5> + 2645758050U, // <u,6,5,6>: Cost 3 vext2 <4,6,u,6>, <5,6,7,0> + 2824080694U, // <u,6,5,7>: Cost 3 vuzpr <0,u,2,6>, RHS + 1182626795U, // <u,6,5,u>: Cost 2 vrev <6,u,u,5> + 1506082918U, // <u,6,6,0>: Cost 2 vext1 <4,u,6,6>, LHS + 2579825398U, // <u,6,6,1>: Cost 3 vext1 <4,u,6,6>, <1,0,3,2> + 2645758458U, // <u,6,6,2>: Cost 3 vext2 <4,6,u,6>, <6,2,7,3> + 2579826838U, // <u,6,6,3>: Cost 3 vext1 <4,u,6,6>, <3,0,1,2> + 1506086198U, // <u,6,6,4>: Cost 2 vext1 <4,u,6,6>, RHS + 2579828432U, // <u,6,6,5>: Cost 3 vext1 <4,u,6,6>, <5,1,7,3> + 296144182U, // <u,6,6,6>: Cost 1 vdup2 RHS + 1638331202U, // <u,6,6,7>: Cost 2 vext3 RHS, 
<6,6,7,7> + 296144182U, // <u,6,6,u>: Cost 1 vdup2 RHS + 432349286U, // <u,6,7,0>: Cost 1 vext1 RHS, LHS + 1506091766U, // <u,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2> + 1506092648U, // <u,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2> + 1506093206U, // <u,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2> + 432352809U, // <u,6,7,4>: Cost 1 vext1 RHS, RHS + 1506094800U, // <u,6,7,5>: Cost 2 vext1 RHS, <5,1,7,3> + 1506095610U, // <u,6,7,6>: Cost 2 vext1 RHS, <6,2,7,3> + 1906904374U, // <u,6,7,7>: Cost 2 vzipr RHS, RHS + 432355118U, // <u,6,7,u>: Cost 1 vext1 RHS, LHS + 432357478U, // <u,6,u,0>: Cost 1 vext1 RHS, LHS + 1545475886U, // <u,6,u,1>: Cost 2 vext2 <0,2,u,6>, LHS + 1506100840U, // <u,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2> + 1506101398U, // <u,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2> + 432361002U, // <u,6,u,4>: Cost 1 vext1 RHS, RHS + 1545476250U, // <u,6,u,5>: Cost 2 vext2 <0,2,u,6>, RHS + 296144182U, // <u,6,u,6>: Cost 1 vdup2 RHS + 1880370486U, // <u,6,u,7>: Cost 2 vzipr LHS, RHS + 432363310U, // <u,6,u,u>: Cost 1 vext1 RHS, LHS + 1571356672U, // <u,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0> + 497614950U, // <u,7,0,1>: Cost 1 vext2 RHS, LHS + 1571356836U, // <u,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2> + 2573880146U, // <u,7,0,3>: Cost 3 vext1 <3,u,7,0>, <3,u,7,0> + 1571357010U, // <u,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5> + 1512083716U, // <u,7,0,5>: Cost 2 vext1 <5,u,7,0>, <5,u,7,0> + 2621874741U, // <u,7,0,6>: Cost 3 vext2 <0,6,u,7>, <0,6,u,7> + 2585826298U, // <u,7,0,7>: Cost 3 vext1 <5,u,7,0>, <7,0,1,2> + 497615517U, // <u,7,0,u>: Cost 1 vext2 RHS, LHS + 1571357430U, // <u,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2> + 1571357492U, // <u,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1> + 1571357590U, // <u,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0> + 1552114715U, // <u,7,1,3>: Cost 2 vext2 <1,3,u,7>, <1,3,u,7> + 2573888822U, // <u,7,1,4>: Cost 3 vext1 <3,u,7,1>, RHS + 1553441981U, // <u,7,1,5>: Cost 2 vext2 <1,5,u,7>, <1,5,u,7> + 2627847438U, // <u,7,1,6>: Cost 3 vext2 <1,6,u,7>, <1,6,u,7> + 2727408775U, // <u,7,1,7>: Cost 3 vext3 <7,1,7,u>, <7,1,7,u> + 1555432880U, // <u,7,1,u>: Cost 2 vext2 <1,u,u,7>, <1,u,u,7> + 2629838337U, // <u,7,2,0>: Cost 3 vext2 <2,0,u,7>, <2,0,u,7> + 1188058754U, // <u,7,2,1>: Cost 2 vrev <7,u,1,2> + 1571358312U, // <u,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2> + 1571358374U, // <u,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1> + 2632492869U, // <u,7,2,4>: Cost 3 vext2 <2,4,u,7>, <2,4,u,7> + 2633156502U, // <u,7,2,5>: Cost 3 vext2 <2,5,u,7>, <2,5,u,7> + 1560078311U, // <u,7,2,6>: Cost 2 vext2 <2,6,u,7>, <2,6,u,7> + 2728072408U, // <u,7,2,7>: Cost 3 vext3 <7,2,7,u>, <7,2,7,u> + 1561405577U, // <u,7,2,u>: Cost 2 vext2 <2,u,u,7>, <2,u,u,7> + 1571358870U, // <u,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2> + 2627184913U, // <u,7,3,1>: Cost 3 vext2 <1,5,u,7>, <3,1,5,u> + 2633820523U, // <u,7,3,2>: Cost 3 vext2 <2,6,u,7>, <3,2,6,u> + 1571359132U, // <u,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3> + 1571359234U, // <u,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6> + 1512108295U, // <u,7,3,5>: Cost 2 vext1 <5,u,7,3>, <5,u,7,3> + 1518080992U, // <u,7,3,6>: Cost 2 vext1 <6,u,7,3>, <6,u,7,3> + 2640456465U, // <u,7,3,7>: Cost 3 vext2 <3,7,u,7>, <3,7,u,7> + 1571359518U, // <u,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2> + 1571359634U, // <u,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1> + 2573911067U, // <u,7,4,1>: Cost 3 vext1 <3,u,7,4>, <1,3,u,7> + 2645101622U, // <u,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3> + 2573912918U, // <u,7,4,3>: Cost 3 vext1 <3,u,7,4>, <3,u,7,4> + 1571359952U, // <u,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4> + 497618248U, // <u,7,4,5>: Cost 1 vext2 RHS, RHS + 1571360116U, // <u,7,4,6>: Cost 2 
vext2 RHS, <4,6,4,6> + 2645102024U, // <u,7,4,7>: Cost 3 vext2 RHS, <4,7,5,0> + 497618473U, // <u,7,4,u>: Cost 1 vext2 RHS, RHS + 2645102152U, // <u,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2> + 1571360464U, // <u,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3> + 2645102334U, // <u,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4> + 2645102447U, // <u,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0> + 1571360710U, // <u,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6> + 1571360772U, // <u,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5> + 1571360866U, // <u,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0> + 1571360936U, // <u,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7> + 1571361017U, // <u,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7> + 1530044518U, // <u,7,6,0>: Cost 2 vext1 <u,u,7,6>, LHS + 2645103016U, // <u,7,6,1>: Cost 3 vext2 RHS, <6,1,7,2> + 1571361274U, // <u,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3> + 2645103154U, // <u,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5> + 1530047798U, // <u,7,6,4>: Cost 2 vext1 <u,u,7,6>, RHS + 1188386474U, // <u,7,6,5>: Cost 2 vrev <7,u,5,6> + 1571361592U, // <u,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6> + 1571361614U, // <u,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1> + 1571361695U, // <u,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1> + 1571361786U, // <u,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2> + 2573935616U, // <u,7,7,1>: Cost 3 vext1 <3,u,7,7>, <1,3,5,7> + 2645103781U, // <u,7,7,2>: Cost 3 vext2 RHS, <7,2,2,2> + 2573937497U, // <u,7,7,3>: Cost 3 vext1 <3,u,7,7>, <3,u,7,7> + 1571362150U, // <u,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6> + 1512141067U, // <u,7,7,5>: Cost 2 vext1 <5,u,7,7>, <5,u,7,7> + 1518113764U, // <u,7,7,6>: Cost 2 vext1 <6,u,7,7>, <6,u,7,7> + 363253046U, // <u,7,7,7>: Cost 1 vdup3 RHS + 363253046U, // <u,7,7,u>: Cost 1 vdup3 RHS + 1571362515U, // <u,7,u,0>: Cost 2 vext2 RHS, <u,0,1,2> + 497620782U, // <u,7,u,1>: Cost 1 vext2 RHS, LHS + 1571362693U, // <u,7,u,2>: Cost 2 vext2 RHS, <u,2,3,0> + 1571362748U, // <u,7,u,3>: Cost 2 vext2 RHS, <u,3,0,1> + 1571362879U, // <u,7,u,4>: Cost 2 vext2 RHS, <u,4,5,6> + 497621146U, // <u,7,u,5>: Cost 1 vext2 RHS, RHS + 1571363024U, // <u,7,u,6>: Cost 2 vext2 RHS, <u,6,3,7> + 363253046U, // <u,7,u,7>: Cost 1 vdup3 RHS + 497621349U, // <u,7,u,u>: Cost 1 vext2 RHS, LHS + 135053414U, // <u,u,0,0>: Cost 1 vdup0 LHS + 471081121U, // <u,u,0,1>: Cost 1 vext2 LHS, LHS + 1544822948U, // <u,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2> + 1616140005U, // <u,u,0,3>: Cost 2 vext3 LHS, <u,0,3,2> + 1544823122U, // <u,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5> + 1512157453U, // <u,u,0,5>: Cost 2 vext1 <5,u,u,0>, <5,u,u,0> + 1662220032U, // <u,u,0,6>: Cost 2 vext3 RHS, <u,0,6,2> + 1194457487U, // <u,u,0,7>: Cost 2 vrev <u,u,7,0> + 471081629U, // <u,u,0,u>: Cost 1 vext2 LHS, LHS + 1544823542U, // <u,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2> + 202162278U, // <u,u,1,1>: Cost 1 vdup1 LHS + 537753390U, // <u,u,1,2>: Cost 1 vext3 LHS, LHS + 1544823768U, // <u,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3> + 1494248758U, // <u,u,1,4>: Cost 2 vext1 <2,u,u,1>, RHS + 1544823952U, // <u,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7> + 1518138343U, // <u,u,1,6>: Cost 2 vext1 <6,u,u,1>, <6,u,u,1> + 1640322907U, // <u,u,1,7>: Cost 2 vext3 RHS, <u,1,7,3> + 537753444U, // <u,u,1,u>: Cost 1 vext3 LHS, LHS + 1482309734U, // <u,u,2,0>: Cost 2 vext1 <0,u,u,2>, LHS + 1194031451U, // <u,u,2,1>: Cost 2 vrev <u,u,1,2> + 269271142U, // <u,u,2,2>: Cost 1 vdup2 LHS + 835584U, // <u,u,2,3>: Cost 0 copy LHS + 1482313014U, // <u,u,2,4>: Cost 2 vext1 <0,u,u,2>, RHS + 2618566504U, // <u,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6> + 1544824762U, // <u,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7> + 1638479788U, // <u,u,2,7>: Cost 2 vext3 RHS, <u,2,7,3> + 835584U, 
// <u,u,2,u>: Cost 0 copy LHS + 408576723U, // <u,u,3,0>: Cost 1 vext1 LHS, LHS + 1482318582U, // <u,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2> + 120371557U, // <u,u,3,2>: Cost 1 vrev LHS + 336380006U, // <u,u,3,3>: Cost 1 vdup3 LHS + 408579382U, // <u,u,3,4>: Cost 1 vext1 LHS, RHS + 1616140271U, // <u,u,3,5>: Cost 2 vext3 LHS, <u,3,5,7> + 1530098170U, // <u,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3> + 1880329544U, // <u,u,3,7>: Cost 2 vzipr LHS, RHS + 408581934U, // <u,u,3,u>: Cost 1 vext1 LHS, LHS + 1488298086U, // <u,u,4,0>: Cost 2 vext1 <1,u,u,4>, LHS + 1488299437U, // <u,u,4,1>: Cost 2 vext1 <1,u,u,4>, <1,u,u,4> + 1659271204U, // <u,u,4,2>: Cost 2 vext3 LHS, <u,4,2,6> + 1194195311U, // <u,u,4,3>: Cost 2 vrev <u,u,3,4> + 161926454U, // <u,u,4,4>: Cost 1 vdup0 RHS + 471084342U, // <u,u,4,5>: Cost 1 vext2 LHS, RHS + 1571368308U, // <u,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6> + 1640323153U, // <u,u,4,7>: Cost 2 vext3 RHS, <u,4,7,6> + 471084585U, // <u,u,4,u>: Cost 1 vext2 LHS, RHS + 1494278246U, // <u,u,5,0>: Cost 2 vext1 <2,u,u,5>, LHS + 1571368656U, // <u,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3> + 1494280327U, // <u,u,5,2>: Cost 2 vext1 <2,u,u,5>, <2,u,u,5> + 1616140415U, // <u,u,5,3>: Cost 2 vext3 LHS, <u,5,3,7> + 1494281526U, // <u,u,5,4>: Cost 2 vext1 <2,u,u,5>, RHS + 229035318U, // <u,u,5,5>: Cost 1 vdup1 RHS + 537753754U, // <u,u,5,6>: Cost 1 vext3 LHS, RHS + 1750355254U, // <u,u,5,7>: Cost 2 vuzpr LHS, RHS + 537753772U, // <u,u,5,u>: Cost 1 vext3 LHS, RHS + 1482342502U, // <u,u,6,0>: Cost 2 vext1 <0,u,u,6>, LHS + 2556084982U, // <u,u,6,1>: Cost 3 vext1 <0,u,u,6>, <1,0,3,2> + 1571369466U, // <u,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3> + 1611938000U, // <u,u,6,3>: Cost 2 vext3 LHS, <u,6,3,7> + 1482345782U, // <u,u,6,4>: Cost 2 vext1 <0,u,u,6>, RHS + 1194359171U, // <u,u,6,5>: Cost 2 vrev <u,u,5,6> + 296144182U, // <u,u,6,6>: Cost 1 vdup2 RHS + 27705344U, // <u,u,6,7>: Cost 0 copy RHS + 27705344U, // <u,u,6,u>: Cost 0 copy RHS + 432496742U, // <u,u,7,0>: Cost 1 vext1 RHS, LHS + 1488324016U, // <u,u,7,1>: Cost 2 vext1 <1,u,u,7>, <1,u,u,7> + 1494296713U, // <u,u,7,2>: Cost 2 vext1 <2,u,u,7>, <2,u,u,7> + 1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS + 432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS + 1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3> + 120699277U, // <u,u,7,6>: Cost 1 vrev RHS + 363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS + 432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS + 408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS + 471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS + 537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS + 835584U, // <u,u,u,3>: Cost 0 copy LHS + 408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS + 471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS + 537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS + 27705344U, // <u,u,u,7>: Cost 0 copy RHS + 835584U, // <u,u,u,u>: Cost 0 copy LHS 0 }; diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index d5bc3f6..ad51bc1 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 305b232..22d15b5 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -201,6 +201,10 @@ 
 def CPSR : ARMReg<0, "cpsr">;
 def FPSCR : ARMReg<1, "fpscr">;
 def ITSTATE : ARMReg<2, "itstate">;
+// Special Registers - only available in privileged mode.
+def FPSID : ARMReg<0, "fpsid">;
+def FPEXC : ARMReg<8, "fpexc">;
+
 // Register classes.
 //
 // pc == Program Counter
@@ -256,7 +260,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
 // restricted GPR register class. Many Thumb2 instructions allow the full
 // register range for operands, but have undefined behaviours when PC
-// or SP (R13 or R15) are used. The ARM ARM refers to these operands
+// or SP (R13 or R15) are used. The ARM ISA refers to these operands
 // via the BadReg() pseudo-code description.
 def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
                                             R7, R8, R9, R10, R11, R12, LR]> {
@@ -381,27 +385,29 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
   let MethodBodies = [{
-    // VFP2
+    // VFP2 / VFPv3-D16
     static const unsigned ARM_DPR_VFP2[] = {
      ARM::D0, ARM::D1, ARM::D2, ARM::D3,
      ARM::D4, ARM::D5, ARM::D6, ARM::D7,
      ARM::D8, ARM::D9, ARM::D10, ARM::D11,
      ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
-    // VFP3
+    // VFP3: D8-D15 are callee saved and should be allocated last.
+    // Save other low registers for use as DPR_VFP2 and DPR_8 classes.
     static const unsigned ARM_DPR_VFP3[] = {
-     ARM::D0, ARM::D1, ARM::D2, ARM::D3,
-     ARM::D4, ARM::D5, ARM::D6, ARM::D7,
-     ARM::D8, ARM::D9, ARM::D10, ARM::D11,
-     ARM::D12, ARM::D13, ARM::D14, ARM::D15,
      ARM::D16, ARM::D17, ARM::D18, ARM::D19,
      ARM::D20, ARM::D21, ARM::D22, ARM::D23,
      ARM::D24, ARM::D25, ARM::D26, ARM::D27,
-     ARM::D28, ARM::D29, ARM::D30, ARM::D31 };
+     ARM::D28, ARM::D29, ARM::D30, ARM::D31,
+     ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+     ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+     ARM::D8, ARM::D9, ARM::D10, ARM::D11,
+     ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
+
     DPRClass::iterator
     DPRClass::allocation_order_begin(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
       const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.hasVFP3())
+      if (Subtarget.hasVFP3() && !Subtarget.hasD16())
        return ARM_DPR_VFP3;
      return ARM_DPR_VFP2;
    }
@@ -410,7 +416,7 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
     DPRClass::allocation_order_end(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
       const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.hasVFP3())
+      if (Subtarget.hasVFP3() && !Subtarget.hasD16())
        return ARM_DPR_VFP3 + (sizeof(ARM_DPR_VFP3)/sizeof(unsigned));
      else
        return ARM_DPR_VFP2 + (sizeof(ARM_DPR_VFP2)/sizeof(unsigned));
@@ -438,6 +444,29 @@ def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
                         [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
                          Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> {
   let SubRegClasses = [(DPR dsub_0, dsub_1)];
+  let MethodProtos = [{
+    iterator allocation_order_begin(const MachineFunction &MF) const;
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    // Q4-Q7 are callee saved and should be allocated last.
+    // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
+    static const unsigned ARM_QPR[] = {
+      ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
+      ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15,
+      ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
+      ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7 };
+
+    QPRClass::iterator
+    QPRClass::allocation_order_begin(const MachineFunction &MF) const {
+      return ARM_QPR;
+    }
+
+    QPRClass::iterator
+    QPRClass::allocation_order_end(const MachineFunction &MF) const {
+      return ARM_QPR + (sizeof(ARM_QPR)/sizeof(unsigned));
+    }
+  }];
 }

 // Subset of QPR that have 32-bit SPR subregs.
@@ -463,6 +492,27 @@ def QQPR : RegisterClass<"ARM", [v4i64],
                          [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> {
   let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
                        (QPR qsub_0, qsub_1)];
+  let MethodProtos = [{
+    iterator allocation_order_begin(const MachineFunction &MF) const;
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    // QQ2-QQ3 are callee saved and should be allocated last.
+    // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
+    static const unsigned ARM_QQPR[] = {
+      ARM::QQ4, ARM::QQ5, ARM::QQ6, ARM::QQ7,
+      ARM::QQ0, ARM::QQ1, ARM::QQ2, ARM::QQ3 };
+
+    QQPRClass::iterator
+    QQPRClass::allocation_order_begin(const MachineFunction &MF) const {
+      return ARM_QQPR;
+    }
+
+    QQPRClass::iterator
+    QQPRClass::allocation_order_end(const MachineFunction &MF) const {
+      return ARM_QQPR + (sizeof(ARM_QQPR)/sizeof(unsigned));
+    }
+  }];
 }

 // Subset of QQPR that have 32-bit SPR subregs.
@@ -483,6 +533,26 @@ def QQQQPR : RegisterClass<"ARM", [v8i64],
   let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
                             dsub_4, dsub_5, dsub_6, dsub_7),
                        (QPR qsub_0, qsub_1, qsub_2, qsub_3)];
+  let MethodProtos = [{
+    iterator allocation_order_begin(const MachineFunction &MF) const;
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    // QQQQ1 is callee saved and should be allocated last.
+    // Save QQQQ0 for use as QPR_VFP2 and QPR_8 classes.
+    static const unsigned ARM_QQQQPR[] = {
+      ARM::QQQQ2, ARM::QQQQ3, ARM::QQQQ0, ARM::QQQQ1 };
+
+    QQQQPRClass::iterator
+    QQQQPRClass::allocation_order_begin(const MachineFunction &MF) const {
+      return ARM_QQQQPR;
+    }
+
+    QQQQPRClass::iterator
+    QQQQPRClass::allocation_order_end(const MachineFunction &MF) const {
+      return ARM_QQQQPR + (sizeof(ARM_QQQQPR)/sizeof(unsigned));
+    }
+  }];
 }

 // Condition code registers.
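All four register-class hunks above (DPR, QPR, QQPR, QQQQPR) apply the same recipe: declare a static allocation-order array that lists caller-saved registers before callee-saved ones, then point the allocation_order_begin/allocation_order_end hooks at that array so the register allocator hands out registers that need no prologue save/restore first. A minimal sketch of that recipe on an invented two-register class (XPR is hypothetical and not part of this patch; the hook shapes match the ones used above):

// Hypothetical register class for illustration only; ARM has no "XPR".
// R0 is caller saved and R4 is callee saved, so R0 is handed out first.
def XPR : RegisterClass<"ARM", [i32], 32, [R0, R4]> {
  let MethodProtos = [{
    iterator allocation_order_begin(const MachineFunction &MF) const;
    iterator allocation_order_end(const MachineFunction &MF) const;
  }];
  let MethodBodies = [{
    // Allocation order: caller-saved R0 before callee-saved R4.
    static const unsigned ARM_XPR[] = { ARM::R0, ARM::R4 };

    XPRClass::iterator
    XPRClass::allocation_order_begin(const MachineFunction &MF) const {
      return ARM_XPR;
    }

    XPRClass::iterator
    XPRClass::allocation_order_end(const MachineFunction &MF) const {
      return ARM_XPR + (sizeof(ARM_XPR)/sizeof(unsigned));
    }
  }];
}

The DPR case adds one wrinkle the sketch omits: it consults the subtarget (hasVFP3() && !hasD16()) to choose between two arrays, since D16-D31 only exist on full VFPv3, not on VFPv3-D16.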
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index b60ccca..958c5c6 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -14,42 +14,86 @@ def IIC_iALUx : InstrItinClass;
 def IIC_iALUi : InstrItinClass;
 def IIC_iALUr : InstrItinClass;
 def IIC_iALUsi : InstrItinClass;
+def IIC_iALUsir : InstrItinClass;
 def IIC_iALUsr : InstrItinClass;
+def IIC_iBITi : InstrItinClass;
+def IIC_iBITr : InstrItinClass;
+def IIC_iBITsi : InstrItinClass;
+def IIC_iBITsr : InstrItinClass;
 def IIC_iUNAr : InstrItinClass;
 def IIC_iUNAsi : InstrItinClass;
-def IIC_iUNAsr : InstrItinClass;
+def IIC_iEXTr : InstrItinClass;
+def IIC_iEXTAr : InstrItinClass;
+def IIC_iEXTAsr : InstrItinClass;
 def IIC_iCMPi : InstrItinClass;
 def IIC_iCMPr : InstrItinClass;
 def IIC_iCMPsi : InstrItinClass;
 def IIC_iCMPsr : InstrItinClass;
+def IIC_iTSTi : InstrItinClass;
+def IIC_iTSTr : InstrItinClass;
+def IIC_iTSTsi : InstrItinClass;
+def IIC_iTSTsr : InstrItinClass;
 def IIC_iMOVi : InstrItinClass;
 def IIC_iMOVr : InstrItinClass;
 def IIC_iMOVsi : InstrItinClass;
 def IIC_iMOVsr : InstrItinClass;
+def IIC_iMOVix2 : InstrItinClass;
+def IIC_iMOVix2addpc : InstrItinClass;
+def IIC_iMOVix2ld : InstrItinClass;
+def IIC_iMVNi : InstrItinClass;
+def IIC_iMVNr : InstrItinClass;
+def IIC_iMVNsi : InstrItinClass;
+def IIC_iMVNsr : InstrItinClass;
 def IIC_iCMOVi : InstrItinClass;
 def IIC_iCMOVr : InstrItinClass;
 def IIC_iCMOVsi : InstrItinClass;
 def IIC_iCMOVsr : InstrItinClass;
+def IIC_iCMOVix2 : InstrItinClass;
 def IIC_iMUL16 : InstrItinClass;
 def IIC_iMAC16 : InstrItinClass;
 def IIC_iMUL32 : InstrItinClass;
 def IIC_iMAC32 : InstrItinClass;
 def IIC_iMUL64 : InstrItinClass;
 def IIC_iMAC64 : InstrItinClass;
-def IIC_iLoadi : InstrItinClass;
-def IIC_iLoadr : InstrItinClass;
-def IIC_iLoadsi : InstrItinClass;
-def IIC_iLoadiu : InstrItinClass;
-def IIC_iLoadru : InstrItinClass;
-def IIC_iLoadsiu : InstrItinClass;
-def IIC_iLoadm : InstrItinClass;
-def IIC_iStorei : InstrItinClass;
-def IIC_iStorer : InstrItinClass;
-def IIC_iStoresi : InstrItinClass;
-def IIC_iStoreiu : InstrItinClass;
-def IIC_iStoreru : InstrItinClass;
-def IIC_iStoresiu : InstrItinClass;
-def IIC_iStorem : InstrItinClass;
+def IIC_iLoad_i : InstrItinClass;
+def IIC_iLoad_r : InstrItinClass;
+def IIC_iLoad_si : InstrItinClass;
+def IIC_iLoad_iu : InstrItinClass;
+def IIC_iLoad_ru : InstrItinClass;
+def IIC_iLoad_siu : InstrItinClass;
+def IIC_iLoad_bh_i : InstrItinClass;
+def IIC_iLoad_bh_r : InstrItinClass;
+def IIC_iLoad_bh_si : InstrItinClass;
+def IIC_iLoad_bh_iu : InstrItinClass;
+def IIC_iLoad_bh_ru : InstrItinClass;
+def IIC_iLoad_bh_siu : InstrItinClass;
+def IIC_iLoad_d_i : InstrItinClass;
+def IIC_iLoad_d_r : InstrItinClass;
+def IIC_iLoad_d_ru : InstrItinClass;
+def IIC_iLoad_m : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded
+def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded
+def IIC_iPop : InstrItinClass<0>; // micro-coded
+def IIC_iPop_Br : InstrItinClass<0>; // micro-coded
+def IIC_iLoadiALU : InstrItinClass;
+def IIC_iStore_i : InstrItinClass;
+def IIC_iStore_r : InstrItinClass;
+def IIC_iStore_si : InstrItinClass;
+def IIC_iStore_iu : InstrItinClass;
+def IIC_iStore_ru : InstrItinClass;
+def IIC_iStore_siu : InstrItinClass;
+def IIC_iStore_bh_i : InstrItinClass;
+def IIC_iStore_bh_r : InstrItinClass;
+def IIC_iStore_bh_si : InstrItinClass;
+def IIC_iStore_bh_iu : InstrItinClass;
+def IIC_iStore_bh_ru : InstrItinClass;
+def IIC_iStore_bh_siu : InstrItinClass;
+def IIC_iStore_d_i : InstrItinClass;
+def IIC_iStore_d_r : InstrItinClass;
+def IIC_iStore_d_ru : InstrItinClass;
+def IIC_iStore_m : InstrItinClass<0>; // micro-coded
+def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
+def IIC_Preload : InstrItinClass;
 def IIC_Br : InstrItinClass;
 def IIC_fpSTAT : InstrItinClass;
 def IIC_fpUNA32 : InstrItinClass;
@@ -80,19 +124,76 @@ def IIC_fpSQRT32 : InstrItinClass;
 def IIC_fpSQRT64 : InstrItinClass;
 def IIC_fpLoad32 : InstrItinClass;
 def IIC_fpLoad64 : InstrItinClass;
-def IIC_fpLoadm : InstrItinClass;
+def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded
+def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded
 def IIC_fpStore32 : InstrItinClass;
 def IIC_fpStore64 : InstrItinClass;
-def IIC_fpStorem : InstrItinClass;
+def IIC_fpStore_m : InstrItinClass<0>; // micro-coded
+def IIC_fpStore_mu : InstrItinClass<0>; // micro-coded
 def IIC_VLD1 : InstrItinClass;
+def IIC_VLD1x2 : InstrItinClass;
+def IIC_VLD1x3 : InstrItinClass;
+def IIC_VLD1x4 : InstrItinClass;
+def IIC_VLD1u : InstrItinClass;
+def IIC_VLD1x2u : InstrItinClass;
+def IIC_VLD1x3u : InstrItinClass;
+def IIC_VLD1x4u : InstrItinClass;
+def IIC_VLD1ln : InstrItinClass;
+def IIC_VLD1lnu : InstrItinClass;
+def IIC_VLD1dup : InstrItinClass;
+def IIC_VLD1dupu : InstrItinClass;
 def IIC_VLD2 : InstrItinClass;
+def IIC_VLD2x2 : InstrItinClass;
+def IIC_VLD2u : InstrItinClass;
+def IIC_VLD2x2u : InstrItinClass;
+def IIC_VLD2ln : InstrItinClass;
+def IIC_VLD2lnu : InstrItinClass;
+def IIC_VLD2dup : InstrItinClass;
+def IIC_VLD2dupu : InstrItinClass;
 def IIC_VLD3 : InstrItinClass;
+def IIC_VLD3ln : InstrItinClass;
+def IIC_VLD3u : InstrItinClass;
+def IIC_VLD3lnu : InstrItinClass;
+def IIC_VLD3dup : InstrItinClass;
+def IIC_VLD3dupu : InstrItinClass;
 def IIC_VLD4 : InstrItinClass;
-def IIC_VST : InstrItinClass;
+def IIC_VLD4ln : InstrItinClass;
+def IIC_VLD4u : InstrItinClass;
+def IIC_VLD4lnu : InstrItinClass;
+def IIC_VLD4dup : InstrItinClass;
+def IIC_VLD4dupu : InstrItinClass;
+def IIC_VST1 : InstrItinClass;
+def IIC_VST1x2 : InstrItinClass;
+def IIC_VST1x3 : InstrItinClass;
+def IIC_VST1x4 : InstrItinClass;
+def IIC_VST1u : InstrItinClass;
+def IIC_VST1x2u : InstrItinClass;
+def IIC_VST1x3u : InstrItinClass;
+def IIC_VST1x4u : InstrItinClass;
+def IIC_VST1ln : InstrItinClass;
+def IIC_VST1lnu : InstrItinClass;
+def IIC_VST2 : InstrItinClass;
+def IIC_VST2x2 : InstrItinClass;
+def IIC_VST2u : InstrItinClass;
+def IIC_VST2x2u : InstrItinClass;
+def IIC_VST2ln : InstrItinClass;
+def IIC_VST2lnu : InstrItinClass;
+def IIC_VST3 : InstrItinClass;
+def IIC_VST3u : InstrItinClass;
+def IIC_VST3ln : InstrItinClass;
+def IIC_VST3lnu : InstrItinClass;
+def IIC_VST4 : InstrItinClass;
+def IIC_VST4u : InstrItinClass;
+def IIC_VST4ln : InstrItinClass;
+def IIC_VST4lnu : InstrItinClass;
 def IIC_VUNAD : InstrItinClass;
 def IIC_VUNAQ : InstrItinClass;
 def IIC_VBIND : InstrItinClass;
 def IIC_VBINQ : InstrItinClass;
+def IIC_VPBIND : InstrItinClass;
+def IIC_VFMULD : InstrItinClass;
+def IIC_VFMULQ : InstrItinClass;
+def IIC_VMOV : InstrItinClass;
 def IIC_VMOVImm : InstrItinClass;
 def IIC_VMOVD : InstrItinClass;
 def IIC_VMOVQ : InstrItinClass;
@@ -101,6 +202,7 @@ def IIC_VMOVID : InstrItinClass;
 def IIC_VMOVISL : InstrItinClass;
 def IIC_VMOVSI : InstrItinClass;
 def IIC_VMOVDI : InstrItinClass;
+def IIC_VMOVN : InstrItinClass;
 def IIC_VPERMD : InstrItinClass;
 def IIC_VPERMQ : InstrItinClass;
 def IIC_VPERMQ3 : InstrItinClass;
@@ -152,7 +254,7 @@
 def IIC_VTBX4 : InstrItinClass;
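These InstrItinClass definitions are only names: each processor model then binds a name to functional-unit stages and operand read/def cycles, as the Cortex-A8 itinerary in the next hunk does. The classes declared as InstrItinClass<0> are the ones the trailing comments flag as micro-coded, that is, load/store-multiple style operations whose real cost varies with the register count. A toy binding, as a sketch only; the functional units and cycle numbers here are invented rather than taken from any real core:

// Invented functional units for a hypothetical single-issue core.
def Toy_ALU : FuncUnit;
def Toy_LS  : FuncUnit;

def ToyItineraries : ProcessorItineraries<
  [Toy_ALU, Toy_LS], // functional units
  [],                // register bypasses (none modeled)
  [
  // IIC_iALUr: occupies the ALU for one cycle; the result is defined at
  // cycle 2 and both register sources are read at cycle 1.
  InstrItinData<IIC_iALUr , [InstrStage<1, [Toy_ALU]>], [2, 1, 1]>,
  // IIC_iLoad_i: occupies the load/store unit for one cycle; the loaded
  // value is defined at cycle 3 and the base register is read at cycle 1.
  InstrItinData<IIC_iLoad_i, [InstrStage<1, [Toy_LS]>], [3, 1]>
]>;

The three-element ProcessorItineraries form (units, bypasses, itinerary data) is the new shape this patch migrates to, as the GenericItineraries change below shows.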
//===----------------------------------------------------------------------===// // Processor instruction itineraries. -def GenericItineraries : ProcessorItineraries<[], []>; +def GenericItineraries : ProcessorItineraries<[], [], []>; include "ARMScheduleV6.td" include "ARMScheduleA8.td" diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 282abca..8d86c01 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -14,18 +14,17 @@ // // Scheduling information derived from "Cortex-A8 Technical Reference Manual". // Functional Units. -def A8_Issue : FuncUnit; // issue def A8_Pipe0 : FuncUnit; // pipeline 0 def A8_Pipe1 : FuncUnit; // pipeline 1 -def A8_LdSt0 : FuncUnit; // pipeline 0 load/store -def A8_LdSt1 : FuncUnit; // pipeline 1 load/store +def A8_LSPipe : FuncUnit; // Load / store pipeline def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe def A8_NLSPipe : FuncUnit; // NEON LS pipe // // Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1 // def CortexA8Itineraries : ProcessorItineraries< - [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [ + [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe], + [], [ // Two fully-pipelined integer ALU pipelines // // No operand cycles @@ -35,12 +34,23 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>, InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, // + // Bitwise Instructions that produce a result + InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, + InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, + InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, + // // Unary Instructions that produce a result InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iUNAsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + // + // Zero and sign extension instructions + InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, + InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>, // // Compare instructions InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, @@ -48,124 +58,184 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, // + // Test instructions + InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + // // Move instructions, unconditional InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 
1]>, InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, + InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>, + InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LSPipe]>], [5]>, // // Move instructions, conditional InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>, + // + // MVN instructions + InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, + InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, // Integer multiply pipeline // Result written in E5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases // InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>, - InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>, - InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>, - InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, - InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>, - InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>, - InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, - InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>, - InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData<IIC_iMUL32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData<IIC_iMUL64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, // Integer load pipeline // - // loads have an extra cycle of latency, but are fully pipelined - // use A8_Issue to enforce the 1 load/store per cycle limit - // // Immediate offset - InstrItinData<IIC_iLoadi , [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 1]>, + InstrItinData<IIC_iLoad_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 1]>, + InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 1]>, + InstrItinData<IIC_iLoad_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 1]>, // // Register offset - InstrItinData<IIC_iLoadr , [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + InstrItinData<IIC_iLoad_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, + InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, + InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, // // Scaled 
register offset, issues over 2 cycles - InstrItinData<IIC_iLoadsi , [InstrStage<2, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0], 0>, - InstrStage<1, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>, + // FIXME: lsl by 2 takes 1 cycle. + InstrItinData<IIC_iLoad_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>, + InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>, // // Immediate offset with update - InstrItinData<IIC_iLoadiu , [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>, + InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>, + InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>, // // Register offset with update - InstrItinData<IIC_iLoadru , [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>, + InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>, + InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>, + InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0], 0>, - InstrStage<1, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>, - // - // Load multiple - InstrItinData<IIC_iLoadm , [InstrStage<2, [A8_Issue], 0>, - InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>]>, + InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>, + InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>, + // + // Load multiple, def is the 5th operand. Pipeline 0 only. + // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. + InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>, + // + // Load multiple + update, defs are the 1st and 5th operands. + InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>, + InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>, + // + // Load multiple plus branch + InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>, + InstrStage<3, [A8_LSPipe]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>], + [1, 2, 1, 1, 3]>, + // + // Pop, def is the 3rd operand. + InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>, + InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>, + // + // Pop + branch, def is the 3rd operand. + InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>, + InstrStage<3, [A8_LSPipe]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>], + [1, 1, 3]>, - // Integer store pipeline // - use A8_Issue to enforce the 1 load/store per cycle limit + // iLoadi + iALUr for t2LDRpci_pic.
+ InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>, + + + // Integer store pipeline // // Immediate offset - InstrItinData<IIC_iStorei , [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 1]>, + InstrItinData<IIC_iStore_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 1]>, + InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 1]>, + InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 1]>, // // Register offset - InstrItinData<IIC_iStorer , [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + InstrItinData<IIC_iStore_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, + InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, + InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0], 0>, - InstrStage<1, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>, + InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>, // // Immediate offset with update - InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>, + InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>, + InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>, // // Register offset with update - InstrItinData<IIC_iStoreru , [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>, + InstrItinData<IIC_iStore_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>, + InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>, + InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0], 0>, - InstrStage<1, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>, - // - // Store multiple - InstrItinData<IIC_iStorem , [InstrStage<2, [A8_Issue], 0>, - InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0]>]>, + InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>, + InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>, + // + // Store multiple. Pipeline 0 only. + // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. 
+ InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_LSPipe]>]>, + // + // Store multiple + update + InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_LSPipe]>], [2]>, + + // + // Preload + InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, // Branch // @@ -178,440 +248,786 @@ def CortexA8Itineraries : ProcessorItineraries< // possible. // // FP Special Register to Integer Register File Move - InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_NLSPipe]>]>, + InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NLSPipe]>], [20]>, // // Single-precision FP Unary - InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [7, 1]>, // // Double-precision FP Unary - InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<4, [A8_NPipe], 0>, InstrStage<4, [A8_NLSPipe]>], [4, 1]>, // // Single-precision FP Compare - InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [1, 1]>, // // Double-precision FP Compare - InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<4, [A8_NPipe], 0>, InstrStage<4, [A8_NLSPipe]>], [4, 1]>, // // Single to Double FP Convert - InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<7, [A8_NPipe], 0>, InstrStage<7, [A8_NLSPipe]>], [7, 1]>, // // Double to Single FP Convert - InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<5, [A8_NPipe], 0>, InstrStage<5, [A8_NLSPipe]>], [5, 1]>, // // Single-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [7, 1]>, // // Double-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<8, [A8_NPipe], 0>, InstrStage<8, [A8_NLSPipe]>], [8, 1]>, // // Integer to Single-Precision FP Convert - InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [7, 1]>, // // Integer to Double-Precision FP Convert - InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<8, [A8_NPipe], 0>, InstrStage<8, [A8_NLSPipe]>], [8, 1]>, // // Single-precision FP ALU - InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, // // Double-precision FP ALU - InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<9, [A8_NPipe], 0>, InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>, // // Single-precision FP Multiply - InstrItinData<IIC_fpMUL32 , [InstrStage<1, 
[A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, // // Double-precision FP Multiply - InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<11, [A8_NPipe], 0>, InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>, // // Single-precision FP MAC - InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, // // Double-precision FP MAC - InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<19, [A8_NPipe], 0>, InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, // // Single-precision FP DIV - InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<20, [A8_NPipe], 0>, InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>, // // Double-precision FP DIV - InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<29, [A8_NPipe], 0>, InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>, // // Single-precision FP SQRT - InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<19, [A8_NPipe], 0>, InstrStage<19, [A8_NLSPipe]>], [19, 1]>, // // Double-precision FP SQRT - InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<29, [A8_NPipe], 0>, InstrStage<29, [A8_NLSPipe]>], [29, 1]>, + + // + // Integer to Single-precision Move + InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], + [2, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_fpMOVID, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], + [2, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], + [20, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], + [20, 20, 1]>, + // // Single-precision FP Load - // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0], 0>, - InstrStage<1, [A8_NLSPipe]>]>, + InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NLSPipe], 0>, + InstrStage<1, [A8_LSPipe]>], + [2, 1]>, // // Double-precision FP Load - // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0], 0>, - InstrStage<1, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0], 0>, - InstrStage<1, [A8_NLSPipe]>]>, + InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NLSPipe], 0>, + InstrStage<1, [A8_LSPipe]>], + [2, 1]>, // // FP Load Multiple - // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>, - InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, 
A8_Pipe1]>, - InstrStage<1, [A8_LdSt0], 0>, - InstrStage<1, [A8_NLSPipe]>]>, + // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. + InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NLSPipe], 0>, + InstrStage<1, [A8_LSPipe]>, + InstrStage<1, [A8_NLSPipe], 0>, + InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>, + // + // FP Load Multiple + update + InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NLSPipe], 0>, + InstrStage<1, [A8_LSPipe]>, + InstrStage<1, [A8_NLSPipe], 0>, + InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>, // // Single-precision FP Store - // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0], 0>, - InstrStage<1, [A8_NLSPipe]>]>, + InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NLSPipe], 0>, + InstrStage<1, [A8_LSPipe]>], + [1, 1]>, // // Double-precision FP Store - // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0], 0>, - InstrStage<1, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0], 0>, - InstrStage<1, [A8_NLSPipe]>]>, + InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NLSPipe], 0>, + InstrStage<1, [A8_LSPipe]>], + [1, 1]>, // // FP Store Multiple - // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>, - InstrStage<2, [A8_Pipe0], 0>, - InstrStage<2, [A8_Pipe1]>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0], 0>, - InstrStage<1, [A8_NLSPipe]>]>, + InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NLSPipe], 0>, + InstrStage<1, [A8_LSPipe]>, + InstrStage<1, [A8_NLSPipe], 0>, + InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>, + // + // FP Store Multiple + update + InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NLSPipe], 0>, + InstrStage<1, [A8_LSPipe]>, + InstrStage<1, [A8_NLSPipe], 0>, + InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>, // NEON // Issue through integer pipeline, and execute in NEON unit. 
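
A note on reading these itinerary entries: each InstrItinData pairs an itinerary class with a list of pipeline stages and a list of per-operand cycles (defs first, then uses). A stage's optional third argument is the number of cycles before the next stage may begin; 0 means the next stage starts in the same cycle, which is how a decode slot and a memory pipe get reserved in parallel throughout this file. A minimal sketch, assuming it sits next to the Cortex-A8 definitions above so the A8_* functional units are in scope; IIC_Example and ExampleItins are hypothetical names used only for illustration:

def IIC_Example : InstrItinClass;        // hypothetical class, illustration only
def ExampleItins : ProcessorItineraries<
  [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe], [], [
  // Take one decode slot and, because of the trailing 0, start the next
  // stage in the same cycle: the NEON LS pipe and the load/store pipe are
  // then held together for two cycles.
  InstrItinData<IIC_Example, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
                              InstrStage<2, [A8_NLSPipe], 0>,
                              InstrStage<2, [A8_LSPipe]>],
                [2, 1]>  // def ready in cycle 2; address operand read in cycle 1
]>;
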
// // VLD1 - // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0], 0>, - InstrStage<1, [A8_NLSPipe]>]>, + InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 1]>, + // VLD1x2 + InstrItinData<IIC_VLD1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 2, 1]>, + // + // VLD1x3 + InstrItinData<IIC_VLD1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 2, 3, 1]>, + // + // VLD1x4 + InstrItinData<IIC_VLD1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 2, 3, 3, 1]>, + // + // VLD1u + InstrItinData<IIC_VLD1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 2, 1]>, + // + // VLD1x2u + InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 2, 2, 1]>, + // + // VLD1x3u + InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 2, 3, 2, 1]>, + // + // VLD1x4u + InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 2, 3, 3, 2, 1]>, + // + // VLD1ln + InstrItinData<IIC_VLD1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [3, 1, 1, 1]>, + // + // VLD1lnu + InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [3, 2, 1, 1, 1, 1]>, + // + // VLD1dup + InstrItinData<IIC_VLD1dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 1]>, + // + // VLD1dupu + InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 2, 1, 1]>, // // VLD2 - // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0], 0>, - InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>, + InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 2, 1]>, + // + // VLD2x2 + InstrItinData<IIC_VLD2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 2, 3, 3, 1]>, + // + // VLD2ln + InstrItinData<IIC_VLD2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [3, 3, 1, 1, 1, 1]>, + // + // VLD2u + InstrItinData<IIC_VLD2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 2, 2, 1, 1, 1]>, + // + // VLD2x2u + InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 2, 3, 3, 2, 1]>, + // + // VLD2lnu + InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [3, 3, 2, 1, 1, 1, 1, 1]>, + // + // VLD2dup + InstrItinData<IIC_VLD2dup, [InstrStage<1, [A8_Pipe0, 
A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 2, 1]>, + // + // VLD2dupu + InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 2, 2, 1, 1]>, // // VLD3 - // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0], 0>, - InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>, + InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<4, [A8_NLSPipe], 0>, + InstrStage<4, [A8_LSPipe]>], + [3, 3, 4, 1]>, + // + // VLD3ln + InstrItinData<IIC_VLD3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<5, [A8_NLSPipe], 0>, + InstrStage<5, [A8_LSPipe]>], + [4, 4, 5, 1, 1, 1, 1, 2]>, + // + // VLD3u + InstrItinData<IIC_VLD3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<4, [A8_NLSPipe], 0>, + InstrStage<4, [A8_LSPipe]>], + [3, 3, 4, 2, 1]>, + // + // VLD3lnu + InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<5, [A8_NLSPipe], 0>, + InstrStage<5, [A8_LSPipe]>], + [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>, + // + // VLD3dup + InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 2, 3, 1]>, + // + // VLD3dupu + InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 2, 3, 2, 1, 1]>, // // VLD4 - // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0], 0>, - InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>, + InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<4, [A8_NLSPipe], 0>, + InstrStage<4, [A8_LSPipe]>], + [3, 3, 4, 4, 1]>, + // + // VLD4ln + InstrItinData<IIC_VLD4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<5, [A8_NLSPipe], 0>, + InstrStage<5, [A8_LSPipe]>], + [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, + // + // VLD4u + InstrItinData<IIC_VLD4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<4, [A8_NLSPipe], 0>, + InstrStage<4, [A8_LSPipe]>], + [3, 3, 4, 4, 2, 1]>, + // + // VLD4lnu + InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<5, [A8_NLSPipe], 0>, + InstrStage<5, [A8_LSPipe]>], + [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, + // + // VLD4dup + InstrItinData<IIC_VLD4dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 2, 3, 3, 1]>, + // + // VLD4dupu + InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 2, 3, 3, 2, 1, 1]>, + // + // VST1 + InstrItinData<IIC_VST1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1]>, // - // VST - // FIXME: We don't model this instruction properly - InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>, - InstrStage<1, [A8_Pipe0, A8_Pipe1]>, - InstrStage<1, [A8_LdSt0], 0>, - InstrStage<1, [A8_NLSPipe]>]>, + // VST1x2 + InstrItinData<IIC_VST1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1, 1]>, + // + // VST1x3 + InstrItinData<IIC_VST1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [1, 1, 1, 1, 2]>, + // + // 
VST1x4 + InstrItinData<IIC_VST1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [1, 1, 1, 1, 2, 2]>, + // + // VST1u + InstrItinData<IIC_VST1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 1, 1, 1, 1]>, + // + // VST1x2u + InstrItinData<IIC_VST1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1]>, + // + // VST1x3u + InstrItinData<IIC_VST1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2]>, + // + // VST1x4u + InstrItinData<IIC_VST1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2, 2]>, + // + // VST1ln + InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1]>, + // + // VST1lnu + InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 1, 1, 1, 1]>, + // + // VST2 + InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1, 1]>, + // + // VST2x2 + InstrItinData<IIC_VST2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<4, [A8_NLSPipe], 0>, + InstrStage<4, [A8_LSPipe]>], + [1, 1, 1, 1, 2, 2]>, + // + // VST2u + InstrItinData<IIC_VST2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1]>, + // + // VST2x2u + InstrItinData<IIC_VST2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<4, [A8_NLSPipe], 0>, + InstrStage<4, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2, 2]>, + // + // VST2ln + InstrItinData<IIC_VST2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1, 1]>, + // + // VST2lnu + InstrItinData<IIC_VST2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<2, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1]>, + // + // VST3 + InstrItinData<IIC_VST3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [1, 1, 1, 1, 2]>, + // + // VST3u + InstrItinData<IIC_VST3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2]>, + // + // VST3ln + InstrItinData<IIC_VST3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [1, 1, 1, 1, 2]>, + // + // VST3lnu + InstrItinData<IIC_VST3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<3, [A8_NLSPipe], 0>, + InstrStage<3, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2]>, + // + // VST4 + InstrItinData<IIC_VST4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<4, [A8_NLSPipe], 0>, + InstrStage<4, [A8_LSPipe]>], + [1, 1, 1, 1, 2, 2]>, + // + // VST4u + InstrItinData<IIC_VST4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<4, [A8_NLSPipe], 0>, + InstrStage<4, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2, 2]>, + // + // VST4ln + InstrItinData<IIC_VST4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<4, [A8_NLSPipe], 0>, + InstrStage<4, [A8_LSPipe]>], + [1, 1, 1, 1, 2, 2]>, + // + // VST4lnu + InstrItinData<IIC_VST4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + 
InstrStage<4, [A8_NLSPipe], 0>, + InstrStage<4, [A8_LSPipe]>], + [2, 1, 1, 1, 1, 1, 2, 2]>, // // Double-register FP Unary - InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [5, 2]>, // // Quad-register FP Unary // Result written in N5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases - InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [6, 2]>, // // Double-register FP Binary - InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [5, 2, 2]>, // + // VPADD, etc. + InstrItinData<IIC_VPBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], [5, 2, 2]>, + // + // Double-register FP VMUL + InstrItinData<IIC_VFMULD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], [5, 2, 1]>, + + // // Quad-register FP Binary // Result written in N5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases - InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [6, 2, 2]>, // + // Quad-register FP VMUL + InstrItinData<IIC_VFMULQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], [6, 2, 1]>, + // + // Move + InstrItinData<IIC_VMOV, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], [1, 1]>, + // // Move Immediate - InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [3]>, // // Double-register Permute Move - InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 1]>, // // Quad-register Permute Move // Result written in N2, but that is relative to the last cycle of multicycle, // so we use 3 for those cases - InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NLSPipe]>], [3, 1]>, // // Integer to Single-precision Move - InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 1]>, // // Integer to Double-precision Move - InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, // // Single-precision to Integer Move - InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe]>], [20, 1]>, // // Double-precision to Integer Move - InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>, // // Integer to Lane Move - InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, // + // Vector narrow move + InstrItinData<IIC_VMOVN , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + 
InstrStage<1, [A8_NPipe]>], [2, 1]>, + // // Double-register Permute - InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>, // // Quad-register Permute // Result written in N2, but that is relative to the last cycle of multicycle, // so we use 3 for those cases - InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>, // // Quad-register Permute (3 cycle issue) // Result written in N2, but that is relative to the last cycle of multicycle, // so we use 4 for those cases - InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>, // // Double-register FP Multiple-Accumulate - InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, // // Quad-register FP Multiple-Accumulate // Result written in N9, but that is relative to the last cycle of multicycle, // so we use 10 for those cases - InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, // // Double-register Reciprocal Step - InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [9, 2, 2]>, // // Quad-register Reciprocal Step - InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [10, 2, 2]>, // // Double-register Integer Count - InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, // // Quad-register Integer Count // Result written in N3, but that is relative to the last cycle of multicycle, // so we use 4 for those cases - InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [4, 2, 2]>, // // Double-register Integer Unary - InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [4, 2]>, // // Quad-register Integer Unary - InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [4, 2]>, // // Double-register Integer Q-Unary - InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [4, 1]>, // // Quad-register Integer Q-Unary - InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [4, 1]>, // // Double-register Integer Binary - InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, // // Quad-register Integer
Binary - InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, // // Double-register Integer Binary (4 cycle) - InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, // // Quad-register Integer Binary (4 cycle) - InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, // // Double-register Integer Subtract - InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, // // Quad-register Integer Subtract - InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, // // Double-register Integer Subtract - InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, // // Quad-register Integer Subtract - InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, // // Double-register Integer Shift - InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [3, 1, 1]>, // // Quad-register Integer Shift - InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [4, 1, 1]>, // // Double-register Integer Shift (4 cycle) - InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [4, 1, 1]>, // // Quad-register Integer Shift (4 cycle) - InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [5, 1, 1]>, // // Double-register Integer Pair Add Long - InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [6, 3, 1]>, // // Quad-register Integer Pair Add Long - InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [7, 3, 1]>, // // Double-register Absolute Difference and Accumulate - InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>, // // Quad-register Absolute Difference and Accumulate - InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>, // // Double-register Integer Multiply (.8, .16) - InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [6, 2, 2]>, // // Double-register Integer Multiply (.32) - InstrItinData<IIC_VMULi32D, 
[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [7, 2, 1]>, // // Quad-register Integer Multiply (.8, .16) - InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [7, 2, 2]>, // // Quad-register Integer Multiply (.32) - InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>, InstrStage<2, [A8_NLSPipe], 0>, InstrStage<3, [A8_NPipe]>], [9, 2, 1]>, // // Double-register Integer Multiply-Accumulate (.8, .16) - InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>, // // Double-register Integer Multiply-Accumulate (.32) - InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>, // // Quad-register Integer Multiply-Accumulate (.8, .16) - InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>, // // Quad-register Integer Multiply-Accumulate (.32) - InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>, InstrStage<2, [A8_NLSPipe], 0>, InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>, // // Double-register VEXT - InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, // // Quad-register VEXT - InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, // // VTB - InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>, - InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>, - InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>, - InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>, // // VTBX - InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>, - InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>, - InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>, - InstrItinData<IIC_VTBX4, 
[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index df2f896..82c6735 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -16,130 +16,417 @@ // Reference Manual". // // Functional units -def A9_Pipe0 : FuncUnit; // pipeline 0 -def A9_Pipe1 : FuncUnit; // pipeline 1 -def A9_LSPipe : FuncUnit; // LS pipe -def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe +def A9_Issue0 : FuncUnit; // Issue 0 +def A9_Issue1 : FuncUnit; // Issue 1 +def A9_Branch : FuncUnit; // Branch +def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0 +def A9_ALU1 : FuncUnit; // ALU pipeline 1 +def A9_AGU : FuncUnit; // Address generation unit for ld / st +def A9_NPipe : FuncUnit; // NEON pipeline +def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer +def A9_LSUnit : FuncUnit; // L/S Unit def A9_DRegsVFP: FuncUnit; // FP register set, VFP side def A9_DRegsN : FuncUnit; // FP register set, NEON side -// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1 -// +// Bypasses +def A9_LdBypass : Bypass; + def CortexA9Itineraries : ProcessorItineraries< - [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [ + [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0, + A9_LSUnit, A9_DRegsVFP, A9_DRegsN], + [A9_LdBypass], [ // Two fully-pipelined integer ALU pipelines - // FIXME: There are no operand latencies for these instructions at all! + // // Move instructions, unconditional - InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>, - InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>, + InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, + InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, + InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>, + InstrStage<1, [A9_ALU0, A9_ALU1]>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>, + InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>, + InstrStage<1, [A9_ALU0, A9_ALU1]>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], [5]>, + // + // MVN instructions + InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], + [1]>, + InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], + [1, 1], [NoBypass, A9_LdBypass]>, + InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0, A9_ALU1]>], + [2, 1]>, + InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<3, [A9_ALU0, 
A9_ALU1]>], + [3, 1, 1]>, // // No operand cycles - InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>, + InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>]>, // // Binary Instructions that produce a result - InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>, - InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>, - InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>, + InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], + [1, 1], [NoBypass, A9_LdBypass]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], + [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>, + InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0, A9_ALU1]>], + [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>, + InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0, A9_ALU1]>], + [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>, + InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<3, [A9_ALU0, A9_ALU1]>], + [3, 1, 1, 1], + [NoBypass, A9_LdBypass, NoBypass, NoBypass]>, + // + // Bitwise Instructions that produce a result + InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, + InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>, + InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, + InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>, // // Unary Instructions that produce a result - InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iUNAsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>, + + // CLZ, RBIT, etc. 
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, + + // BFC, BFI, UBFX, SBFX + InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>, + + // + // Zero and sign extension instructions + InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>, + InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>, + InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>, // // Compare instructions - InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>, - InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>, + InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], + [1], [A9_LdBypass]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], + [1, 1], [A9_LdBypass, A9_LdBypass]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0, A9_ALU1]>], + [1, 1], [A9_LdBypass, NoBypass]>, + InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<3, [A9_ALU0, A9_ALU1]>], + [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>, + // + // Test instructions + InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, + InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, + InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>, + InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>, // // Move instructions, conditional - InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>, - InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>, + // FIXME: Correctly model the extra input dep on the destination. 
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>, + InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>, // Integer multiply pipeline // - InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>, - InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>, - InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>, - InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>, - InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>, - InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>, - InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>, - InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>, - InstrItinData<IIC_iMUL64 , [InstrStage<2, [A9_Pipe1], 0>, - InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>, - InstrItinData<IIC_iMAC64 , [InstrStage<2, [A9_Pipe1], 0>, - InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>, + InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0]>], [3, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0]>], + [3, 1, 1, 1]>, + InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0]>], [4, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<2, [A9_ALU0]>], + [4, 1, 1, 1]>, + InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<3, [A9_ALU0]>], + [4, 5, 1, 1]>, // Integer load pipeline // FIXME: The timings are some rough approximations // // Immediate offset - InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>, - InstrStage<1, [A9_LSPipe]>], [3, 1]>, + InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [3, 1], [A9_LdBypass]>, + InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [4, 1], [A9_LdBypass]>, + // FIXME: If address is 64-bit aligned, AGU cycles is 1. 
+ InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [3, 3, 1], [A9_LdBypass]>, // // Register offset - InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>, - InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>, + InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [3, 1, 1], [A9_LdBypass]>, + InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [4, 1, 1], [A9_LdBypass]>, + InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [3, 3, 1, 1], [A9_LdBypass]>, // // Scaled register offset - InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>, - InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>, + InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit], 0>], + [4, 1, 1], [A9_LdBypass]>, + InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [5, 1, 1], [A9_LdBypass]>, // // Immediate offset with update - InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>, - InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>, + InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [3, 2, 1], [A9_LdBypass]>, + InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [4, 3, 1], [A9_LdBypass]>, // // Register offset with update - InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>, - InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>, + InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [3, 2, 1, 1], [A9_LdBypass]>, + InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [4, 3, 1, 1], [A9_LdBypass]>, + InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [3, 3, 1, 1], [A9_LdBypass]>, // // Scaled register offset with update - InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>, - InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>, + InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [4, 3, 1, 1], [A9_LdBypass]>, + InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_AGU], 0>, + InstrStage<1, [A9_LSUnit]>], + [5, 4, 1, 1], [A9_LdBypass]>, + // + // Load multiple, def is the 5th operand. + // FIXME: This assumes 3 to 4 registers. 
+  InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_AGU], 1>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [1, 1, 1, 1, 3],
+                              [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+  //
+  // Load multiple + update, defs are the 1st and 5th operands.
+  InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<2, [A9_AGU], 1>,
+                                InstrStage<2, [A9_LSUnit]>],
+                               [2, 1, 1, 1, 3],
+                               [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+  //
+  // Load multiple plus branch
+  InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_AGU], 1>,
+                                InstrStage<2, [A9_LSUnit]>,
+                                InstrStage<1, [A9_Branch]>],
+                               [1, 2, 1, 1, 3],
+                               [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
+  //
+  // Pop, def is the 3rd operand.
+  InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                            InstrStage<1, [A9_MUX0], 0>,
+                            InstrStage<2, [A9_AGU], 1>,
+                            InstrStage<2, [A9_LSUnit]>],
+                           [1, 1, 3],
+                           [NoBypass, NoBypass, A9_LdBypass]>,
+  //
+  // Pop + branch, def is the 3rd operand.
+  InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<2, [A9_AGU], 1>,
+                              InstrStage<2, [A9_LSUnit]>,
+                              InstrStage<1, [A9_Branch]>],
+                             [1, 1, 3],
+                             [NoBypass, NoBypass, A9_LdBypass]>,
+  //
-  // Load multiple
-  InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_LSPipe]>]>,
+  // iLoadi + iALUr for t2LDRpci_pic.
+  InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_AGU], 0>,
+                                InstrStage<1, [A9_LSUnit]>,
+                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
+                               [2, 1]>,
   // Integer store pipeline
   ///
   // Immediate offset
-  InstrItinData<IIC_iStorei , [InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+  InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_AGU], 0>,
+                                InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+  InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 1>,
+                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
+  // FIXME: If address is 64-bit aligned, AGU cycles is 1.
+  InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 1>,
+                                 InstrStage<1, [A9_LSUnit]>], [1, 1]>,
   //
   // Register offset
-  InstrItinData<IIC_iStorer , [InstrStage<1, [ A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_AGU], 0>,
+                                InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 1>,
+                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+  InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<2, [A9_AGU], 1>,
+                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
   //
   // Scaled register offset
-  InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
+  InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<2, [A9_AGU], 1>,
+                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
   //
   // Immediate offset with update
-  InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
+  InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<2, [A9_AGU], 1>,
+                                  InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
   //
   // Register offset with update
-  InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
+  InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [2, 1, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<2, [A9_AGU], 1>,
+                                  InstrStage<1, [A9_LSUnit]>],
+                                 [3, 1, 1, 1]>,
+  InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                  InstrStage<1, [A9_MUX0], 0>,
+                                  InstrStage<2, [A9_AGU], 1>,
+                                  InstrStage<1, [A9_LSUnit]>],
+                                 [3, 1, 1, 1]>,
   //
   // Scaled register offset with update
-  InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
-                                InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
+  InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                 InstrStage<1, [A9_MUX0], 0>,
+                                 InstrStage<1, [A9_AGU], 0>,
+                                 InstrStage<1, [A9_LSUnit]>],
+                                [2, 1, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                    InstrStage<1, [A9_MUX0], 0>,
+                                    InstrStage<2, [A9_AGU], 1>,
+                                    InstrStage<1, [A9_LSUnit]>],
+                                   [3, 1, 1, 1]>,
   //
   // Store multiple
-  InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_LSPipe]>]>,
+  InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_AGU], 0>,
+                                InstrStage<2, [A9_LSUnit]>]>,
+  //
+  // Store multiple + update
+  InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_AGU], 0>,
+                                InstrStage<2, [A9_LSUnit]>], [2]>,
+
+  //
+  // Preload
+  InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
+
   // Branch
   //
   // no delay slots, so the latency of a branch is unimportant
-  InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
+  InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
+                          InstrStage<1, [A9_Issue1], 0>,
+                          InstrStage<1, [A9_Branch]>]>,
   // VFP and NEON shares the same register file. This means that every VFP
   // instruction should wait for full completion of the consecutive NEON
@@ -159,687 +446,1379 @@ def CortexA9Itineraries : ProcessorItineraries<
   // Issue through integer pipeline, and execute in NEON unit.
   // FP Special Register to Integer Register File Move
-  InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<2, [A9_DRegsN], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [1]>,
   //
   // Single-precision FP Unary
-  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                // Extra latency cycles since wbck is 2 cycles
                                InstrStage<3, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [1, 1]>,
   //
   // Double-precision FP Unary
-  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                // Extra latency cycles since wbck is 2 cycles
                                InstrStage<3, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [1, 1]>,
   //
   // Single-precision FP Compare
-  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                // Extra latency cycles since wbck is 4 cycles
                                InstrStage<5, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [1, 1]>,
   //
   // Double-precision FP Compare
-  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                // Extra latency cycles since wbck is 4 cycles
                                InstrStage<5, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [1, 1]>,
   //
   // Single to Double FP Convert
-  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Double to Single FP Convert
-  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Single to Half FP Convert
-  InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Half to Single FP Convert
-  InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<3, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [2, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [2, 1]>,
   //
   // Single-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Double-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Integer to Single-Precision FP Convert
-  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Integer to Double-Precision FP Convert
-  InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1]>,
   //
   // Single-precision FP ALU
-  InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1, 1]>,
   //
   // Double-precision FP ALU
-  InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<5, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [4, 1, 1]>,
   //
   // Single-precision FP Multiply
-  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<6, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [5, 1, 1]>,
   //
   // Double-precision FP Multiply
-  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<7, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [6, 1, 1]>,
   //
   // Single-precision FP MAC
-  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<9, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [8, 1, 1, 1]>,
   //
   // Double-precision FP MAC
-  InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<10, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [9, 1, 1, 1]>,
   //
   // Single-precision FP DIV
-  InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<16, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
+                               InstrStage<10, [A9_NPipe]>],
+                              [15, 1, 1]>,
   //
   // Double-precision FP DIV
-  InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<26, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
+                               InstrStage<20, [A9_NPipe]>],
+                              [25, 1, 1]>,
   //
   // Single-precision FP SQRT
-  InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<18, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<13, [A9_NPipe]>], [17, 1]>,
+                               InstrStage<13, [A9_NPipe]>],
+                              [17, 1]>,
   //
   // Double-precision FP SQRT
-  InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<33, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<28, [A9_NPipe]>], [32, 1]>,
+                               InstrStage<28, [A9_NPipe]>],
+                              [32, 1]>,
   //
   // Integer to Single-precision Move
-  InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               // Extra 1 latency cycle since wbck is 2 cycles
                               InstrStage<3, [A9_DRegsN], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [1, 1]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [1, 1]>,
   //
   // Integer to Double-precision Move
-  InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               // Extra 1 latency cycle since wbck is 2 cycles
                               InstrStage<3, [A9_DRegsN], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [1, 1, 1]>,
   //
   // Single-precision to Integer Move
-  InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<2, [A9_DRegsN], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [1, 1]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [2, 1]>,
   //
   // Double-precision to Integer Move
-  InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsVFP], 0, Required>,
                               InstrStage<2, [A9_DRegsN], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [2, 1, 1]>,
   //
   // Single-precision FP Load
-  InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [1, 1]>,
   //
   // Double-precision FP Load
-  InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  // FIXME: Result latency is 1 if address is 64-bit aligned.
+  InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [2, 1]>,
   //
   // FP Load Multiple
-  InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
+                               InstrStage<2, [A9_DRegsN], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
+  //
+  // FP Load Multiple + update
+  InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
   //
   // Single-precision FP Store
-  InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [1, 1]>,
   //
   // Double-precision FP Store
-  InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>],
+                              [1, 1]>,
   //
   // FP Store Multiple
-  InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
+  InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
                                InstrStage<2, [A9_DRegsN], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1], 0>,
-                               InstrStage<1, [A9_LSPipe]>,
-                               InstrStage<1, [A9_NPipe]>]>,
+                               InstrStage<1, [A9_NPipe], 0>,
+                               InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
+  //
+  // FP Store Multiple + update
+  InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                                InstrStage<1, [A9_MUX0], 0>,
+                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
+                                InstrStage<2, [A9_DRegsN], 0, Reserved>,
+                                InstrStage<1, [A9_NPipe], 0>,
+                                InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
   // NEON
-  // Issue through integer pipeline, and execute in NEON unit.
-  // FIXME: Neon pipeline and LdSt unit are multiplexed.
-  // Add some syntactic sugar to model this!
   // VLD1
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
-                           InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                           InstrStage<1, [A9_Pipe1], 0>,
-                           InstrStage<1, [A9_LSPipe]>,
-                           InstrStage<1, [A9_NPipe]>]>,
+  // FIXME: Conservatively assume insufficient alignment.
+  InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                           InstrStage<1, [A9_MUX0], 0>,
+                           InstrStage<1, [A9_DRegsN], 0, Required>,
+                           InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                           InstrStage<2, [A9_NPipe], 0>,
+                           InstrStage<2, [A9_LSUnit]>],
+                          [2, 1]>,
+  // VLD1x2
+  InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<2, [A9_NPipe], 0>,
+                             InstrStage<2, [A9_LSUnit]>],
+                            [2, 2, 1]>,
+  // VLD1x3
+  InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<3, [A9_NPipe], 0>,
+                             InstrStage<3, [A9_LSUnit]>],
+                            [2, 2, 3, 1]>,
+  // VLD1x4
+  InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<3, [A9_NPipe], 0>,
+                             InstrStage<3, [A9_LSUnit]>],
+                            [2, 2, 3, 3, 1]>,
+  // VLD1u
+  InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                            InstrStage<1, [A9_MUX0], 0>,
+                            InstrStage<1, [A9_DRegsN], 0, Required>,
+                            InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                            InstrStage<2, [A9_NPipe], 0>,
+                            InstrStage<2, [A9_LSUnit]>],
+                           [2, 2, 1]>,
+  // VLD1x2u
+  InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<2, [A9_NPipe], 0>,
+                              InstrStage<2, [A9_LSUnit]>],
+                             [2, 2, 2, 1]>,
+  // VLD1x3u
+  InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [2, 2, 3, 2, 1]>,
+  // VLD1x4u
+  InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [2, 2, 3, 3, 2, 1]>,
+  //
+  // VLD1ln
+  InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<3, [A9_NPipe], 0>,
+                             InstrStage<3, [A9_LSUnit]>],
+                            [4, 1, 1, 1]>,
+  //
+  // VLD1lnu
+  InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [4, 2, 1, 1, 1, 1]>,
+  //
+  // VLD1dup
+  InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<2, [A9_NPipe], 0>,
+                              InstrStage<2, [A9_LSUnit]>],
+                             [3, 1]>,
+  //
+  // VLD1dupu
+  InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN], 0, Required>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [3, 2, 1, 1]>,
   //
   // VLD2
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
-                           // Extra latency cycles since wbck is 6 cycles
-                           InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                           InstrStage<1, [A9_Pipe1], 0>,
-                           InstrStage<1, [A9_LSPipe]>,
-                           InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                           InstrStage<1, [A9_MUX0], 0>,
+                           InstrStage<1, [A9_DRegsN], 0, Required>,
+                           // Extra latency cycles since wbck is 7 cycles
+                           InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                           InstrStage<2, [A9_NPipe], 0>,
+                           InstrStage<2, [A9_LSUnit]>],
+                          [3, 3, 1]>,
+  //
+  // VLD2x2
+  InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<3, [A9_NPipe], 0>,
+                             InstrStage<3, [A9_LSUnit]>],
+                            [3, 4, 3, 4, 1]>,
+  //
+  // VLD2ln
+  InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<3, [A9_NPipe], 0>,
+                             InstrStage<3, [A9_LSUnit]>],
+                            [4, 4, 1, 1, 1, 1]>,
+  //
+  // VLD2u
+  InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                            InstrStage<1, [A9_MUX0], 0>,
+                            InstrStage<1, [A9_DRegsN], 0, Required>,
+                            // Extra latency cycles since wbck is 7 cycles
+                            InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                            InstrStage<2, [A9_NPipe], 0>,
+                            InstrStage<2, [A9_LSUnit]>],
+                           [3, 3, 2, 1, 1, 1]>,
+  //
+  // VLD2x2u
+  InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [3, 4, 3, 4, 2, 1]>,
+  //
+  // VLD2lnu
+  InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [4, 4, 2, 1, 1, 1, 1, 1]>,
+  //
+  // VLD2dup
+  InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<2, [A9_NPipe], 0>,
+                              InstrStage<2, [A9_LSUnit]>],
+                             [3, 3, 1]>,
+  //
+  // VLD2dupu
+  InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN], 0, Required>,
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe], 0>,
+                               InstrStage<2, [A9_LSUnit]>],
+                              [3, 3, 2, 1, 1]>,
   //
   // VLD3
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
-                           // Extra latency cycles since wbck is 6 cycles
-                           InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                           InstrStage<1, [A9_Pipe1], 0>,
-                           InstrStage<1, [A9_LSPipe]>,
-                           InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
+  InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                           InstrStage<1, [A9_MUX0], 0>,
+                           InstrStage<1, [A9_DRegsN], 0, Required>,
+                           InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+                           InstrStage<4, [A9_NPipe], 0>,
+                           InstrStage<4, [A9_LSUnit]>],
+                          [4, 4, 5, 1]>,
+  //
+  // VLD3ln
+  InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<5, [A9_NPipe], 0>,
+                             InstrStage<5, [A9_LSUnit]>],
+                            [5, 5, 6, 1, 1, 1, 1, 2]>,
+  //
+  // VLD3u
+  InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                            InstrStage<1, [A9_MUX0], 0>,
+                            InstrStage<1, [A9_DRegsN], 0, Required>,
+                            InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+                            InstrStage<4, [A9_NPipe], 0>,
+                            InstrStage<4, [A9_LSUnit]>],
+                           [4, 4, 5, 2, 1]>,
+  //
+  // VLD3lnu
+  InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<5, [A9_NPipe], 0>,
+                              InstrStage<5, [A9_LSUnit]>],
+                             [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
+  //
+  // VLD3dup
+  InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [3, 3, 4, 1]>,
+  //
+  // VLD3dupu
+  InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN], 0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [3, 3, 4, 2, 1, 1]>,
   //
   // VLD4
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
-                           // Extra latency cycles since wbck is 6 cycles
-                           InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                           InstrStage<1, [A9_Pipe1], 0>,
-                           InstrStage<1, [A9_LSPipe]>,
-                           InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
-  //
-  // VST
-  // FIXME: We don't model this instruction properly
-  InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
-                          // Extra latency cycles since wbck is 6 cycles
-                          InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                          InstrStage<1, [A9_Pipe1], 0>,
-                          InstrStage<1, [A9_LSPipe]>,
-                          InstrStage<1, [A9_NPipe]>]>,
+  InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                           InstrStage<1, [A9_MUX0], 0>,
+                           InstrStage<1, [A9_DRegsN], 0, Required>,
+                           InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+                           InstrStage<4, [A9_NPipe], 0>,
+                           InstrStage<4, [A9_LSUnit]>],
+                          [4, 4, 5, 5, 1]>,
+  //
+  // VLD4ln
+  InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<5, [A9_NPipe], 0>,
+                             InstrStage<5, [A9_LSUnit]>],
+                            [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VLD4u
+  InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                            InstrStage<1, [A9_MUX0], 0>,
+                            InstrStage<1, [A9_DRegsN], 0, Required>,
+                            InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
+                            InstrStage<4, [A9_NPipe], 0>,
+                            InstrStage<4, [A9_LSUnit]>],
+                           [4, 4, 5, 5, 2, 1]>,
+  //
+  // VLD4lnu
+  InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<5, [A9_NPipe], 0>,
+                              InstrStage<5, [A9_LSUnit]>],
+                             [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VLD4dup
+  InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [3, 3, 4, 4, 1]>,
+  //
+  // VLD4dupu
+  InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN], 0, Required>,
+                               InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<3, [A9_NPipe], 0>,
+                               InstrStage<3, [A9_LSUnit]>],
+                              [3, 3, 4, 4, 2, 1, 1]>,
+  //
+  // VST1
+  InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                           InstrStage<1, [A9_MUX0], 0>,
+                           InstrStage<1, [A9_DRegsN], 0, Required>,
+                           InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                           InstrStage<2, [A9_NPipe], 0>,
+                           InstrStage<2, [A9_LSUnit]>],
+                          [1, 1, 1]>,
+  //
+  // VST1x2
+  InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<2, [A9_NPipe], 0>,
+                             InstrStage<2, [A9_LSUnit]>],
+                            [1, 1, 1, 1]>,
+  //
+  // VST1x3
+  InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<3, [A9_NPipe], 0>,
+                             InstrStage<3, [A9_LSUnit]>],
+                            [1, 1, 1, 1, 2]>,
+  //
+  // VST1x4
+  InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<3, [A9_NPipe], 0>,
+                             InstrStage<3, [A9_LSUnit]>],
+                            [1, 1, 1, 1, 2, 2]>,
+  //
+  // VST1u
+  InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                            InstrStage<1, [A9_MUX0], 0>,
+                            InstrStage<1, [A9_DRegsN], 0, Required>,
+                            InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                            InstrStage<2, [A9_NPipe], 0>,
+                            InstrStage<2, [A9_LSUnit]>],
+                           [2, 1, 1, 1, 1]>,
+  //
+  // VST1x2u
+  InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<2, [A9_NPipe], 0>,
+                              InstrStage<2, [A9_LSUnit]>],
+                             [2, 1, 1, 1, 1, 1]>,
+  //
+  // VST1x3u
+  InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [2, 1, 1, 1, 1, 1, 2]>,
+  //
+  // VST1x4u
+  InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [2, 1, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VST1ln
+  InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<2, [A9_NPipe], 0>,
+                             InstrStage<2, [A9_LSUnit]>],
+                            [1, 1, 1]>,
+  //
+  // VST1lnu
+  InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [2, 1, 1, 1, 1]>,
+  //
+  // VST2
+  InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                           InstrStage<1, [A9_MUX0], 0>,
+                           InstrStage<1, [A9_DRegsN], 0, Required>,
+                           InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                           InstrStage<2, [A9_NPipe], 0>,
+                           InstrStage<2, [A9_LSUnit]>],
+                          [1, 1, 1, 1]>,
+  //
+  // VST2x2
+  InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<3, [A9_NPipe], 0>,
+                             InstrStage<3, [A9_LSUnit]>],
+                            [1, 1, 1, 1, 2, 2]>,
+  //
+  // VST2u
+  InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                            InstrStage<1, [A9_MUX0], 0>,
+                            InstrStage<1, [A9_DRegsN], 0, Required>,
+                            InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                            InstrStage<2, [A9_NPipe], 0>,
+                            InstrStage<2, [A9_LSUnit]>],
+                           [2, 1, 1, 1, 1, 1]>,
+  //
+  // VST2x2u
+  InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [2, 1, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VST2ln
+  InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<2, [A9_NPipe], 0>,
+                             InstrStage<2, [A9_LSUnit]>],
+                            [1, 1, 1, 1]>,
+  //
+  // VST2lnu
+  InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [2, 1, 1, 1, 1, 1]>,
+  //
+  // VST3
+  InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                           InstrStage<1, [A9_MUX0], 0>,
+                           InstrStage<1, [A9_DRegsN], 0, Required>,
+                           InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                           InstrStage<3, [A9_NPipe], 0>,
+                           InstrStage<3, [A9_LSUnit]>],
+                          [1, 1, 1, 1, 2]>,
+  //
+  // VST3u
+  InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                            InstrStage<1, [A9_MUX0], 0>,
+                            InstrStage<1, [A9_DRegsN], 0, Required>,
+                            InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                            InstrStage<3, [A9_NPipe], 0>,
+                            InstrStage<3, [A9_LSUnit]>],
+                           [2, 1, 1, 1, 1, 1, 2]>,
+  //
+  // VST3ln
+  InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<3, [A9_NPipe], 0>,
+                             InstrStage<3, [A9_LSUnit]>],
+                            [1, 1, 1, 1, 2]>,
+  //
+  // VST3lnu
+  InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [2, 1, 1, 1, 1, 1, 2]>,
+  //
+  // VST4
+  InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                           InstrStage<1, [A9_MUX0], 0>,
+                           InstrStage<1, [A9_DRegsN], 0, Required>,
+                           InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                           InstrStage<3, [A9_NPipe], 0>,
+                           InstrStage<3, [A9_LSUnit]>],
+                          [1, 1, 1, 1, 2, 2]>,
+  //
+  // VST4u
+  InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                            InstrStage<1, [A9_MUX0], 0>,
+                            InstrStage<1, [A9_DRegsN], 0, Required>,
+                            InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                            InstrStage<3, [A9_NPipe], 0>,
+                            InstrStage<3, [A9_LSUnit]>],
+                           [2, 1, 1, 1, 1, 1, 2, 2]>,
+  //
+  // VST4ln
+  InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
+                             InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                             InstrStage<3, [A9_NPipe], 0>,
+                             InstrStage<3, [A9_LSUnit]>],
+                            [1, 1, 1, 1, 2, 2]>,
+  //
+  // VST4lnu
+  InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
+                              InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
+                              InstrStage<3, [A9_NPipe], 0>,
+                              InstrStage<3, [A9_LSUnit]>],
+                             [2, 1, 1, 1, 1, 1, 2, 2]>,
+  //
   // Double-register Integer Unary
-  InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
                              // Extra latency cycles since wbck is 6 cycles
                              InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                             InstrStage<1, [A9_Pipe1]>,
-                             InstrStage<1, [A9_NPipe]>], [4, 2]>,
+                             InstrStage<1, [A9_NPipe]>],
+                            [4, 2]>,
   //
   // Quad-register Integer Unary
-  InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
                              // Extra latency cycles since wbck is 6 cycles
                              InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                             InstrStage<1, [A9_Pipe1]>,
-                             InstrStage<1, [A9_NPipe]>], [4, 2]>,
+                             InstrStage<1, [A9_NPipe]>],
+                            [4, 2]>,
  //
   // Double-register Integer Q-Unary
-  InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
                               // Extra latency cycles since wbck is 6 cycles
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [4, 1]>,
   //
   // Quad-register Integer CountQ-Unary
-  InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
                               // Extra latency cycles since wbck is 6 cycles
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [4, 1]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [4, 1]>,
   //
   // Double-register Integer Binary
-  InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
                              // Extra latency cycles since wbck is 6 cycles
                              InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                             InstrStage<1, [A9_Pipe1]>,
-                             InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+                             InstrStage<1, [A9_NPipe]>],
+                            [3, 2, 2]>,
   //
   // Quad-register Integer Binary
-  InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
                              // Extra latency cycles since wbck is 6 cycles
                              InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                             InstrStage<1, [A9_Pipe1]>,
-                             InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+                             InstrStage<1, [A9_NPipe]>],
+                            [3, 2, 2]>,
   //
   // Double-register Integer Subtract
-  InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
                              // Extra latency cycles since wbck is 6 cycles
                              InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                             InstrStage<1, [A9_Pipe1]>,
-                             InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
+                             InstrStage<1, [A9_NPipe]>],
+                            [3, 2, 1]>,
   //
   // Quad-register Integer Subtract
-  InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
                              // Extra latency cycles since wbck is 6 cycles
                              InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                             InstrStage<1, [A9_Pipe1]>,
-                             InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
  //
  // Double-register Integer Shift
-  InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
                              // Extra latency cycles since wbck is 6 cycles
                              InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                             InstrStage<1, [A9_Pipe1]>,
-                             InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
+                             InstrStage<1, [A9_NPipe]>],
+                            [3, 1, 1]>,
   //
   // Quad-register Integer Shift
-  InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
                              // Extra latency cycles since wbck is 6 cycles
                              InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                             InstrStage<1, [A9_Pipe1]>,
-                             InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
+                             InstrStage<1, [A9_NPipe]>],
+                            [3, 1, 1]>,
   //
   // Double-register Integer Shift (4 cycle)
-  InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
                               // Extra latency cycles since wbck is 6 cycles
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [4, 1, 1]>,
   //
   // Quad-register Integer Shift (4 cycle)
-  InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
                               // Extra latency cycles since wbck is 6 cycles
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [4, 1, 1]>,
   //
   // Double-register Integer Binary (4 cycle)
-  InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
                               // Extra latency cycles since wbck is 6 cycles
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [4, 2, 2]>,
   //
   // Quad-register Integer Binary (4 cycle)
-  InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
                               // Extra latency cycles since wbck is 6 cycles
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
   //
   // Double-register Integer Subtract (4 cycle)
-  InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
                               // Extra latency cycles since wbck is 6 cycles
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [4, 2, 1]>,
   //
   // Quad-register Integer Subtract (4 cycle)
-  InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
                               // Extra latency cycles since wbck is 6 cycles
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [4, 2, 1]>,
   //
   // Double-register Integer Count
-  InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
                              // Extra latency cycles since wbck is 6 cycles
                              InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                             InstrStage<1, [A9_Pipe1]>,
-                             InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
+                             InstrStage<1, [A9_NPipe]>],
+                            [3, 2, 2]>,
   //
   // Quad-register Integer Count
   // Result written in N3, but that is relative to the last cycle of multicycle,
   // so we use 4 for those cases
-  InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
                              // Extra latency cycles since wbck is 7 cycles
                              InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                             InstrStage<1, [A9_Pipe1]>,
-                             InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
+                             InstrStage<2, [A9_NPipe]>],
+                            [4, 2, 2]>,
   //
   // Double-register Absolute Difference and Accumulate
-  InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                            InstrStage<1, [A9_MUX0], 0>,
+                            InstrStage<1, [A9_DRegsN], 0, Required>,
                             // Extra latency cycles since wbck is 6 cycles
                             InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                            InstrStage<1, [A9_Pipe1]>,
-                            InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
+                            InstrStage<1, [A9_NPipe]>],
+                           [6, 3, 2, 1]>,
   //
   // Quad-register Absolute Difference and Accumulate
-  InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                            InstrStage<1, [A9_MUX0], 0>,
+                            InstrStage<1, [A9_DRegsN], 0, Required>,
                             // Extra latency cycles since wbck is 6 cycles
                             InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                            InstrStage<1, [A9_Pipe1]>,
-                            InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
+                            InstrStage<2, [A9_NPipe]>],
+                           [6, 3, 2, 1]>,
   //
   // Double-register Integer Pair Add Long
-  InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
                              // Extra latency cycles since wbck is 6 cycles
                              InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                             InstrStage<1, [A9_Pipe1]>,
-                             InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
+                             InstrStage<1, [A9_NPipe]>],
+                            [6, 3, 1]>,
   //
   // Quad-register Integer Pair Add Long
-  InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                             InstrStage<1, [A9_MUX0], 0>,
+                             InstrStage<1, [A9_DRegsN], 0, Required>,
                              // Extra latency cycles since wbck is 6 cycles
                              InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                             InstrStage<1, [A9_Pipe1]>,
-                             InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
+                             InstrStage<2, [A9_NPipe]>],
+                            [6, 3, 1]>,
   //
   // Double-register Integer Multiply (.8, .16)
-  InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN], 0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [6, 2, 2]>,
   //
  // Quad-register Integer Multiply (.8, .16)
-  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN], 0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [7, 2, 2]>,
   //
   // Double-register Integer Multiply (.32)
-  InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN], 0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [7, 2, 1]>,
   //
   // Quad-register Integer Multiply (.32)
-  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN], 0, Required>,
                                // Extra latency cycles since wbck is 9 cycles
                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
+                               InstrStage<4, [A9_NPipe]>],
+                              [9, 2, 1]>,
   //
   // Double-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN], 0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [6, 3, 2, 2]>,
   //
   // Double-register Integer Multiply-Accumulate (.32)
-  InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN], 0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [7, 3, 2, 1]>,
   //
   // Quad-register Integer Multiply-Accumulate (.8, .16)
-  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN], 0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [7, 3, 2, 2]>,
   //
   // Quad-register Integer Multiply-Accumulate (.32)
-  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN], 0, Required>,
                                // Extra latency cycles since wbck is 9 cycles
                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
+                               InstrStage<4, [A9_NPipe]>],
+                              [9, 3, 2, 1]>,
+
+  //
+  // Move
+  InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                           InstrStage<1, [A9_MUX0], 0>,
+                           InstrStage<1, [A9_DRegsN], 0, Required>,
+                           InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+                           InstrStage<1, [A9_NPipe]>],
+                          [1,1]>,
   //
   // Move Immediate
-  InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
+  InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
                               // Extra latency cycles since wbck is 6 cycles
                               InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [3]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [3]>,
   //
   // Double-register Permute Move
-  InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
-                            // FIXME: all latencies are arbitrary, no information is available
-                            InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                            InstrStage<1, [A9_Pipe1]>,
-                            InstrStage<1, [A9_LSPipe]>], [2, 1]>,
+  InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                            InstrStage<1, [A9_MUX0], 0>,
+                            InstrStage<1, [A9_DRegsN], 0, Required>,
+                            // Extra latency cycles since wbck is 6 cycles
+                            InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                            InstrStage<1, [A9_NPipe]>],
+                           [2, 1]>,
   //
   // Quad-register Permute Move
-  // Result written in N2, but that is relative to the last cycle of multicycle,
-  // so we use 3 for those cases
-  InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
-                            // FIXME: all latencies are arbitrary, no information is available
-                            InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
-                            InstrStage<1, [A9_Pipe1]>,
-                            InstrStage<2, [A9_NPipe]>], [3, 1]>,
+  InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                            InstrStage<1, [A9_MUX0], 0>,
+                            InstrStage<1, [A9_DRegsN], 0, Required>,
+                            // Extra latency cycles since wbck is 6 cycles
+                            InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+                            InstrStage<1, [A9_NPipe]>],
+                           [2, 1]>,
   //
   // Integer to Single-precision Move
-  InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
-                              // FIXME: all latencies are arbitrary, no information is available
+  InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [2, 1]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [1, 1]>,
   //
   // Integer to Double-precision Move
-  InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
-                              // FIXME: all latencies are arbitrary, no information is available
+  InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [1, 1, 1]>,
   //
   // Single-precision to Integer Move
-  InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
-                              // FIXME: all latencies are arbitrary, no information is available
+  InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                              InstrStage<1, [A9_Pipe1]>,
-                              InstrStage<1, [A9_NPipe]>], [2, 1]>,
+                              InstrStage<1, [A9_NPipe]>],
+                             [2, 1]>,
   //
   // Double-precision to Integer Move
-  InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
-                              // FIXME: all latencies are arbitrary, no information is available
+  InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                              InstrStage<1, [A9_MUX0], 0>,
+                              InstrStage<1, [A9_DRegsN], 0, Required>,
                               InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
-                              InstrStage<1,
[A9_Pipe1]>, - InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, + InstrStage<1, [A9_NPipe]>], + [2, 2, 1]>, // // Integer to Lane Move - InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>, - // FIXME: all latencies are arbitrary, no information is available + InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, InstrStage<4, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe1]>, - InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, + InstrStage<2, [A9_NPipe]>], + [3, 1, 1]>, // + // Vector narrow move + InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe]>], + [3, 1]>, + // // Double-register FP Unary - InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe1]>, - InstrStage<1, [A9_NPipe]>], [5, 2]>, + InstrStage<1, [A9_NPipe]>], + [5, 2]>, // // Quad-register FP Unary // Result written in N5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases - InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe1]>, - InstrStage<2, [A9_NPipe]>], [6, 2]>, + InstrStage<2, [A9_NPipe]>], + [6, 2]>, // // Double-register FP Binary // FIXME: We're using this itin for many instructions and [2, 2] here is too // optimistic. - InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>, - // Extra latency cycles since wbck is 7 cycles + InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe1]>, - InstrStage<1, [A9_NPipe]>], [5, 2, 2]>, + InstrStage<1, [A9_NPipe]>], + [5, 2, 2]>, + + // + // VPADD, etc. + InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe]>], + [5, 1, 1]>, + // + // Double-register FP VMUL + InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe]>], + [5, 2, 1]>, // // Quad-register FP Binary // Result written in N5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases // FIXME: We're using this itin for many instructions and [2, 2] here is too // optimistic. 
-  InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 8 cycles
+  InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 7 cycles
+                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [6, 2, 2]>,
+  //
+  // Quad-register FP VMUL
+  InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [6, 2, 1]>,
   //
   // Double-register FP Multiply-Accumulate
-  InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [6, 3, 2, 1]>,
   //
   // Quad-register FP Multiply-Accumulate
   // Result written in N9, but that is relative to the last cycle of multicycle,
   // so we use 10 for those cases
-  InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 9 cycles
                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
+                               InstrStage<4, [A9_NPipe]>],
+                              [8, 4, 2, 1]>,
   //
   // Double-register Reciprocal Step
-  InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 7 cycles
-                               InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
+  InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 10 cycles
+                               InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [9, 2, 2]>,
   //
   // Quad-register Reciprocal Step
-  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 9 cycles
-                               InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
+  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 11 cycles
+                               InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [10, 2, 2]>,
   //
   // Double-register Permute
-  InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [2, 2, 1, 1]>,
   //
   // Quad-register Permute
   // Result written in N2, but that is relative to the last cycle of multicycle,
   // so we use 3 for those cases
-  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [3, 3, 1, 1]>,
   //
   // Quad-register Permute (3 cycle issue)
   // Result written in N2, but that is relative to the last cycle of multicycle,
   // so we use 4 for those cases
-  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
+                               InstrStage<3, [A9_NPipe]>],
+                              [4, 4, 1, 1]>,
   //
   // Double-register VEXT
-  InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 7 cycles
+  InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 6 cycles
                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
+                               InstrStage<1, [A9_NPipe]>],
+                              [2, 1, 1]>,
   //
   // Quad-register VEXT
-  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
-                               // Extra latency cycles since wbck is 9 cycles
+  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
+                               // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [3, 1, 2]>,
   //
   // VTB
-  InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
-  InstrItinData<IIC_VTB2,     [InstrStage<2, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [3, 2, 1]>,
+  InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
-  InstrItinData<IIC_VTB3,     [InstrStage<2, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [3, 2, 2, 1]>,
+  InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<2, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
-  InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_NPipe]>],
+                              [4, 2, 2, 3, 1]>,
+  InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
+                               InstrStage<3, [A9_NPipe]>],
+                              [4, 2, 2, 3, 3, 1]>,
   //
   // VTBX
-  InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+  InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
-  InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [3, 1, 2, 1]>,
+  InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 7 cycles
                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
-  InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<2, [A9_NPipe]>],
+                              [3, 1, 2, 2, 1]>,
+  InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
-  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_DRegsN],   0, Required>,
+                               InstrStage<3, [A9_NPipe]>],
+                              [4, 1, 2, 2, 3, 1]>,
+  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+                               InstrStage<1, [A9_MUX0], 0>,
+                               InstrStage<1, [A9_DRegsN],   0, Required>,
                                // Extra latency cycles since wbck is 8 cycles
                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
-                               InstrStage<1, [A9_Pipe1]>,
-                               InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+                               InstrStage<2, [A9_NPipe]>],
+                              [4, 1, 2, 2, 3, 3, 1]>
 ]>;
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index 08b560c..c1880a7 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -19,7 +19,7 @@ def V6_Pipe : FuncUnit; // pipeline
 // Scheduling information derived from "ARM1176JZF-S Technical Reference Manual"
 //
 def ARMV6Itineraries : ProcessorItineraries<
-  [V6_Pipe], [
+  [V6_Pipe], [], [
   //
   // No operand cycles
   InstrItinData<IIC_iALUx    , [InstrStage<1, [V6_Pipe]>]>,
@@ -30,10 +30,20 @@ def ARMV6Itineraries : ProcessorItineraries<
   InstrItinData<IIC_iALUsi   , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
   InstrItinData<IIC_iALUsr   , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
   //
+  // Bitwise Instructions that produce a result
+  InstrItinData<IIC_iBITi    , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+  InstrItinData<IIC_iBITr    , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
+  InstrItinData<IIC_iBITsi   , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_iBITsr   , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
+  //
   // Unary Instructions that produce a result
   InstrItinData<IIC_iUNAr    , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
   InstrItinData<IIC_iUNAsi   , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
-  InstrItinData<IIC_iUNAsr   , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+  //
+  // Zero and sign extension instructions
+  InstrItinData<IIC_iEXTr    , [InstrStage<1, [V6_Pipe]>], [1, 1]>,
+  InstrItinData<IIC_iEXTAr   , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_iEXTAsr  , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>,
   //
   // Compare instructions
   InstrItinData<IIC_iCMPi    , [InstrStage<1, [V6_Pipe]>], [2]>,
@@ -41,17 +51,39 @@ def ARMV6Itineraries : ProcessorItineraries<
   InstrItinData<IIC_iCMPsi   , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
   InstrItinData<IIC_iCMPsr   , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
   //
+  // Test instructions
+  InstrItinData<IIC_iTSTi    , [InstrStage<1, [V6_Pipe]>], [2]>,
+  InstrItinData<IIC_iTSTr    , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+  InstrItinData<IIC_iTSTsi   , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+  InstrItinData<IIC_iTSTsr   , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+  //
   // Move instructions, unconditional
   InstrItinData<IIC_iMOVi    , [InstrStage<1, [V6_Pipe]>], [2]>,
   InstrItinData<IIC_iMOVr    , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
   InstrItinData<IIC_iMOVsi   , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
   InstrItinData<IIC_iMOVsr   , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,
+  InstrItinData<IIC_iMOVix2  , [InstrStage<1, [V6_Pipe]>,
+                                InstrStage<1, [V6_Pipe]>], [2]>,
+  InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [V6_Pipe]>,
+                                  InstrStage<1, [V6_Pipe]>,
+                                  InstrStage<1, [V6_Pipe]>], [3]>,
+  InstrItinData<IIC_iMOVix2ld , [InstrStage<1, [V6_Pipe]>,
+                                 InstrStage<1, [V6_Pipe]>,
+                                 InstrStage<1, [V6_Pipe]>], [5]>,
   //
   // Move instructions, conditional
   InstrItinData<IIC_iCMOVi   , [InstrStage<1, [V6_Pipe]>], [3]>,
   InstrItinData<IIC_iCMOVr   , [InstrStage<1, [V6_Pipe]>], [3, 2]>,
   InstrItinData<IIC_iCMOVsi  , [InstrStage<1, [V6_Pipe]>], [3, 1]>,
   InstrItinData<IIC_iCMOVsr  , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+  InstrItinData<IIC_iCMOVix2 , [InstrStage<1, [V6_Pipe]>,
+                                InstrStage<1, [V6_Pipe]>], [4]>,
+  //
+  // MVN instructions
+  InstrItinData<IIC_iMVNi    , [InstrStage<1, [V6_Pipe]>], [2]>,
+  InstrItinData<IIC_iMVNr    , [InstrStage<1, [V6_Pipe]>], [2, 2]>,
+  InstrItinData<IIC_iMVNsi   , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+  InstrItinData<IIC_iMVNsr   , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>,

   // Integer multiply pipeline
   //
@@ -65,50 +97,90 @@ def ARMV6Itineraries : ProcessorItineraries<
   // Integer load pipeline
   //
   // Immediate offset
-  InstrItinData<IIC_iLoadi   , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+  InstrItinData<IIC_iLoad_i  , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+  InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [V6_Pipe]>], [4, 1]>,
+  InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [V6_Pipe]>], [4, 1]>,
   //
   // Register offset
-  InstrItinData<IIC_iLoadr   , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+  InstrItinData<IIC_iLoad_r  , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+  InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
+  InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>,
   //
   // Scaled register offset, issues over 2 cycles
-  InstrItinData<IIC_iLoadsi  , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+  InstrItinData<IIC_iLoad_si , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
+  InstrItinData<IIC_iLoad_bh_si, [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>,
   //
   // Immediate offset with update
-  InstrItinData<IIC_iLoadiu  , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+  InstrItinData<IIC_iLoad_iu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
+  InstrItinData<IIC_iLoad_bh_iu, [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>,
   //
   // Register offset with update
-  InstrItinData<IIC_iLoadru  , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+  InstrItinData<IIC_iLoad_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+  InstrItinData<IIC_iLoad_bh_ru, [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
+  InstrItinData<IIC_iLoad_d_ru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>,
   //
   // Scaled register offset with update, issues over 2 cycles
-  InstrItinData<IIC_iLoadsiu , [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+  InstrItinData<IIC_iLoad_siu, [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+  InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>,
+
+  //
+  // Load multiple, def is the 5th operand.
+  InstrItinData<IIC_iLoad_m  , [InstrStage<3, [V6_Pipe]>], [1, 1, 1, 1, 4]>,
+  //
+  // Load multiple + update, defs are the 1st and 5th operands.
+  InstrItinData<IIC_iLoad_mu , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 1, 4]>,
+  //
+  // Load multiple plus branch
+  InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [V6_Pipe]>,
+                                InstrStage<1, [V6_Pipe]>], [1, 2, 1, 1, 4]>,
+
+  //
+  // iLoadi + iALUr for t2LDRpci_pic.
+  InstrItinData<IIC_iLoadiALU, [InstrStage<1, [V6_Pipe]>,
+                                InstrStage<1, [V6_Pipe]>], [3, 1]>,
   //
-  // Load multiple
-  InstrItinData<IIC_iLoadm   , [InstrStage<3, [V6_Pipe]>]>,
+  // Pop, def is the 3rd operand.
+  InstrItinData<IIC_iPop     , [InstrStage<3, [V6_Pipe]>], [1, 1, 4]>,
+  //
+  // Pop + branch, def is the 3rd operand.
+  InstrItinData<IIC_iPop_Br, [InstrStage<3, [V6_Pipe]>,
+                              InstrStage<1, [V6_Pipe]>], [1, 2, 4]>,

   // Integer store pipeline
   //
   // Immediate offset
-  InstrItinData<IIC_iStorei  , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+  InstrItinData<IIC_iStore_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+  InstrItinData<IIC_iStore_bh_i, [InstrStage<1, [V6_Pipe]>], [2, 1]>,
+  InstrItinData<IIC_iStore_d_i , [InstrStage<1, [V6_Pipe]>], [2, 1]>,
   //
   // Register offset
-  InstrItinData<IIC_iStorer  , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
-
+  InstrItinData<IIC_iStore_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_r, [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
+  InstrItinData<IIC_iStore_d_r , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>,
   //
   // Scaled register offset, issues over 2 cycles
-  InstrItinData<IIC_iStoresi , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_iStore_si , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_iStore_bh_si, [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>,
   //
   // Immediate offset with update
-  InstrItinData<IIC_iStoreiu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_iStore_iu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
+  InstrItinData<IIC_iStore_bh_iu, [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>,
   //
   // Register offset with update
-  InstrItinData<IIC_iStoreru , [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+  InstrItinData<IIC_iStore_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+  InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
+  InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>,
   //
   // Scaled register offset with update, issues over 2 cycles
-  InstrItinData<IIC_iStoresiu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+  InstrItinData<IIC_iStore_siu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
+  InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>,
   //
   // Store multiple
-  InstrItinData<IIC_iStorem  , [InstrStage<3, [V6_Pipe]>]>,
+  InstrItinData<IIC_iStore_m , [InstrStage<3, [V6_Pipe]>]>,
+  //
+  // Store multiple + update
+  InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>,

   // Branch
   //
@@ -183,6 +255,18 @@ def ARMV6Itineraries : ProcessorItineraries<
   // Double-precision FP SQRT
   InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>,
   //
+  // Integer to Single-precision Move
+  InstrItinData<IIC_fpMOVIS, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+  //
+  // Integer to Double-precision Move
+  InstrItinData<IIC_fpMOVID, [InstrStage<1, [V6_Pipe]>], [10, 1, 1]>,
+  //
+  // Single-precision to Integer Move
+  InstrItinData<IIC_fpMOVSI, [InstrStage<1, [V6_Pipe]>], [10, 1]>,
+  //
+  // Double-precision to Integer Move
+  InstrItinData<IIC_fpMOVDI, [InstrStage<1, [V6_Pipe]>], [10, 10, 1]>,
+  //
   // Single-precision FP Load
   InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
   //
@@ -190,7 +274,10 @@ def ARMV6Itineraries : ProcessorItineraries<
   InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>,
   //
   // FP Load Multiple
-  InstrItinData<IIC_fpLoadm  , [InstrStage<3, [V6_Pipe]>]>,
+  InstrItinData<IIC_fpLoad_m , [InstrStage<3, [V6_Pipe]>], [2, 1, 1, 5]>,
+  //
+  // FP Load Multiple + update
+  InstrItinData<IIC_fpLoad_mu, [InstrStage<3, [V6_Pipe]>], [3, 2, 1, 1, 5]>,
   //
   // Single-precision FP Store
   InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
@@ -200,5 +287,8 @@ def ARMV6Itineraries : ProcessorItineraries<
   InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>,
   //
   // FP Store Multiple
-  InstrItinData<IIC_fpStorem , [InstrStage<3, [V6_Pipe]>]>
+  InstrItinData<IIC_fpStore_m, [InstrStage<3, [V6_Pipe]>], [2, 2, 2, 2]>,
+  //
+  // FP Store Multiple + update
+  InstrItinData<IIC_fpStore_mu,[InstrStage<3, [V6_Pipe]>], [3, 2, 2, 2, 2]>
 ]>;
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index a289407..2b9202b 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -29,10 +29,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                              SDValue Dst, SDValue Src,
                                              SDValue Size, unsigned Align,
                                              bool isVolatile, bool AlwaysInline,
-                                             const Value *DstSV,
-                                             uint64_t DstSVOff,
-                                             const Value *SrcSV,
-                                             uint64_t SrcSVOff) const {
+                                             MachinePointerInfo DstPtrInfo,
+                                             MachinePointerInfo SrcPtrInfo) const {
   // Do repeated 4-byte loads and stores. To be improved.
   // This requires 4-byte alignment.
   if ((Align & 3) != 0)
@@ -66,7 +64,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
       Loads[i] = DAG.getLoad(VT, dl, Chain,
                              DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
                                          DAG.getConstant(SrcOff, MVT::i32)),
-                             SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0);
+                             SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
+                             false, 0);
       TFOps[i] = Loads[i].getValue(1);
       SrcOff += VTSize;
     }
@@ -77,7 +76,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
       TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                               DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
                                           DAG.getConstant(DstOff, MVT::i32)),
-                              DstSV, DstSVOff + DstOff, isVolatile, false, 0);
+                              DstPtrInfo.getWithOffset(DstOff),
+                              isVolatile, false, 0);
       DstOff += VTSize;
     }
     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
@@ -103,7 +103,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
     Loads[i] = DAG.getLoad(VT, dl, Chain,
                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
                                        DAG.getConstant(SrcOff, MVT::i32)),
-                           SrcSV, SrcSVOff + SrcOff, false, false, 0);
+                           SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
     TFOps[i] = Loads[i].getValue(1);
     ++i;
     SrcOff += VTSize;
@@ -125,7 +125,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
                                         DAG.getConstant(DstOff, MVT::i32)),
-                            DstSV, DstSVOff + DstOff, false, false, 0);
+                            DstPtrInfo.getWithOffset(DstOff), false, false, 0);
     ++i;
     DstOff += VTSize;
     BytesLeft -= VTSize;
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index d7d00c2..7533690 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -33,10 +33,8 @@ public:
                                   SDValue Dst, SDValue Src,
                                   SDValue Size, unsigned Align,
                                   bool isVolatile, bool AlwaysInline,
-                                  const Value *DstSV,
-                                  uint64_t DstSVOff,
-                                  const Value *SrcSV,
-                                  uint64_t SrcSVOff) const;
+                                  MachinePointerInfo DstPtrInfo,
+                                  MachinePointerInfo SrcPtrInfo) const;
 };
 }
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index cb539f4..0bd740c 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -13,6 +13,7 @@
 #include "ARMSubtarget.h"
 #include "ARMGenSubtarget.inc"
+#include "ARMBaseRegisterInfo.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/CommandLine.h"
@@ -24,45 +25,52 @@
 ReserveR9("arm-reserve-r9", cl::Hidden,
           cl::desc("Reserve R9, making it unavailable as GPR"));
 static cl::opt<bool>
-UseMOVT("arm-use-movt",
-        cl::init(true), cl::Hidden);
+DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+StrictAlign("arm-strict-align", cl::Hidden,
+            cl::desc("Disallow all unaligned memory accesses"));

 ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
                            bool isT)
   : ARMArchVersion(V4)
+  , ARMProcFamily(Others)
  , ARMFPUType(None)
  , UseNEONForSinglePrecisionFP(false)
-  , SlowVMLx(false)
+  , SlowFPVMLx(false)
  , SlowFPBrcc(false)
  , IsThumb(isT)
  , ThumbMode(Thumb1)
  , NoARM(false)
  , PostRAScheduler(false)
  , IsR9Reserved(ReserveR9)
-  , UseMovt(UseMOVT)
+  , UseMovt(false)
  , HasFP16(false)
+  , HasD16(false)
  , HasHardwareDivide(false)
  , HasT2ExtractPack(false)
  , HasDataBarrier(false)
  , Pref32BitThumb(false)
+  , HasMPExtension(false)
  , FPOnlySP(false)
+  , AllowsUnalignedMem(false)
  , stackAlignment(4)
  , CPUString("generic")
-  , TargetType(isELF) // Default to ELF unless otherwise specified.
+  , TargetTriple(TT)
  , TargetABI(ARM_ABI_APCS) {
-  // default to soft float ABI
+  // Default to soft float ABI
   if (FloatABIType == FloatABI::Default)
     FloatABIType = FloatABI::Soft;

   // Determine default and user specified characteristics

-  // Parse features string.
-  CPUString = ParseSubtargetFeatures(FS, CPUString);
-
   // When no arch is specified either by CPU or by attributes, make the default
   // ARMv4T.
-  if (CPUString == "generic" && (FS.empty() || FS == "generic"))
+  const char *ARMArchFeature = "";
+  if (CPUString == "generic" && (FS.empty() || FS == "generic")) {
     ARMArchVersion = V4T;
+    ARMArchFeature = ",+v4t";
+  }

   // Set the boolean corresponding to the current target triple, or the default
   // if one cannot be determined, to true.
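The constructor rework above, together with the hunk that follows, replaces the old unconditional ParseSubtargetFeatures(FS, CPUString) call: an architecture feature derived from the target triple (",+v4t", ",+v6t2", and so on) is now prepended to the user feature string whenever that string does not already name a CPU, so that features implied by the architecture version survive parsing. A minimal standalone sketch of that normalization step, using a hypothetical NormalizeFeatureString helper (the real parsing is done by the TableGen-generated ParseSubtargetFeatures):

    #include <string>

    // Sketch only: mirrors the FSWithArch logic in the following hunk.
    // ArchFeature is the triple-derived feature, e.g. ",+v4t"; FS is the
    // user feature string, whose first entry (if any) names a CPU.
    static std::string NormalizeFeatureString(const std::string &FS,
                                              const char *ArchFeature) {
      if (FS.empty())
        return std::string(ArchFeature);      // nothing given: triple decides
      if (FS.find(',') == 0)
        return std::string(ArchFeature) + FS; // no CPU entry: prepend arch
      return FS;                              // FS already starts with a CPU
    }

Called as NormalizeFeatureString(",+neon", ",+v7a"), the sketch yields ",+v7a,+neon", which is the shape the generated parser expects.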
@@ -80,47 +88,78 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
     unsigned SubVer = TT[Idx];
     if (SubVer >= '7' && SubVer <= '9') {
       ARMArchVersion = V7A;
-      if (Len >= Idx+2 && TT[Idx+1] == 'm')
+      ARMArchFeature = ",+v7a";
+      if (Len >= Idx+2 && TT[Idx+1] == 'm') {
         ARMArchVersion = V7M;
+        ARMArchFeature = ",+v7m";
+      }
     } else if (SubVer == '6') {
       ARMArchVersion = V6;
-      if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
+      ARMArchFeature = ",+v6";
+      if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') {
         ARMArchVersion = V6T2;
+        ARMArchFeature = ",+v6t2";
+      }
     } else if (SubVer == '5') {
       ARMArchVersion = V5T;
-      if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
+      ARMArchFeature = ",+v5t";
+      if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') {
         ARMArchVersion = V5TE;
+        ARMArchFeature = ",+v5te";
+      }
     } else if (SubVer == '4') {
-      if (Len >= Idx+2 && TT[Idx+1] == 't')
+      if (Len >= Idx+2 && TT[Idx+1] == 't') {
         ARMArchVersion = V4T;
-      else
+        ARMArchFeature = ",+v4t";
+      } else {
         ARMArchVersion = V4;
+        ARMArchFeature = "";
+      }
     }
   }

+  if (TT.find("eabi") != std::string::npos)
+    TargetABI = ARM_ABI_AAPCS;
+
+  // Parse features string.  If the first entry in FS (the CPU) is missing,
+  // insert the architecture feature derived from the target triple.  This is
+  // important for setting features that are implied based on the architecture
+  // version.
+  std::string FSWithArch;
+  if (FS.empty())
+    FSWithArch = std::string(ARMArchFeature);
+  else if (FS.find(',') == 0)
+    FSWithArch = std::string(ARMArchFeature) + FS;
+  else
+    FSWithArch = FS;
+  CPUString = ParseSubtargetFeatures(FSWithArch, CPUString);
+
+  // After parsing Itineraries, set ItinData.IssueWidth.
+  computeIssueWidth();
+
   // Thumb2 implies at least V6T2.
   if (ARMArchVersion >= V6T2)
     ThumbMode = Thumb2;
   else if (ThumbMode >= Thumb2)
     ARMArchVersion = V6T2;

-  if (Len >= 10) {
-    if (TT.find("-darwin") != std::string::npos)
-      // arm-darwin
-      TargetType = isDarwin;
-  }
-
-  if (TT.find("eabi") != std::string::npos)
-    TargetABI = ARM_ABI_AAPCS;
-
   if (isAAPCS_ABI())
     stackAlignment = 8;

-  if (isTargetDarwin())
+  if (!isTargetDarwin())
+    UseMovt = hasV6T2Ops();
+  else {
     IsR9Reserved = ReserveR9 | (ARMArchVersion < V6);
+    UseMovt = DarwinUseMOVT && hasV6T2Ops();
+  }

   if (!isThumb() || hasThumb2())
     PostRAScheduler = true;
+
+  // v6+ may or may not support unaligned mem access depending on the system
+  // configuration.
+  if (!StrictAlign && hasV6Ops() && isTargetDarwin())
+    AllowsUnalignedMem = true;
 }

 /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
@@ -163,7 +202,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
   // through a stub.
   if (!isDecl && !GV->isWeakForLinker())
     return false;
-
+
   // Unless we have a symbol with hidden visibility, we have to go through a
   // normal $non_lazy_ptr stub because this symbol might be resolved late.
   if (!GV->hasHiddenVisibility())  // Non-hidden $non_lazy_ptr reference.
@@ -174,6 +213,34 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
   return false;
 }

+unsigned ARMSubtarget::getMispredictionPenalty() const {
+  // If we have a reasonable estimate of the pipeline depth, then we can
+  // estimate the penalty of a misprediction based on that.
+  if (isCortexA8())
+    return 13;
+  else if (isCortexA9())
+    return 8;
+
+  // Otherwise, just return a sensible default.
+  return 10;
+}
+
+void ARMSubtarget::computeIssueWidth() {
+  unsigned allStage1Units = 0;
+  for (const InstrItinerary *itin = InstrItins.Itineraries;
+       itin->FirstStage != ~0U; ++itin) {
+    const InstrStage *IS = InstrItins.Stages + itin->FirstStage;
+    allStage1Units |= IS->getUnits();
+  }
+  InstrItins.IssueWidth = 0;
+  while (allStage1Units) {
+    ++InstrItins.IssueWidth;
+    // clear the lowest bit
+    allStage1Units ^= allStage1Units & ~(allStage1Units - 1);
+  }
+  assert(InstrItins.IssueWidth <= 2 && "itinerary bug, too many stage 1 units");
+}
+
 bool ARMSubtarget::enablePostRAScheduler(
            CodeGenOpt::Level OptLevel,
            TargetSubtarget::AntiDepBreakMode& Mode,
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 67e5803..76c1c3f 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -17,7 +17,7 @@
 #include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetSubtarget.h"
-#include "ARMBaseRegisterInfo.h"
+#include "llvm/ADT/Triple.h"
 #include <string>

 namespace llvm {
@@ -29,6 +29,10 @@ protected:
     V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
   };

+  enum ARMProcFamilyEnum {
+    Others, CortexA8, CortexA9
+  };
+
   enum ARMFPEnum {
     None, VFPv2, VFPv3, NEON
   };
@@ -42,6 +46,9 @@ protected:
   ///  V6, V6T2, V7A, V7M.
   ARMArchEnum ARMArchVersion;

+  /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
+  ARMProcFamilyEnum ARMProcFamily;
+
   /// ARMFPUType - Floating Point Unit type.
   ARMFPEnum ARMFPUType;

@@ -50,9 +57,9 @@ protected:
   /// determine if NEON should actually be used.
   bool UseNEONForSinglePrecisionFP;

-  /// SlowVMLx - If the VFP2 instructions are available, indicates whether
-  /// the VML[AS] instructions are slow (if so, don't use them).
-  bool SlowVMLx;
+  /// SlowFPVMLx - If the VFP2 / NEON instructions are available, indicates
+  /// whether the FP VML[AS] instructions are slow (if so, don't use them).
+  bool SlowFPVMLx;

   /// SlowFPBrcc - True if floating point compare + branch is slow.
   bool SlowFPBrcc;
@@ -80,6 +87,10 @@ protected:
   /// only so far)
   bool HasFP16;

+  /// HasD16 - True if subtarget is limited to 16 double precision
+  /// FP registers for VFPv3.
+  bool HasD16;
+
   /// HasHardwareDivide - True if subtarget supports [su]div
   bool HasHardwareDivide;

@@ -95,10 +106,19 @@ protected:
   /// over 16-bit ones.
   bool Pref32BitThumb;

+  /// HasMPExtension - True if the subtarget supports Multiprocessing
+  /// extension (ARMv7 only).
+  bool HasMPExtension;
+
   /// FPOnlySP - If true, the floating point unit only supports single
   /// precision.
   bool FPOnlySP;

+  /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
+  /// accesses for some types.  For details, see
+  /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
+  bool AllowsUnalignedMem;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -106,6 +126,9 @@ protected:
   /// CPUString - String name of used CPU.
   std::string CPUString;

+  /// TargetTriple - What processor and OS we're targeting.
+  Triple TargetTriple;
+
   /// Selected instruction itineraries (one entry per itinerary class.)
   InstrItineraryData InstrItins;
@@ -136,6 +159,8 @@ protected:
   std::string ParseSubtargetFeatures(const std::string &FS,
                                      const std::string &CPU);

+  void computeIssueWidth();
+
   bool hasV4TOps()  const { return ARMArchVersion >= V4T;  }
   bool hasV5TOps()  const { return ARMArchVersion >= V5T;  }
   bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
@@ -143,6 +168,9 @@ protected:
   bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
   bool hasV7Ops()   const { return ARMArchVersion >= V7A;  }

+  bool isCortexA8() const { return ARMProcFamily == CortexA8; }
+  bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+
   bool hasARMOps() const { return !NoARM; }

   bool hasVFP2() const { return ARMFPUType >= VFPv2; }
@@ -153,15 +181,17 @@ protected:
   bool hasDivide() const { return HasHardwareDivide; }
   bool hasT2ExtractPack() const { return HasT2ExtractPack; }
   bool hasDataBarrier() const { return HasDataBarrier; }
-  bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
+  bool useFPVMLx() const { return !SlowFPVMLx; }
   bool isFPBrccSlow() const { return SlowFPBrcc; }
   bool isFPOnlySP() const { return FPOnlySP; }
   bool prefers32BitThumb() const { return Pref32BitThumb; }
+  bool hasMPExtension() const { return HasMPExtension; }

   bool hasFP16() const { return HasFP16; }
+  bool hasD16() const { return HasD16; }

-  bool isTargetDarwin() const { return TargetType == isDarwin; }
-  bool isTargetELF() const { return TargetType == isELF; }
+  bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
+  bool isTargetELF() const { return !isTargetDarwin(); }

   bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
   bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
@@ -175,8 +205,12 @@ protected:

   bool useMovt() const { return UseMovt && hasV6T2Ops(); }

+  bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+
   const std::string & getCPUString() const { return CPUString; }

+  unsigned getMispredictionPenalty() const;
+
   /// enablePostRAScheduler - True at 'More' optimization.
   bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
                              TargetSubtarget::AntiDepBreakMode& Mode,
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 30ff827..0ee773b 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -12,15 +12,18 @@

 #include "ARMTargetMachine.h"
 #include "ARMMCAsmInfo.h"
-#include "ARMFrameInfo.h"
+#include "ARMFrameLowering.h"
 #include "ARM.h"
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;

+static cl::opt<bool> ExpandMLx("expand-fp-mlx", cl::init(false), cl::Hidden);
+
 static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   Triple TheTriple(TT);
   switch (TheTriple.getOS()) {
@@ -31,6 +34,26 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   }
 }

+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+                                    MCContext &Ctx, TargetAsmBackend &TAB,
+                                    raw_ostream &OS,
+                                    MCCodeEmitter *Emitter,
+                                    bool RelaxAll,
+                                    bool NoExecStack) {
+  switch (Triple(TT).getOS()) {
+  case Triple::Darwin:
+    return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
+  case Triple::MinGW32:
+  case Triple::Cygwin:
+  case Triple::Win32:
+    llvm_unreachable("ARM does not support Windows COFF format");
+    return NULL;
+  default:
+    return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
+  }
+}
+
 extern "C" void LLVMInitializeARMTarget() {
   // Register the target.
   RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
@@ -39,6 +62,19 @@ extern "C" void LLVMInitializeARMTarget() {
   // Register the target asm info.
   RegisterAsmInfoFn A(TheARMTarget, createMCAsmInfo);
   RegisterAsmInfoFn B(TheThumbTarget, createMCAsmInfo);
+
+  // Register the MC Code Emitter
+  TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
+  TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
+
+  // Register the asm backend.
+  TargetRegistry::RegisterAsmBackend(TheARMTarget, createARMAsmBackend);
+  TargetRegistry::RegisterAsmBackend(TheThumbTarget, createARMAsmBackend);
+
+  // Register the object streamer.
+  TargetRegistry::RegisterObjectStreamer(TheARMTarget, createMCStreamer);
+  TargetRegistry::RegisterObjectStreamer(TheThumbTarget, createMCStreamer);
+
 }

 /// TargetMachine ctor - Create an ARM architecture model.
@@ -49,9 +85,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
                                            bool isThumb)
   : LLVMTargetMachine(T, TT),
     Subtarget(TT, FS, isThumb),
-    FrameInfo(Subtarget),
     JITInfo(),
-    InstrItins(Subtarget.getInstrItineraryData()) {
+    InstrItins(Subtarget.getInstrItineraryData())
+{
   DefRelocModel = getRelocationModel();
 }

@@ -59,12 +95,14 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
                                    const std::string &FS)
   : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
     DataLayout(Subtarget.isAPCS_ABI() ?
-               std::string("e-p:32:32-f64:32:32-i64:32:32-"
+               std::string("e-p:32:32-f64:32:64-i64:32:64-"
                            "v128:32:128-v64:32:64-n32") :
               std::string("e-p:32:32-f64:64:64-i64:64:64-"
                            "v128:64:128-v64:64:64-n32")),
+    ELFWriterInfo(*this),
     TLInfo(*this),
-    TSInfo(*this) {
+    TSInfo(*this),
+    FrameLowering(Subtarget) {
   if (!Subtarget.hasARMOps())
     report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
                        "support ARM mode execution!");
@@ -77,14 +115,18 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
       ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
       : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
     DataLayout(Subtarget.isAPCS_ABI() ?
-               std::string("e-p:32:32-f64:32:32-i64:32:32-"
+               std::string("e-p:32:32-f64:32:64-i64:32:64-"
                            "i16:16:32-i8:8:32-i1:8:32-"
                            "v128:32:128-v64:32:64-a:0:32-n32") :
               std::string("e-p:32:32-f64:64:64-i64:64:64-"
                            "i16:16:32-i8:8:32-i1:8:32-"
                            "v128:64:128-v64:64:64-a:0:32-n32")),
+    ELFWriterInfo(*this),
     TLInfo(*this),
-    TSInfo(*this) {
+    TSInfo(*this),
+    FrameLowering(Subtarget.hasThumb2()
+                  ? new ARMFrameLowering(Subtarget)
+                  : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
 }

 // Pass Pipeline Configuration
@@ -104,12 +146,12 @@ bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,

 bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
                                           CodeGenOpt::Level OptLevel) {
-  if (Subtarget.hasNEON())
-    PM.add(createNEONPreAllocPass());
-
   // FIXME: temporarily disabling load / store optimization pass for Thumb1.
   if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
     PM.add(createARMLoadStoreOptimizationPass(true));
+  if (ExpandMLx &&
+      OptLevel != CodeGenOpt::None && Subtarget.hasVFP2())
+    PM.add(createMLxExpansionPass());

   return true;
 }
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 17e5425..e0aa149 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -14,16 +14,19 @@
 #ifndef ARMTARGETMACHINE_H
 #define ARMTARGETMACHINE_H

-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
 #include "ARMInstrInfo.h"
-#include "ARMFrameInfo.h"
+#include "ARMELFWriterInfo.h"
+#include "ARMFrameLowering.h"
 #include "ARMJITInfo.h"
 #include "ARMSubtarget.h"
 #include "ARMISelLowering.h"
 #include "ARMSelectionDAGInfo.h"
 #include "Thumb1InstrInfo.h"
+#include "Thumb1FrameLowering.h"
 #include "Thumb2InstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/ADT/OwningPtr.h"

 namespace llvm {
@@ -31,9 +34,7 @@ namespace llvm {
 class ARMBaseTargetMachine : public LLVMTargetMachine {
 protected:
   ARMSubtarget Subtarget;
-
 private:
-  ARMFrameInfo FrameInfo;
   ARMJITInfo JITInfo;
   InstrItineraryData InstrItins;
   Reloc::Model DefRelocModel;    // Reloc model before it's overridden.
@@ -42,11 +43,10 @@ public:
   ARMBaseTargetMachine(const Target &T, const std::string &TT,
                        const std::string &FS, bool isThumb);

-  virtual const ARMFrameInfo *getFrameInfo() const { return &FrameInfo; }
   virtual       ARMJITInfo    *getJITInfo()         { return &JITInfo; }
   virtual const ARMSubtarget  *getSubtargetImpl() const { return &Subtarget; }
-  virtual const InstrItineraryData getInstrItineraryData() const {
-    return InstrItins;
+  virtual const InstrItineraryData *getInstrItineraryData() const {
+    return &InstrItins;
   }

   // Pass Pipeline Configuration
@@ -64,9 +64,11 @@ public:
 class ARMTargetMachine : public ARMBaseTargetMachine {
   ARMInstrInfo InstrInfo;
   const TargetData DataLayout;       // Calculates type size & alignment
+  ARMELFWriterInfo ELFWriterInfo;
   ARMTargetLowering TLInfo;
   ARMSelectionDAGInfo TSInfo;
-public:
+  ARMFrameLowering FrameLowering;
+ public:
   ARMTargetMachine(const Target &T, const std::string &TT,
                    const std::string &FS);

@@ -81,9 +83,15 @@ public:
   virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const {
     return &TSInfo;
   }
+  virtual const ARMFrameLowering *getFrameLowering() const {
+    return &FrameLowering;
+  }
   virtual const ARMInstrInfo     *getInstrInfo() const { return &InstrInfo; }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
+  virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+    return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+  }
 };

 /// ThumbTargetMachine - Thumb target machine.
@@ -94,8 +102,11 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
   // Either Thumb1InstrInfo or Thumb2InstrInfo.
   OwningPtr<ARMBaseInstrInfo> InstrInfo;
   const TargetData    DataLayout;   // Calculates type size & alignment
+  ARMELFWriterInfo ELFWriterInfo;
   ARMTargetLowering   TLInfo;
   ARMSelectionDAGInfo TSInfo;
+  // Either Thumb1FrameLowering or ARMFrameLowering.
+  OwningPtr<ARMFrameLowering> FrameLowering;
 public:
   ThumbTargetMachine(const Target &T, const std::string &TT,
                      const std::string &FS);
@@ -117,7 +128,14 @@ public:
   virtual const ARMBaseInstrInfo *getInstrInfo() const {
     return InstrInfo.get();
   }
+  /// returns either Thumb1FrameLowering or ARMFrameLowering
+  virtual const ARMFrameLowering *getFrameLowering() const {
+    return FrameLowering.get();
+  }
   virtual const TargetData *getTargetData() const { return &DataLayout; }
+  virtual const ARMELFWriterInfo *getELFWriterInfo() const {
+    return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
+  }
 };

 } // end namespace llvm
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 091a3b3..7535da5 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -12,6 +12,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 using namespace dwarf;
@@ -26,14 +27,20 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,

   if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) {
     StaticCtorSection =
-      getContext().getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY,
-                                 MCSectionELF::SHF_WRITE |
-                                 MCSectionELF::SHF_ALLOC,
+      getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+                                 ELF::SHF_WRITE |
+                                 ELF::SHF_ALLOC,
                                  SectionKind::getDataRel());
     StaticDtorSection =
-      getContext().getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY,
-                                 MCSectionELF::SHF_WRITE |
-                                 MCSectionELF::SHF_ALLOC,
+      getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+                                 ELF::SHF_WRITE |
+                                 ELF::SHF_ALLOC,
                                  SectionKind::getDataRel());
   }
+
+  AttributesSection =
+    getContext().getELFSection(".ARM.attributes",
+                               ELF::SHT_ARM_ATTRIBUTES,
+                               0,
+                               SectionKind::getMetadata());
 }
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index 097fc2c..c6a7261 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -18,10 +18,19 @@ class MCContext;
 class TargetMachine;

 class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
+protected:
+  const MCSection *AttributesSection;
 public:
-  ARMElfTargetObjectFile() : TargetLoweringObjectFileELF() {}
+  ARMElfTargetObjectFile() :
+    TargetLoweringObjectFileELF(),
+    AttributesSection(NULL)
+  {}

   virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+  virtual const MCSection *getAttributesSection() const {
+    return AttributesSection;
+  }
 };

 } // end namespace llvm
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
index f859d1b..2428ce1 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -10,10 +10,6 @@

 #include "ARM.h"
 #include "ARMTargetMachine.h"

-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -22,119 +18,135 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegistry.h"

+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+
 #include <string>
 #include <map>

 using namespace llvm;

 namespace {
-
-  class ARMBaseAsmLexer : public TargetAsmLexer {
-    const MCAsmInfo &AsmInfo;
-
-    const AsmToken &lexDefinite() {
-      return getLexer()->Lex();
-    }
-
-    AsmToken LexTokenUAL();
-  protected:
-    typedef std::map <std::string, unsigned> rmap_ty;
-
-    rmap_ty RegisterMap;
-
-    void InitRegisterMap(const TargetRegisterInfo *info) {
-      unsigned numRegs = info->getNumRegs();
-
-      for (unsigned i = 0; i < numRegs; ++i) {
-        const char *regName = info->getName(i);
-        if (regName)
-          RegisterMap[regName] = i;
-      }
-    }
-
-    unsigned MatchRegisterName(StringRef Name) {
-      rmap_ty::iterator iter = RegisterMap.find(Name.str());
-      if (iter != RegisterMap.end())
-        return iter->second;
-      else
-        return 0;
-    }
-
-    AsmToken LexToken() {
-      if (!Lexer) {
-        SetError(SMLoc(), "No MCAsmLexer installed");
-        return AsmToken(AsmToken::Error, "", 0);
-      }
-
-      switch (AsmInfo.getAssemblerDialect()) {
-      default:
-        SetError(SMLoc(), "Unhandled dialect");
-        return AsmToken(AsmToken::Error, "", 0);
-      case 0:
-        return LexTokenUAL();
-      }
-    }
-  public:
-    ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
-      : TargetAsmLexer(T), AsmInfo(MAI) {
+
+class ARMBaseAsmLexer : public TargetAsmLexer {
+  const MCAsmInfo &AsmInfo;
+
+  const AsmToken &lexDefinite() {
+    return getLexer()->Lex();
+  }
+
+  AsmToken LexTokenUAL();
+protected:
+  typedef std::map <std::string, unsigned> rmap_ty;
+
+  rmap_ty RegisterMap;
+
+  void InitRegisterMap(const TargetRegisterInfo *info) {
+    unsigned numRegs = info->getNumRegs();
+
+    for (unsigned i = 0; i < numRegs; ++i) {
+      const char *regName = info->getName(i);
+      if (regName)
+        RegisterMap[regName] = i;
     }
-  };
-
-  class ARMAsmLexer : public ARMBaseAsmLexer {
-  public:
-    ARMAsmLexer(const Target &T, const MCAsmInfo &MAI)
-      : ARMBaseAsmLexer(T, MAI) {
-      std::string tripleString("arm-unknown-unknown");
-      std::string featureString;
-      OwningPtr<const TargetMachine>
-        targetMachine(T.createTargetMachine(tripleString, featureString));
-      InitRegisterMap(targetMachine->getRegisterInfo());
+  }
+
+  unsigned MatchRegisterName(StringRef Name) {
+    rmap_ty::iterator iter = RegisterMap.find(Name.str());
+    if (iter != RegisterMap.end())
+      return iter->second;
+    else
+      return 0;
+  }
+
+  AsmToken LexToken() {
+    if (!Lexer) {
+      SetError(SMLoc(), "No MCAsmLexer installed");
+      return AsmToken(AsmToken::Error, "", 0);
     }
-  };
-
-  class ThumbAsmLexer : public ARMBaseAsmLexer {
-  public:
-    ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI)
-      : ARMBaseAsmLexer(T, MAI) {
-      std::string tripleString("thumb-unknown-unknown");
-      std::string featureString;
-      OwningPtr<const TargetMachine>
-        targetMachine(T.createTargetMachine(tripleString, featureString));
-      InitRegisterMap(targetMachine->getRegisterInfo());
+
+    switch (AsmInfo.getAssemblerDialect()) {
+    default:
+      SetError(SMLoc(), "Unhandled dialect");
+      return AsmToken(AsmToken::Error, "", 0);
+    case 0:
+      return LexTokenUAL();
     }
-  };
-}
+  }
+public:
+  ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI)
+    : TargetAsmLexer(T), AsmInfo(MAI) {
+  }
+};
+
+class ARMAsmLexer : public ARMBaseAsmLexer {
+public:
+  ARMAsmLexer(const Target &T, const MCAsmInfo &MAI)
+    : ARMBaseAsmLexer(T, MAI) {
+    std::string tripleString("arm-unknown-unknown");
+    std::string featureString;
+    OwningPtr<const TargetMachine>
+      targetMachine(T.createTargetMachine(tripleString, featureString));
+    InitRegisterMap(targetMachine->getRegisterInfo());
+  }
+};
+
+class ThumbAsmLexer : public ARMBaseAsmLexer {
+public:
+  ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI)
+    : ARMBaseAsmLexer(T, MAI) {
+    std::string tripleString("thumb-unknown-unknown");
+    std::string featureString;
+    OwningPtr<const TargetMachine>
+      targetMachine(T.createTargetMachine(tripleString, featureString));
+    InitRegisterMap(targetMachine->getRegisterInfo());
+  }
+};
+
+} // end anonymous namespace

 AsmToken ARMBaseAsmLexer::LexTokenUAL() {
   const AsmToken &lexedToken = lexDefinite();
-
+
   switch (lexedToken.getKind()) {
-  default:
-    return AsmToken(lexedToken);
+  default: break;
   case AsmToken::Error:
     SetError(Lexer->getErrLoc(), Lexer->getErr());
-    return AsmToken(lexedToken);
-  case AsmToken::Identifier:
-  {
+    break;
+  case AsmToken::Identifier: {
     std::string upperCase = lexedToken.getString().str();
     std::string lowerCase = LowercaseString(upperCase);
     StringRef lowerRef(lowerCase);
-
+
     unsigned regID = MatchRegisterName(lowerRef);
-
-    if (regID) {
+    // Check for register aliases.
+    //   r13 -> sp
+    //   r14 -> lr
+    //   r15 -> pc
+    //   ip  -> r12
+    // FIXME: Some assemblers support lots of others. Do we want them all?
+    if (!regID) {
+      regID = StringSwitch<unsigned>(lowerCase)
+        .Case("r13", ARM::SP)
+        .Case("r14", ARM::LR)
+        .Case("r15", ARM::PC)
+        .Case("ip", ARM::R12)
+        .Default(0);
+    }
+
+    if (regID)
       return AsmToken(AsmToken::Register,
                       lexedToken.getString(),
                       static_cast<int64_t>(regID));
-    } else {
-      return AsmToken(lexedToken);
-    }
   }
   }
+
+  return AsmToken(lexedToken);
 }

 extern "C" void LLVMInitializeARMAsmLexer() {
   RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget);
   RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget);
 }
-
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 75e2a73..129af20 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -8,28 +8,28 @@
 //===----------------------------------------------------------------------===//

 #include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMCExpr.h"
+#include "ARMBaseRegisterInfo.h"
 #include "ARMSubtarget.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Target/TargetAsmParser.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"

 using namespace llvm;

-namespace {
-struct ARMOperand;
-
-// The shift types for register controlled shifts in arm memory addressing
+/// Shift types used for register controlled shifts in ARM memory addressing.
 enum ShiftType {
   Lsl,
   Lsr,
@@ -38,24 +38,30 @@ enum ShiftType {
   Rrx
 };

+namespace {
+
+class ARMOperand;
+
 class ARMAsmParser : public TargetAsmParser {
   MCAsmParser &Parser;
   TargetMachine &TM;

-private:
   MCAsmParser &getParser() const { return Parser; }
-
   MCAsmLexer &getLexer() const { return Parser.getLexer(); }

   void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
-
   bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }

-  bool MaybeParseRegister(OwningPtr<ARMOperand> &Op, bool ParseWriteBack);
+  int TryParseRegister();
+  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+  bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
+  bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
+  bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &);
+  bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
+  bool ParsePrefix(ARMMCExpr::VariantKind &RefKind);
+  const MCExpr *ApplyPrefixToExpr(const MCExpr *E,
+                                  MCSymbolRefExpr::VariantKind Variant);

-  bool ParseRegisterList(OwningPtr<ARMOperand> &Op);
-
-  bool ParseMemory(OwningPtr<ARMOperand> &Op);
   bool ParseMemoryOffsetReg(bool &Negative,
                             bool &OffsetRegShifted,
@@ -65,70 +71,76 @@ private:
                             bool &OffsetIsReg,
                             int &OffsetRegNum,
                             SMLoc &E);
-
   bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E);
-
-  bool ParseOperand(OwningPtr<ARMOperand> &Op);
-
   bool ParseDirectiveWord(unsigned Size, SMLoc L);
-
   bool ParseDirectiveThumb(SMLoc L);
-
   bool ParseDirectiveThumbFunc(SMLoc L);
-
   bool ParseDirectiveCode(SMLoc L);
-
   bool ParseDirectiveSyntax(SMLoc L);

-  bool MatchInstruction(SMLoc IDLoc,
-                        const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                        MCInst &Inst) {
-    if (!MatchInstructionImpl(Operands, Inst))
-      return false;
-
-    // FIXME: We should give nicer diagnostics about the exact failure.
-    Error(IDLoc, "unrecognized instruction");
-
-    return true;
-  }
+  bool MatchAndEmitInstruction(SMLoc IDLoc,
+                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                               MCStreamer &Out);
+  void GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+                             bool &CanAcceptPredicationCode);

   /// @name Auto-generated Match Functions
   /// {

-  unsigned ComputeAvailableFeatures(const ARMSubtarget *Subtarget) const;
-
-  bool MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>
-                              &Operands,
-                            MCInst &Inst);
+#define GET_ASSEMBLER_HEADER
+#include "ARMGenAsmMatcher.inc"

   /// }

+  OperandMatchResultTy tryParseCoprocNumOperand(
+    SmallVectorImpl<MCParsedAsmOperand*>&);
+  OperandMatchResultTy tryParseCoprocRegOperand(
+    SmallVectorImpl<MCParsedAsmOperand*>&);
+  OperandMatchResultTy tryParseMemBarrierOptOperand(
+    SmallVectorImpl<MCParsedAsmOperand*>&);
+  OperandMatchResultTy tryParseProcIFlagsOperand(
+    SmallVectorImpl<MCParsedAsmOperand*>&);
+  OperandMatchResultTy tryParseMSRMaskOperand(
+    SmallVectorImpl<MCParsedAsmOperand*>&);
+
 public:
   ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
-    : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}
+    : TargetAsmParser(T), Parser(_Parser), TM(_TM) {
+    // Initialize the set of available features.
+    setAvailableFeatures(ComputeAvailableFeatures(
+      &TM.getSubtarget<ARMSubtarget>()));
+  }

   virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
   virtual bool ParseDirective(AsmToken DirectiveID);
 };
-
+} // end anonymous namespace
+
+namespace {
+
 /// ARMOperand - Instances of this class represent a parsed ARM machine
 /// instruction.
-struct ARMOperand : public MCParsedAsmOperand {
-private:
-  ARMOperand() {}
-public:
+class ARMOperand : public MCParsedAsmOperand {
   enum KindTy {
     CondCode,
+    CCOut,
+    CoprocNum,
+    CoprocReg,
     Immediate,
+    MemBarrierOpt,
     Memory,
+    MSRMask,
+    ProcIFlags,
     Register,
+    RegisterList,
+    DPRRegisterList,
+    SPRRegisterList,
     Token
   } Kind;

   SMLoc StartLoc, EndLoc;
+  SmallVector<unsigned, 8> Registers;

   union {
     struct {
@@ -136,40 +148,54 @@ public:
     } CC;

     struct {
+      ARM_MB::MemBOpt Val;
+    } MBOpt;
+
+    struct {
+      unsigned Val;
+    } Cop;
+
+    struct {
+      ARM_PROC::IFlags Val;
+    } IFlags;
+
+    struct {
+      unsigned Val;
+    } MMask;
+
+    struct {
       const char *Data;
       unsigned Length;
     } Tok;

     struct {
       unsigned RegNum;
-      bool Writeback;
     } Reg;

     struct {
       const MCExpr *Val;
     } Imm;
-
-    // This is for all forms of ARM address expressions
+
+    /// Combined record for all forms of ARM address expressions.
     struct {
       unsigned BaseRegNum;
-      unsigned OffsetRegNum;         // used when OffsetIsReg is true
-      const MCExpr *Offset;          // used when OffsetIsReg is false
-      const MCExpr *ShiftAmount;     // used when OffsetRegShifted is true
-      enum ShiftType ShiftType;      // used when OffsetRegShifted is true
-      unsigned
-        OffsetRegShifted : 1,        // only used when OffsetIsReg is true
-        Preindexed : 1,
-        Postindexed : 1,
-        OffsetIsReg : 1,
-        Negative : 1,                // only used when OffsetIsReg is true
-        Writeback : 1;
+      union {
+        unsigned RegNum;     ///< Offset register num, when OffsetIsReg.
+        const MCExpr *Value; ///< Offset value, when !OffsetIsReg.
+      } Offset;
+      const MCExpr *ShiftAmount;     // used when OffsetRegShifted is true
+      enum ShiftType ShiftType;      // used when OffsetRegShifted is true
+      unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true
+      unsigned Preindexed : 1;
+      unsigned Postindexed : 1;
+      unsigned OffsetIsReg : 1;
+      unsigned Negative : 1;         // only used when OffsetIsReg is true
+      unsigned Writeback : 1;
     } Mem;
-
   };
-
-  //ARMOperand(KindTy K, SMLoc S, SMLoc E)
-  //  : Kind(K), StartLoc(S), EndLoc(E) {}
-
+
+  ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
   ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
     Kind = o.Kind;
     StartLoc = o.StartLoc;
@@ -181,18 +207,36 @@ public:
     case Token:
       Tok = o.Tok;
      break;
+    case CCOut:
     case Register:
       Reg = o.Reg;
      break;
+    case RegisterList:
+    case DPRRegisterList:
+    case SPRRegisterList:
+      Registers = o.Registers;
+      break;
+    case CoprocNum:
+    case CoprocReg:
+      Cop = o.Cop;
+      break;
     case Immediate:
       Imm = o.Imm;
      break;
+    case MemBarrierOpt:
+      MBOpt = o.MBOpt;
+      break;
     case Memory:
       Mem = o.Mem;
      break;
+    case MSRMask:
+      MMask = o.MMask;
+      break;
+    case ProcIFlags:
+      IFlags = o.IFlags;
     }
   }
-
+
   /// getStartLoc - Get the location of the first token of this operand.
   SMLoc getStartLoc() const { return StartLoc; }
   /// getEndLoc - Get the location of the last token of this operand.
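The rewritten ARMOperand above is a classic tagged union: a Kind discriminator, a union of per-kind payload structs, and (in the hunk that follows) accessors that assert the discriminator before touching a payload. A stripped-down sketch of the same pattern, with illustrative names only:

    #include <cassert>

    // Sketch of the ARMOperand layout above: one discriminator plus a
    // union, so every operand is only as large as its biggest payload.
    class OperandSketch {
      enum KindTy { Immediate, Register } Kind; // discriminator
      union {
        struct { long Val; } Imm;        // payload when Kind == Immediate
        struct { unsigned RegNum; } Reg; // payload when Kind == Register
      };
    public:
      static OperandSketch CreateReg(unsigned RegNum) {
        OperandSketch Op;
        Op.Kind = Register;
        Op.Reg.RegNum = RegNum;
        return Op;
      }
      bool isReg() const { return Kind == Register; }
      unsigned getReg() const {
        assert(Kind == Register && "Invalid access!"); // same guard style
        return Reg.RegNum;
      }
    };

The asserts turn a mis-tagged access into an immediate failure rather than a silent misread of another kind's bits, which is exactly what the "Invalid access!" guards in the next hunk do.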
@@ -203,32 +247,129 @@ public: return CC.Val; } + unsigned getCoproc() const { + assert((Kind == CoprocNum || Kind == CoprocReg) && "Invalid access!"); + return Cop.Val; + } + StringRef getToken() const { assert(Kind == Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); } unsigned getReg() const { - assert(Kind == Register && "Invalid access!"); + assert((Kind == Register || Kind == CCOut) && "Invalid access!"); return Reg.RegNum; } + const SmallVectorImpl<unsigned> &getRegList() const { + assert((Kind == RegisterList || Kind == DPRRegisterList || + Kind == SPRRegisterList) && "Invalid access!"); + return Registers; + } + const MCExpr *getImm() const { assert(Kind == Immediate && "Invalid access!"); return Imm.Val; } - bool isCondCode() const { return Kind == CondCode; } + ARM_MB::MemBOpt getMemBarrierOpt() const { + assert(Kind == MemBarrierOpt && "Invalid access!"); + return MBOpt.Val; + } - bool isImm() const { return Kind == Immediate; } + ARM_PROC::IFlags getProcIFlags() const { + assert(Kind == ProcIFlags && "Invalid access!"); + return IFlags.Val; + } + + unsigned getMSRMask() const { + assert(Kind == MSRMask && "Invalid access!"); + return MMask.Val; + } + + /// @name Memory Operand Accessors + /// @{ + + unsigned getMemBaseRegNum() const { + return Mem.BaseRegNum; + } + unsigned getMemOffsetRegNum() const { + assert(Mem.OffsetIsReg && "Invalid access!"); + return Mem.Offset.RegNum; + } + const MCExpr *getMemOffset() const { + assert(!Mem.OffsetIsReg && "Invalid access!"); + return Mem.Offset.Value; + } + unsigned getMemOffsetRegShifted() const { + assert(Mem.OffsetIsReg && "Invalid access!"); + return Mem.OffsetRegShifted; + } + const MCExpr *getMemShiftAmount() const { + assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!"); + return Mem.ShiftAmount; + } + enum ShiftType getMemShiftType() const { + assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!"); + return Mem.ShiftType; + } + bool getMemPreindexed() const { return Mem.Preindexed; } + bool getMemPostindexed() const { return Mem.Postindexed; } + bool getMemOffsetIsReg() const { return Mem.OffsetIsReg; } + bool getMemNegative() const { return Mem.Negative; } + bool getMemWriteback() const { return Mem.Writeback; } + + /// @} + bool isCoprocNum() const { return Kind == CoprocNum; } + bool isCoprocReg() const { return Kind == CoprocReg; } + bool isCondCode() const { return Kind == CondCode; } + bool isCCOut() const { return Kind == CCOut; } + bool isImm() const { return Kind == Immediate; } bool isReg() const { return Kind == Register; } + bool isRegList() const { return Kind == RegisterList; } + bool isDPRRegList() const { return Kind == DPRRegisterList; } + bool isSPRRegList() const { return Kind == SPRRegisterList; } + bool isToken() const { return Kind == Token; } + bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; } + bool isMemory() const { return Kind == Memory; } + bool isMemMode5() const { + if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() || + getMemNegative()) + return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + if (!CE) return false; + + // The offset must be a multiple of 4 in the range 0-1020. 
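+    // Illustrative note (editor's addition, not part of the imported
+    // revision): the check below also admits negative multiples of 4 down
+    // to -1020, e.g. an offset of -16 passes, while -18 (not a multiple of
+    // 4) and 1024 (out of range) are rejected.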
+ int64_t Value = CE->getValue(); + return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020); + } + bool isMemModeRegThumb() const { + if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback()) + return false; + return true; + } + bool isMemModeImmThumb() const { + if (!isMemory() || getMemOffsetIsReg() || getMemWriteback()) + return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + if (!CE) return false; - bool isToken() const {return Kind == Token; } + // The offset must be a multiple of 4 in the range 0-124. + uint64_t Value = CE->getValue(); + return ((Value & 0x3) == 0 && Value <= 124); + } + bool isMSRMask() const { return Kind == MSRMask; } + bool isProcIFlags() const { return Kind == ProcIFlags; } void addExpr(MCInst &Inst, const MCExpr *Expr) const { - // Add as immediates when possible. - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + // Add as immediates when possible. Null MCExpr = 0. + if (Expr == 0) + Inst.addOperand(MCOperand::CreateImm(0)); + else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) Inst.addOperand(MCOperand::CreateImm(CE->getValue())); else Inst.addOperand(MCOperand::CreateExpr(Expr)); @@ -237,8 +378,23 @@ public: void addCondCodeOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode()))); - // FIXME: What belongs here? - Inst.addOperand(MCOperand::CreateReg(0)); + unsigned RegNum = getCondCode() == ARMCC::AL ? 0: ARM::CPSR; + Inst.addOperand(MCOperand::CreateReg(RegNum)); + } + + void addCoprocNumOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getCoproc())); + } + + void addCoprocRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getCoproc())); + } + + void addCCOutOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); } void addRegOperands(MCInst &Inst, unsigned N) const { @@ -246,66 +402,181 @@ public: Inst.addOperand(MCOperand::CreateReg(getReg())); } + void addRegListOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const SmallVectorImpl<unsigned> &RegList = getRegList(); + for (SmallVectorImpl<unsigned>::const_iterator + I = RegList.begin(), E = RegList.end(); I != E; ++I) + Inst.addOperand(MCOperand::CreateReg(*I)); + } + + void addDPRRegListOperands(MCInst &Inst, unsigned N) const { + addRegListOperands(Inst, N); + } + + void addSPRRegListOperands(MCInst &Inst, unsigned N) const { + addRegListOperands(Inst, N); + } + void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); } + void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); + } + + void addMemMode5Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && isMemMode5() && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); + assert(!getMemOffsetIsReg() && "Invalid mode 5 operand"); + + // FIXME: #-0 is encoded differently than #0. Does the parser preserve + // the difference? 
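+    // Illustrative note (editor's addition, not part of the imported
+    // revision): a worked example of the encoding below: an offset of #-16
+    // gives CE->getValue() / 4 == -4, so the operand is emitted as
+    // ARM_AM::getAM5Opc(ARM_AM::sub, 4); #16 becomes (ARM_AM::add, 4).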
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + assert(CE && "Non-constant mode 5 offset operand!"); + + // The MCInst offset operand doesn't include the low two bits (like + // the instruction encoding). + int64_t Offset = CE->getValue() / 4; + if (Offset >= 0) + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::add, + Offset))); + else + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(ARM_AM::sub, + -Offset))); + } + + void addMemModeRegThumbOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && isMemModeRegThumb() && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); + Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum())); + } + + void addMemModeImmThumbOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && isMemModeImmThumb() && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + assert(CE && "Non-constant mode offset operand!"); + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + } + + void addMSRMaskOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask()))); + } + + void addProcIFlagsOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags()))); + } + virtual void dump(raw_ostream &OS) const; - static void CreateCondCode(OwningPtr<ARMOperand> &Op, ARMCC::CondCodes CC, - SMLoc S) { - Op.reset(new ARMOperand); - Op->Kind = CondCode; + static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { + ARMOperand *Op = new ARMOperand(CondCode); Op->CC.Val = CC; Op->StartLoc = S; Op->EndLoc = S; + return Op; + } + + static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) { + ARMOperand *Op = new ARMOperand(CoprocNum); + Op->Cop.Val = CopVal; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; } - static void CreateToken(OwningPtr<ARMOperand> &Op, StringRef Str, - SMLoc S) { - Op.reset(new ARMOperand); - Op->Kind = Token; + static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) { + ARMOperand *Op = new ARMOperand(CoprocReg); + Op->Cop.Val = CopVal; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) { + ARMOperand *Op = new ARMOperand(CCOut); + Op->Reg.RegNum = RegNum; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static ARMOperand *CreateToken(StringRef Str, SMLoc S) { + ARMOperand *Op = new ARMOperand(Token); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; Op->EndLoc = S; + return Op; } - static void CreateReg(OwningPtr<ARMOperand> &Op, unsigned RegNum, - bool Writeback, SMLoc S, SMLoc E) { - Op.reset(new ARMOperand); - Op->Kind = Register; + static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(Register); Op->Reg.RegNum = RegNum; - Op->Reg.Writeback = Writeback; - Op->StartLoc = S; Op->EndLoc = E; + return Op; } - static void CreateImm(OwningPtr<ARMOperand> &Op, const MCExpr *Val, - SMLoc S, SMLoc E) { - Op.reset(new ARMOperand); - Op->Kind = Immediate; + static ARMOperand * + CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs, + SMLoc StartLoc, SMLoc EndLoc) { + KindTy Kind = RegisterList; + + if (ARM::DPRRegClass.contains(Regs.front().first)) + Kind = DPRRegisterList; + else if 
(ARM::SPRRegClass.contains(Regs.front().first)) + Kind = SPRRegisterList; + + ARMOperand *Op = new ARMOperand(Kind); + for (SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator + I = Regs.begin(), E = Regs.end(); I != E; ++I) + Op->Registers.push_back(I->first); + array_pod_sort(Op->Registers.begin(), Op->Registers.end()); + Op->StartLoc = StartLoc; + Op->EndLoc = EndLoc; + return Op; + } + + static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(Immediate); Op->Imm.Val = Val; - Op->StartLoc = S; Op->EndLoc = E; + return Op; } - static void CreateMem(OwningPtr<ARMOperand> &Op, - unsigned BaseRegNum, bool OffsetIsReg, - const MCExpr *Offset, unsigned OffsetRegNum, - bool OffsetRegShifted, enum ShiftType ShiftType, - const MCExpr *ShiftAmount, bool Preindexed, - bool Postindexed, bool Negative, bool Writeback, - SMLoc S, SMLoc E) { - Op.reset(new ARMOperand); - Op->Kind = Memory; + static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg, + const MCExpr *Offset, int OffsetRegNum, + bool OffsetRegShifted, enum ShiftType ShiftType, + const MCExpr *ShiftAmount, bool Preindexed, + bool Postindexed, bool Negative, bool Writeback, + SMLoc S, SMLoc E) { + assert((OffsetRegNum == -1 || OffsetIsReg) && + "OffsetRegNum must imply OffsetIsReg!"); + assert((!OffsetRegShifted || OffsetIsReg) && + "OffsetRegShifted must imply OffsetIsReg!"); + assert((Offset || OffsetIsReg) && + "Offset must exists unless register offset is used!"); + assert((!ShiftAmount || (OffsetIsReg && OffsetRegShifted)) && + "Cannot have shift amount without shifted register offset!"); + assert((!Offset || !OffsetIsReg) && + "Cannot have expression offset and register offset!"); + + ARMOperand *Op = new ARMOperand(Memory); Op->Mem.BaseRegNum = BaseRegNum; Op->Mem.OffsetIsReg = OffsetIsReg; - Op->Mem.Offset = Offset; - Op->Mem.OffsetRegNum = OffsetRegNum; + if (OffsetIsReg) + Op->Mem.Offset.RegNum = OffsetRegNum; + else + Op->Mem.Offset.Value = Offset; Op->Mem.OffsetRegShifted = OffsetRegShifted; Op->Mem.ShiftType = ShiftType; Op->Mem.ShiftAmount = ShiftAmount; @@ -313,9 +584,34 @@ public: Op->Mem.Postindexed = Postindexed; Op->Mem.Negative = Negative; Op->Mem.Writeback = Writeback; - + Op->StartLoc = S; Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) { + ARMOperand *Op = new ARMOperand(MemBarrierOpt); + Op->MBOpt.Val = Opt; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) { + ARMOperand *Op = new ARMOperand(ProcIFlags); + Op->IFlags.Val = IFlags; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) { + ARMOperand *Op = new ARMOperand(MSRMask); + Op->MMask.Val = MMask; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; } }; @@ -324,17 +620,77 @@ public: void ARMOperand::dump(raw_ostream &OS) const { switch (Kind) { case CondCode: - OS << ARMCondCodeToString(getCondCode()); + OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">"; + break; + case CCOut: + OS << "<ccout " << getReg() << ">"; + break; + case CoprocNum: + OS << "<coprocessor number: " << getCoproc() << ">"; + break; + case CoprocReg: + OS << "<coprocessor register: " << getCoproc() << ">"; + break; + case MSRMask: + OS << "<mask: " << getMSRMask() << ">"; break; case Immediate: getImm()->print(OS); break; + case MemBarrierOpt: + OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">"; + break; 
case Memory: - OS << "<memory>"; + OS << "<memory " + << "base:" << getMemBaseRegNum(); + if (getMemOffsetIsReg()) { + OS << " offset:<register " << getMemOffsetRegNum(); + if (getMemOffsetRegShifted()) { + OS << " offset-shift-type:" << getMemShiftType(); + OS << " offset-shift-amount:" << *getMemShiftAmount(); + } + } else { + OS << " offset:" << *getMemOffset(); + } + if (getMemOffsetIsReg()) + OS << " (offset-is-reg)"; + if (getMemPreindexed()) + OS << " (pre-indexed)"; + if (getMemPostindexed()) + OS << " (post-indexed)"; + if (getMemNegative()) + OS << " (negative)"; + if (getMemWriteback()) + OS << " (writeback)"; + OS << ">"; + break; + case ProcIFlags: { + OS << "<ARM_PROC::"; + unsigned IFlags = getProcIFlags(); + for (int i=2; i >= 0; --i) + if (IFlags & (1 << i)) + OS << ARM_PROC::IFlagsToString(1 << i); + OS << ">"; break; + } case Register: OS << "<register " << getReg() << ">"; break; + case RegisterList: + case DPRRegisterList: + case SPRRegisterList: { + OS << "<register_list "; + + const SmallVectorImpl<unsigned> &RegList = getRegList(); + for (SmallVectorImpl<unsigned>::const_iterator + I = RegList.begin(), E = RegList.end(); I != E; ) { + OS << *I; + if (++I < E) OS << ", "; + } + + OS << ">"; + break; + } case Token: OS << "'" << getToken() << "'"; break; @@ -348,184 +704,456 @@ static unsigned MatchRegisterName(StringRef Name); /// } +bool ARMAsmParser::ParseRegister(unsigned &RegNo, + SMLoc &StartLoc, SMLoc &EndLoc) { + RegNo = TryParseRegister(); + + return (RegNo == (unsigned)-1); +} + /// Try to parse a register name. The token must be an Identifier when called, -/// and if it is a register name a Reg operand is created, the token is eaten -/// and false is returned. Else true is returned and no token is eaten. -/// TODO this is likely to change to allow different register types and or to -/// parse for a specific register type. -bool ARMAsmParser::MaybeParseRegister - (OwningPtr<ARMOperand> &Op, bool ParseWriteBack) { - SMLoc S, E; +/// and if it is a register name the token is eaten and the register number is +/// returned. Otherwise return -1. +/// +int ARMAsmParser::TryParseRegister() { const AsmToken &Tok = Parser.getTok(); assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); // FIXME: Validate register for the current architecture; we have to do // validation later, so maybe there is no need for this here. - int RegNum; + std::string upperCase = Tok.getString().str(); + std::string lowerCase = LowercaseString(upperCase); + unsigned RegNum = MatchRegisterName(lowerCase); + if (!RegNum) { + RegNum = StringSwitch<unsigned>(lowerCase) + .Case("r13", ARM::SP) + .Case("r14", ARM::LR) + .Case("r15", ARM::PC) + .Case("ip", ARM::R12) + .Default(0); + } + if (!RegNum) return -1; - RegNum = MatchRegisterName(Tok.getString()); - if (RegNum == -1) - return true; - - S = Tok.getLoc(); - Parser.Lex(); // Eat identifier token. - - E = Parser.getTok().getLoc(); + return RegNum; +} - bool Writeback = false; - if (ParseWriteBack) { - const AsmToken &ExclaimTok = Parser.getTok(); - if (ExclaimTok.is(AsmToken::Exclaim)) { - E = ExclaimTok.getLoc(); - Writeback = true; - Parser.Lex(); // Eat exclaim token +/// Try to parse a register name. The token must be an Identifier when called. +/// If it's a register, an AsmOperand is created. Another AsmOperand is created +/// if there is a "writeback". 'true' if it's not a register. +/// +/// TODO this is likely to change to allow different register types and or to +/// parse for a specific register type. 
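+/// Illustrative note (editor's addition, not part of the imported revision):
+/// for input "r3!" this pushes a Register operand for r3 followed by a "!"
+/// Token operand; for plain "r3" only the Register operand is pushed.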
+bool ARMAsmParser::
+TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  int RegNo = TryParseRegister();
+  if (RegNo == -1)
+    return true;
+
+  Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc()));
+
+  const AsmToken &ExclaimTok = Parser.getTok();
+  if (ExclaimTok.is(AsmToken::Exclaim)) {
+    Operands.push_back(ARMOperand::CreateToken(ExclaimTok.getString(),
+                                               ExclaimTok.getLoc()));
+    Parser.Lex(); // Eat exclaim token
+  }
+
+  return false;
+}
+
+/// MatchCoprocessorOperandName - Try to parse a coprocessor-related
+/// instruction with a symbolic operand name. Example: "p1", "p7", "c3",
+/// "c5", ...
+static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
+  // Use the same layout as the tablegen'erated register name matcher. Ugly,
+  // but efficient.
+  switch (Name.size()) {
+  default: break;
+  case 2:
+    if (Name[0] != CoprocOp)
+      return -1;
+    switch (Name[1]) {
+    default:  return -1;
+    case '0': return 0;
+    case '1': return 1;
+    case '2': return 2;
+    case '3': return 3;
+    case '4': return 4;
+    case '5': return 5;
+    case '6': return 6;
+    case '7': return 7;
+    case '8': return 8;
+    case '9': return 9;
+    }
+    break;
+  case 3:
+    if (Name[0] != CoprocOp || Name[1] != '1')
+      return -1;
+    switch (Name[2]) {
+    default:  return -1;
+    case '0': return 10;
+    case '1': return 11;
+    case '2': return 12;
+    case '3': return 13;
+    case '4': return 14;
+    case '5': return 15;
+    }
+    break;
+  }
-  ARMOperand::CreateReg(Op, RegNum, Writeback, S, E);
+  return -1;
+}
-  return false;
+/// tryParseCoprocNumOperand - Try to parse a coprocessor number operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// number, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  const AsmToken &Tok = Parser.getTok();
+  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+  int Num = MatchCoprocessorOperandName(Tok.getString(), 'p');
+  if (Num == -1)
+    return MatchOperand_NoMatch;
+
+  Parser.Lex(); // Eat identifier token.
+  Operands.push_back(ARMOperand::CreateCoprocNum(Num, S));
+  return MatchOperand_Success;
 }

-/// Parse a register list, return false if successful else return true or an
-/// error.  The first token must be a '{' when called.
-bool ARMAsmParser::ParseRegisterList(OwningPtr<ARMOperand> &Op) {
-  SMLoc S, E;
-  assert(Parser.getTok().is(AsmToken::LCurly) &&
-         "Token is not an Left Curly Brace");
-  S = Parser.getTok().getLoc();
-  Parser.Lex(); // Eat left curly brace token.
-
-  const AsmToken &RegTok = Parser.getTok();
-  SMLoc RegLoc = RegTok.getLoc();
-  if (RegTok.isNot(AsmToken::Identifier))
-    return Error(RegLoc, "register expected");
-  int RegNum = MatchRegisterName(RegTok.getString());
-  if (RegNum == -1)
-    return Error(RegLoc, "register expected");
+/// tryParseCoprocRegOperand - Try to parse a coprocessor register operand. The
+/// token must be an Identifier when called, and if it is a coprocessor
+/// register, the token is eaten and the operand is added to the operand list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  const AsmToken &Tok = Parser.getTok();
+  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+  int Reg = MatchCoprocessorOperandName(Tok.getString(), 'c');
+  if (Reg == -1)
+    return MatchOperand_NoMatch;
+
+  Parser.Lex(); // Eat identifier token.
-  unsigned RegList = 1 << RegNum;
+  Operands.push_back(ARMOperand::CreateCoprocReg(Reg, S));
+  return MatchOperand_Success;
+}
-  int HighRegNum = RegNum;
-  // TODO ranges like "{Rn-Rm}"
-  while (Parser.getTok().is(AsmToken::Comma)) {
-    Parser.Lex(); // Eat comma token.
+/// Parse a register list. On success the registers are added to the operand
+/// list and false is returned; otherwise true is returned. The first token
+/// must be a '{' when called.
+bool ARMAsmParser::
+ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  assert(Parser.getTok().is(AsmToken::LCurly) &&
+         "Token is not a Left Curly Brace");
+  SMLoc S = Parser.getTok().getLoc();
+
+  // Read the rest of the registers in the list.
+  unsigned PrevRegNum = 0;
+  SmallVector<std::pair<unsigned, SMLoc>, 32> Registers;
+
+  do {
+    bool IsRange = Parser.getTok().is(AsmToken::Minus);
+    Parser.Lex(); // Eat non-identifier token.

    const AsmToken &RegTok = Parser.getTok();
    SMLoc RegLoc = RegTok.getLoc();
-  if (RegTok.isNot(AsmToken::Identifier))
-    return Error(RegLoc, "register expected");
-  int RegNum = MatchRegisterName(RegTok.getString());
-  if (RegNum == -1)
-    return Error(RegLoc, "register expected");
+    if (RegTok.isNot(AsmToken::Identifier)) {
+      Error(RegLoc, "register expected");
+      return true;
+    }

-  if (RegList & (1 << RegNum))
-    Warning(RegLoc, "register duplicated in register list");
-  else if (RegNum <= HighRegNum)
-    Warning(RegLoc, "register not in ascending order in register list");
-  RegList |= 1 << RegNum;
-  HighRegNum = RegNum;
+    int RegNum = TryParseRegister();
+    if (RegNum == -1) {
+      Error(RegLoc, "register expected");
+      return true;
+    }

-  Parser.Lex(); // Eat identifier token.
-  }
+    if (IsRange) {
+      int Reg = PrevRegNum;
+      do {
+        ++Reg;
+        Registers.push_back(std::make_pair(Reg, RegLoc));
+      } while (Reg != RegNum);
+    } else {
+      Registers.push_back(std::make_pair(RegNum, RegLoc));
+    }
+
+    PrevRegNum = RegNum;
+  } while (Parser.getTok().is(AsmToken::Comma) ||
+           Parser.getTok().is(AsmToken::Minus));
+
+  // Process the right curly brace of the list.
  const AsmToken &RCurlyTok = Parser.getTok();
-  if (RCurlyTok.isNot(AsmToken::RCurly))
-    return Error(RCurlyTok.getLoc(), "'}' expected");
-  E = RCurlyTok.getLoc();
-  Parser.Lex(); // Eat left curly brace token.
+  if (RCurlyTok.isNot(AsmToken::RCurly)) {
+    Error(RCurlyTok.getLoc(), "'}' expected");
+    return true;
+  }
+
+  SMLoc E = RCurlyTok.getLoc();
+  Parser.Lex(); // Eat right curly brace token.
+
+  // Verify the register list.
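+  // Illustrative note (editor's addition, not part of the imported
+  // revision): the checks below make "{r0, r0}" an error (duplicate) and
+  // "{r1, r0}" a warning (not in ascending order), while a range such as
+  // "{r0-r2, lr}" has already been expanded register by register in the
+  // range handling above.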
+ SmallVectorImpl<std::pair<unsigned, SMLoc> >::const_iterator + RI = Registers.begin(), RE = Registers.end(); + + unsigned HighRegNum = getARMRegisterNumbering(RI->first); + bool EmittedWarning = false; + + DenseMap<unsigned, bool> RegMap; + RegMap[HighRegNum] = true; + for (++RI; RI != RE; ++RI) { + const std::pair<unsigned, SMLoc> &RegInfo = *RI; + unsigned Reg = getARMRegisterNumbering(RegInfo.first); + + if (RegMap[Reg]) { + Error(RegInfo.second, "register duplicated in register list"); + return true; + } + + if (!EmittedWarning && Reg < HighRegNum) + Warning(RegInfo.second, + "register not in ascending order in register list"); + + RegMap[Reg] = true; + HighRegNum = std::max(Reg, HighRegNum); + } + + Operands.push_back(ARMOperand::CreateRegList(Registers, S, E)); return false; } -/// Parse an arm memory expression, return false if successful else return true +/// tryParseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + const AsmToken &Tok = Parser.getTok(); + assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + StringRef OptStr = Tok.getString(); + + unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size())) + .Case("sy", ARM_MB::SY) + .Case("st", ARM_MB::ST) + .Case("ish", ARM_MB::ISH) + .Case("ishst", ARM_MB::ISHST) + .Case("nsh", ARM_MB::NSH) + .Case("nshst", ARM_MB::NSHST) + .Case("osh", ARM_MB::OSH) + .Case("oshst", ARM_MB::OSHST) + .Default(~0U); + + if (Opt == ~0U) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat identifier token. + Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S)); + return MatchOperand_Success; +} + +/// tryParseProcIFlagsOperand - Try to parse iflags from CPS instruction. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +tryParseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc S = Parser.getTok().getLoc(); + const AsmToken &Tok = Parser.getTok(); + assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + StringRef IFlagsStr = Tok.getString(); + + unsigned IFlags = 0; + for (int i = 0, e = IFlagsStr.size(); i != e; ++i) { + unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1)) + .Case("a", ARM_PROC::A) + .Case("i", ARM_PROC::I) + .Case("f", ARM_PROC::F) + .Default(~0U); + + // If some specific iflag is already set, it means that some letter is + // present more than once, this is not acceptable. + if (Flag == ~0U || (IFlags & Flag)) + return MatchOperand_NoMatch; + + IFlags |= Flag; + } + + Parser.Lex(); // Eat identifier token. + Operands.push_back(ARMOperand::CreateProcIFlags((ARM_PROC::IFlags)IFlags, S)); + return MatchOperand_Success; +} + +/// tryParseMSRMaskOperand - Try to parse mask flags from MSR instruction. 
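+/// Illustrative note (editor's addition, not part of the imported revision):
+/// for "msr cpsr_fc, r0" the operand "cpsr_fc" is split into SpecReg "cpsr"
+/// and Flags "fc", producing FlagsVal 0x9 (f=8 | c=1); "spsr_fc" would also
+/// set bit 4, giving 0x19.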
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  const AsmToken &Tok = Parser.getTok();
+  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+  StringRef Mask = Tok.getString();
+
+  // Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
+  size_t Start = 0, Next = Mask.find('_');
+  StringRef Flags = "";
+  StringRef SpecReg = Mask.slice(Start, Next);
+  if (Next != StringRef::npos)
+    Flags = Mask.slice(Next+1, Mask.size());
+
+  // FlagsVal contains the complete mask:
+  // 3-0: Mask
+  // 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+  unsigned FlagsVal = 0;
+
+  if (SpecReg == "apsr") {
+    FlagsVal = StringSwitch<unsigned>(Flags)
+             .Case("nzcvq",  0x8) // same as CPSR_f
+             .Case("g",      0x4) // same as CPSR_s
+             .Case("nzcvqg", 0xc) // same as CPSR_fs
+             .Default(~0U);
+
+    if (FlagsVal == ~0U) {
+      if (!Flags.empty())
+        return MatchOperand_NoMatch;
+      else
+        FlagsVal = 0; // No flag
+    }
+  } else if (SpecReg == "cpsr" || SpecReg == "spsr") {
+    for (int i = 0, e = Flags.size(); i != e; ++i) {
+      unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1))
+      .Case("c", 1)
+      .Case("x", 2)
+      .Case("s", 4)
+      .Case("f", 8)
+      .Default(~0U);
+
+      // If some specific flag is already set, it means that some letter is
+      // present more than once; this is not acceptable.
+      if (FlagsVal == ~0U || (FlagsVal & Flag))
+        return MatchOperand_NoMatch;
+      FlagsVal |= Flag;
+    }
+  } else // No match for special register.
+    return MatchOperand_NoMatch;
+
+  // A special register with no flags is equivalent to the "fc" flags.
+  if (!FlagsVal)
+    FlagsVal = 0x9;
+
+  // Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1)
+  if (SpecReg == "spsr")
+    FlagsVal |= 16;
+
+  Parser.Lex(); // Eat identifier token.
+  Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
+  return MatchOperand_Success;
+}
+
+/// Parse an ARM memory expression; return false if successful, otherwise
+/// emit an error and return true. The first token must be a '[' when called.
+///
 /// TODO Only preindexing and postindexing addressing are started, unindexed
 /// with option, etc are still to do.
-bool ARMAsmParser::ParseMemory(OwningPtr<ARMOperand> &Op) {
+bool ARMAsmParser::
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
  SMLoc S, E;
  assert(Parser.getTok().is(AsmToken::LBrac) &&
-         "Token is not an Left Bracket");
+         "Token is not a Left Bracket");
  S = Parser.getTok().getLoc();
  Parser.Lex(); // Eat left bracket token.

  const AsmToken &BaseRegTok = Parser.getTok();
-  if (BaseRegTok.isNot(AsmToken::Identifier))
-    return Error(BaseRegTok.getLoc(), "register expected");
-  if (MaybeParseRegister(Op, false))
-    return Error(BaseRegTok.getLoc(), "register expected");
-  int BaseRegNum = Op->getReg();
+  if (BaseRegTok.isNot(AsmToken::Identifier)) {
+    Error(BaseRegTok.getLoc(), "register expected");
+    return true;
+  }
+  int BaseRegNum = TryParseRegister();
+  if (BaseRegNum == -1) {
+    Error(BaseRegTok.getLoc(), "register expected");
+    return true;
+  }
+
+  // The next token must either be a comma or a closing bracket.
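+  // Illustrative note (editor's addition, not part of the imported
+  // revision): e.g. "[r0]" stops at the closing bracket here, while
+  // "[r0, #4]" and "[r0, r1, lsl #2]" continue into the offset parsing
+  // below.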
+ const AsmToken &Tok = Parser.getTok(); + if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac)) + return true; bool Preindexed = false; bool Postindexed = false; bool OffsetIsReg = false; bool Negative = false; bool Writeback = false; + ARMOperand *WBOp = 0; + int OffsetRegNum = -1; + bool OffsetRegShifted = false; + enum ShiftType ShiftType = Lsl; + const MCExpr *ShiftAmount = 0; + const MCExpr *Offset = 0; // First look for preindexed address forms, that is after the "[Rn" we now // have to see if the next token is a comma. - const AsmToken &Tok = Parser.getTok(); if (Tok.is(AsmToken::Comma)) { Preindexed = true; Parser.Lex(); // Eat comma token. - int OffsetRegNum; - bool OffsetRegShifted; - enum ShiftType ShiftType; - const MCExpr *ShiftAmount; - const MCExpr *Offset; - if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, - Offset, OffsetIsReg, OffsetRegNum, E)) + + if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, + Offset, OffsetIsReg, OffsetRegNum, E)) return true; const AsmToken &RBracTok = Parser.getTok(); - if (RBracTok.isNot(AsmToken::RBrac)) - return Error(RBracTok.getLoc(), "']' expected"); + if (RBracTok.isNot(AsmToken::RBrac)) { + Error(RBracTok.getLoc(), "']' expected"); + return true; + } E = RBracTok.getLoc(); Parser.Lex(); // Eat right bracket token. const AsmToken &ExclaimTok = Parser.getTok(); if (ExclaimTok.is(AsmToken::Exclaim)) { - E = ExclaimTok.getLoc(); + WBOp = ARMOperand::CreateToken(ExclaimTok.getString(), + ExclaimTok.getLoc()); Writeback = true; Parser.Lex(); // Eat exclaim token } - ARMOperand::CreateMem(Op, BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, - OffsetRegShifted, ShiftType, ShiftAmount, - Preindexed, Postindexed, Negative, Writeback, S, E); - return false; - } - // The "[Rn" we have so far was not followed by a comma. - else if (Tok.is(AsmToken::RBrac)) { - // This is a post indexing addressing forms, that is a ']' follows after - // the "[Rn". - Postindexed = true; - Writeback = true; + } else { + // The "[Rn" we have so far was not followed by a comma. + + // If there's anything other than the right brace, this is a post indexing + // addressing form. E = Tok.getLoc(); Parser.Lex(); // Eat right bracket token. - int OffsetRegNum = 0; - bool OffsetRegShifted = false; - enum ShiftType ShiftType; - const MCExpr *ShiftAmount; - const MCExpr *Offset; - const AsmToken &NextTok = Parser.getTok(); + if (NextTok.isNot(AsmToken::EndOfStatement)) { - if (NextTok.isNot(AsmToken::Comma)) - return Error(NextTok.getLoc(), "',' expected"); + Postindexed = true; + Writeback = true; + + if (NextTok.isNot(AsmToken::Comma)) { + Error(NextTok.getLoc(), "',' expected"); + return true; + } + Parser.Lex(); // Eat comma token. - if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, - ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, - E)) + + if (ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, + ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, + E)) return true; } + } - ARMOperand::CreateMem(Op, BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, - OffsetRegShifted, ShiftType, ShiftAmount, - Preindexed, Postindexed, Negative, Writeback, S, E); - return false; + // Force Offset to exist if used. 
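+  // Illustrative note (editor's addition, not part of the imported
+  // revision): this synthesizes the #0 in cases like "ldr r0, [r1]", where
+  // no immediate offset was written, so CreateMem always receives a
+  // non-null offset expression for the non-register-offset forms.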
+ if (!OffsetIsReg) { + if (!Offset) + Offset = MCConstantExpr::Create(0, getContext()); } - return true; + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, + OffsetRegNum, OffsetRegShifted, + ShiftType, ShiftAmount, Preindexed, + Postindexed, Negative, Writeback, + S, E)); + if (WBOp) + Operands.push_back(WBOp); + + return false; } /// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn]," @@ -543,7 +1171,6 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, bool &OffsetIsReg, int &OffsetRegNum, SMLoc &E) { - OwningPtr<ARMOperand> Op; Negative = false; OffsetRegShifted = false; OffsetIsReg = false; @@ -559,13 +1186,15 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, // See if there is a register following the "[Rn," or "[Rn]," we have so far. const AsmToken &OffsetRegTok = Parser.getTok(); if (OffsetRegTok.is(AsmToken::Identifier)) { - OffsetIsReg = !MaybeParseRegister(Op, false); - if (OffsetIsReg) { - E = Op->getEndLoc(); - OffsetRegNum = Op->getReg(); + SMLoc CurLoc = OffsetRegTok.getLoc(); + OffsetRegNum = TryParseRegister(); + if (OffsetRegNum != -1) { + OffsetIsReg = true; + E = CurLoc; } } - // If we parsed a register as the offset then their can be a shift after that + + // If we parsed a register as the offset then there can be a shift after that. if (OffsetRegNum != -1) { // Look for a comma then a shift const AsmToken &Tok = Parser.getTok(); @@ -583,7 +1212,7 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, const AsmToken &HashTok = Parser.getTok(); if (HashTok.isNot(AsmToken::Hash)) return Error(HashTok.getLoc(), "'#' expected"); - + Parser.Lex(); // Eat hash token. if (getParser().ParseExpression(Offset)) @@ -597,8 +1226,7 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, /// ( lsl | lsr | asr | ror ) , # shift_amount /// rrx /// and returns true if it parses a shift otherwise it returns false. -bool ARMAsmParser::ParseShift(ShiftType &St, - const MCExpr *&ShiftAmount, +bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -636,13 +1264,33 @@ bool ARMAsmParser::ParseShift(ShiftType &St, /// Parse a arm instruction operand. For now this parses the operand regardless /// of the mnemonic. -bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) { +bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + StringRef Mnemonic) { SMLoc S, E; - + + // Check if the current operand has a custom associated parser, if so, try to + // custom parse the operand, or fallback to the general approach. + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + if (ResTy == MatchOperand_Success) + return false; + // If there wasn't a custom match, try the generic matcher below. Otherwise, + // there was a match, but an error occurred, in which case, just return that + // the operand parsing failed. + if (ResTy == MatchOperand_ParseFail) + return true; + switch (getLexer().getKind()) { + default: + Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return true; case AsmToken::Identifier: - if (!MaybeParseRegister(Op, true)) + if (!TryParseRegisterWithWriteBack(Operands)) return false; + + // Fall though for the Identifier case that is not a register or a + // special name. + case AsmToken::Integer: // things like 1f and 2b as a branch targets + case AsmToken::Dot: { // . 
as a branch target // This was not a register so parse other operands that start with an // identifier (like labels) as expressions and create them as immediates. const MCExpr *IdVal; @@ -650,12 +1298,13 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) { if (getParser().ParseExpression(IdVal)) return true; E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - ARMOperand::CreateImm(Op, IdVal, S, E); + Operands.push_back(ARMOperand::CreateImm(IdVal, S, E)); return false; + } case AsmToken::LBrac: - return ParseMemory(Op); + return ParseMemory(Operands); case AsmToken::LCurly: - return ParseRegisterList(Op); + return ParseRegisterList(Operands); case AsmToken::Hash: // #42 -> immediate. // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate @@ -665,28 +1314,134 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) { if (getParser().ParseExpression(ImmVal)) return true; E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - ARMOperand::CreateImm(Op, ImmVal, S, E); + Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); return false; - default: - return Error(Parser.getTok().getLoc(), "unexpected token in operand"); + case AsmToken::Colon: { + // ":lower16:" and ":upper16:" expression prefixes + // FIXME: Check it's an expression prefix, + // e.g. (FOO - :lower16:BAR) isn't legal. + ARMMCExpr::VariantKind RefKind; + if (ParsePrefix(RefKind)) + return true; + + const MCExpr *SubExprVal; + if (getParser().ParseExpression(SubExprVal)) + return true; + + const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal, + getContext()); + E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E)); + return false; + } } } -/// Parse an arm instruction mnemonic followed by its operands. -bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - OwningPtr<ARMOperand> Op; +// ParsePrefix - Parse ARM 16-bit relocations expression prefix, i.e. +// :lower16: and :upper16:. +bool ARMAsmParser::ParsePrefix(ARMMCExpr::VariantKind &RefKind) { + RefKind = ARMMCExpr::VK_ARM_None; - // Create the leading tokens for the mnemonic, split by '.' characters. - size_t Start = 0, Next = Name.find('.'); - StringRef Head = Name.slice(Start, Next); + // :lower16: and :upper16: modifiers + assert(getLexer().is(AsmToken::Colon) && "expected a :"); + Parser.Lex(); // Eat ':' + + if (getLexer().isNot(AsmToken::Identifier)) { + Error(Parser.getTok().getLoc(), "expected prefix identifier in operand"); + return true; + } + + StringRef IDVal = Parser.getTok().getIdentifier(); + if (IDVal == "lower16") { + RefKind = ARMMCExpr::VK_ARM_LO16; + } else if (IDVal == "upper16") { + RefKind = ARMMCExpr::VK_ARM_HI16; + } else { + Error(Parser.getTok().getLoc(), "unexpected prefix in operand"); + return true; + } + Parser.Lex(); + + if (getLexer().isNot(AsmToken::Colon)) { + Error(Parser.getTok().getLoc(), "unexpected token after prefix"); + return true; + } + Parser.Lex(); // Eat the last ':' + return false; +} + +const MCExpr * +ARMAsmParser::ApplyPrefixToExpr(const MCExpr *E, + MCSymbolRefExpr::VariantKind Variant) { + // Recurse over the given expression, rebuilding it to apply the given variant + // to the leftmost symbol. 
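+  // Illustrative note (editor's addition, not part of the imported
+  // revision): given a binary expression such as (foo + 4), the recursion
+  // below rebuilds it with the variant applied to 'foo' only; a symbol
+  // reference that already carries a variant makes the function return 0
+  // (failure).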
+  if (Variant == MCSymbolRefExpr::VK_None)
+    return E;
+
+  switch (E->getKind()) {
+  case MCExpr::Target:
+    llvm_unreachable("Can't handle target expr yet");
+  case MCExpr::Constant:
+    llvm_unreachable("Can't handle lower16/upper16 of constant yet");
+
+  case MCExpr::SymbolRef: {
+    const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
-  // Determine the predicate, if any.
+    if (SRE->getKind() != MCSymbolRefExpr::VK_None)
+      return 0;
+
+    return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+  }
+
+  case MCExpr::Unary:
+    llvm_unreachable("Can't handle unary expressions yet");
+
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+    const MCExpr *LHS = ApplyPrefixToExpr(BE->getLHS(), Variant);
+    const MCExpr *RHS = BE->getRHS();
+    if (!LHS)
+      return 0;
+
+    return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+  }
+  }
+
+  assert(0 && "Invalid expression kind!");
+  return 0;
+}
+
+/// \brief Given a mnemonic, split out possible predication code and carry
+/// setting letters to form a canonical mnemonic and flags.
+//
+// FIXME: Would be nice to autogen this.
+static StringRef SplitMnemonic(StringRef Mnemonic,
+                               unsigned &PredicationCode,
+                               bool &CarrySetting,
+                               unsigned &ProcessorIMod) {
+  PredicationCode = ARMCC::AL;
+  CarrySetting = false;
+  ProcessorIMod = 0;
+
+  // Ignore some mnemonics we know aren't predicated forms.
  //
-  // FIXME: We need a way to check whether a prefix supports predication,
-  // otherwise we will end up with an ambiguity for instructions that happen to
-  // end with a predicate name.
+  // FIXME: Would be nice to autogen this.
+  if (Mnemonic == "teq" || Mnemonic == "vceq" ||
+      Mnemonic == "movs" ||
+      Mnemonic == "svc" ||
+      (Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
+       Mnemonic == "vmls" || Mnemonic == "vnmls") ||
+      Mnemonic == "vacge" || Mnemonic == "vcge" ||
+      Mnemonic == "vclt" ||
+      Mnemonic == "vacgt" || Mnemonic == "vcgt" ||
+      Mnemonic == "vcle" ||
+      (Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" ||
+       Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" ||
+       Mnemonic == "vqdmlal"))
+    return Mnemonic;
+
+  // First, split out any predication code.
-  unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
+  unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
    .Case("eq", ARMCC::EQ)
    .Case("ne", ARMCC::NE)
    .Case("hs", ARMCC::HS)
@@ -704,44 +1459,268 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
    .Case("al", ARMCC::AL)
    .Default(~0U);
  if (CC != ~0U) {
-    Head = Head.slice(0, Head.size() - 2);
-  } else
-    CC = ARMCC::AL;
+    Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
+    PredicationCode = CC;
+  }
+
+  // Next, determine if we have a carry setting bit. We explicitly ignore all
+  // the instructions we know end in 's'.
+  if (Mnemonic.endswith("s") &&
+      !(Mnemonic == "asrs" || Mnemonic == "cps" || Mnemonic == "mls" ||
+        Mnemonic == "movs" || Mnemonic == "mrs" || Mnemonic == "smmls" ||
+        Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" ||
+        Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" ||
+        Mnemonic == "vrecps" || Mnemonic == "vrsqrts")) {
+    Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
+    CarrySetting = true;
+  }
+
+  // The "cps" instruction can have an interrupt mode operand which is glued
+  // into the mnemonic. Check if this is the case, split it off, and parse the
+  // imod operand.
+  if (Mnemonic.startswith("cps")) {
+    // Split out any imod code.
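+    // Illustrative note (editor's addition, not part of the imported
+    // revision): e.g. "cpsie" ends in "ie", so IMod becomes ARM_PROC::IE
+    // and the mnemonic is trimmed back to "cps"; "cpsid" maps to
+    // ARM_PROC::ID the same way.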
+ unsigned IMod = + StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2, 2)) + .Case("ie", ARM_PROC::IE) + .Case("id", ARM_PROC::ID) + .Default(~0U); + if (IMod != ~0U) { + Mnemonic = Mnemonic.slice(0, Mnemonic.size()-2); + ProcessorIMod = IMod; + } + } + + return Mnemonic; +} + +/// \brief Given a canonical mnemonic, determine if the instruction ever allows +/// inclusion of carry set or predication code operands. +// +// FIXME: It would be nice to autogen this. +void ARMAsmParser:: +GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, + bool &CanAcceptPredicationCode) { + bool isThumb = TM.getSubtarget<ARMSubtarget>().isThumb(); + + if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || + Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" || + Mnemonic == "smull" || Mnemonic == "add" || Mnemonic == "adc" || + Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" || + Mnemonic == "umlal" || Mnemonic == "orr" || Mnemonic == "mov" || + Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" || + Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" || + Mnemonic == "eor" || Mnemonic == "smlal" || Mnemonic == "mvn") { + CanAcceptCarrySet = true; + } else { + CanAcceptCarrySet = false; + } + + if (Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "dmb" || + Mnemonic == "cps" || Mnemonic == "mcr2" || Mnemonic == "it" || + Mnemonic == "mcrr2" || Mnemonic == "cbz" || Mnemonic == "cdp2" || + Mnemonic == "trap" || Mnemonic == "mrc2" || Mnemonic == "mrrc2" || + Mnemonic == "dsb" || Mnemonic == "movs" || Mnemonic == "isb" || + Mnemonic == "clrex" || Mnemonic.startswith("cps")) { + CanAcceptPredicationCode = false; + } else { + CanAcceptPredicationCode = true; + } - ARMOperand::CreateToken(Op, Head, NameLoc); - Operands.push_back(Op.take()); + if (isThumb) + if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" || + Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp") + CanAcceptPredicationCode = false; +} + +/// Parse an arm instruction mnemonic followed by its operands. +bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create the leading tokens for the mnemonic, split by '.' characters. + size_t Start = 0, Next = Name.find('.'); + StringRef Head = Name.slice(Start, Next); + + // Split out the predication code and carry setting flag from the mnemonic. + unsigned PredicationCode; + unsigned ProcessorIMod; + bool CarrySetting; + Head = SplitMnemonic(Head, PredicationCode, CarrySetting, + ProcessorIMod); + + Operands.push_back(ARMOperand::CreateToken(Head, NameLoc)); + + // Next, add the CCOut and ConditionCode operands, if needed. + // + // For mnemonics which can ever incorporate a carry setting bit or predication + // code, our matching model involves us always generating CCOut and + // ConditionCode operands to match the mnemonic "as written" and then we let + // the matcher deal with finding the right instruction or generating an + // appropriate error. + bool CanAcceptCarrySet, CanAcceptPredicationCode; + GetMnemonicAcceptInfo(Head, CanAcceptCarrySet, CanAcceptPredicationCode); + + // Add the carry setting operand, if necessary. + // + // FIXME: It would be awesome if we could somehow invent a location such that + // match errors on this operand would print a nice diagnostic about how the + // 's' character in the mnemonic resulted in a CCOut operand. + if (CanAcceptCarrySet) { + Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? 
ARM::CPSR : 0,
+                                               NameLoc));
+  } else {
+    // This mnemonic can't ever accept a carry set, but the user wrote one (or
+    // misspelled another mnemonic).
+
+    // FIXME: Issue a nice error.
+  }
+
+  // Add the predication code operand, if necessary.
+  if (CanAcceptPredicationCode) {
+    Operands.push_back(ARMOperand::CreateCondCode(
+                         ARMCC::CondCodes(PredicationCode), NameLoc));
+  } else {
+    // This mnemonic can't ever accept a predication code, but the user wrote
+    // one (or misspelled another mnemonic).
+
+    // FIXME: Issue a nice error.
+  }
+
+  // Add the processor imod operand, if necessary.
+  if (ProcessorIMod) {
+    Operands.push_back(ARMOperand::CreateImm(
+          MCConstantExpr::Create(ProcessorIMod, getContext()),
+                                 NameLoc, NameLoc));
+  } else {
+    // This mnemonic can't ever accept an imod, but the user wrote
+    // one (or misspelled another mnemonic).
-    ARMOperand::CreateCondCode(Op, ARMCC::CondCodes(CC), NameLoc);
-    Operands.push_back(Op.take());
+    // FIXME: Issue a nice error.
+  }

  // Add the remaining tokens in the mnemonic.
  while (Next != StringRef::npos) {
    Start = Next;
    Next = Name.find('.', Start + 1);
-    Head = Name.slice(Start, Next);
+    StringRef ExtraToken = Name.slice(Start, Next);

-    ARMOperand::CreateToken(Op, Head, NameLoc);
-    Operands.push_back(Op.take());
+    Operands.push_back(ARMOperand::CreateToken(ExtraToken, NameLoc));
  }

  // Read the remaining operands.
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    // Read the first operand.
-    OwningPtr<ARMOperand> Op;
-    if (ParseOperand(Op)) return true;
-    Operands.push_back(Op.take());
+    if (ParseOperand(Operands, Head)) {
+      Parser.EatToEndOfStatement();
+      return true;
+    }

    while (getLexer().is(AsmToken::Comma)) {
      Parser.Lex(); // Eat the comma.

      // Parse and remember the operand.
-      if (ParseOperand(Op)) return true;
-      Operands.push_back(Op.take());
+      if (ParseOperand(Operands, Head)) {
+        Parser.EatToEndOfStatement();
+        return true;
+      }
    }
  }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    Parser.EatToEndOfStatement();
+    return TokError("unexpected token in argument list");
+  }
+
+  Parser.Lex(); // Consume the EndOfStatement

  return false;
}

+bool ARMAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                        MCStreamer &Out) {
+  MCInst Inst;
+  unsigned ErrorInfo;
+  MatchResultTy MatchResult, MatchResult2;
+  MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+  if (MatchResult != Match_Success) {
+    // If we get a Match_InvalidOperand it might be some arithmetic instruction
+    // that does not update the condition codes. So try adding a CCOut operand
+    // with a value of reg0.
+    if (MatchResult == Match_InvalidOperand) {
+      Operands.insert(Operands.begin() + 1,
+                      ARMOperand::CreateCCOut(0,
+                                  ((ARMOperand*)Operands[0])->getStartLoc()));
+      MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+      if (MatchResult2 == Match_Success)
+        MatchResult = Match_Success;
+      else {
+        ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
+        Operands.erase(Operands.begin() + 1);
+        delete CCOut;
+      }
+    }
+    // If we get a Match_MnemonicFail it might be some arithmetic instruction
+    // that updates the condition codes if it ends in 's'. So see if the
+    // mnemonic ends in 's' and if so try removing the 's' and adding a CCOut
+    // operand with a value of CPSR.
+    else if (MatchResult == Match_MnemonicFail) {
+      // Get the instruction mnemonic, which is the first token.
+      StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken();
+      if (Mnemonic.substr(Mnemonic.size()-1) == "s") {
+        // Remove the 's' from the mnemonic for matching.
+        StringRef MnemonicNoS = Mnemonic.slice(0, Mnemonic.size() - 1);
+        SMLoc NameLoc = ((ARMOperand*)Operands[0])->getStartLoc();
+        ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
+        Operands.erase(Operands.begin());
+        delete OldMnemonic;
+        Operands.insert(Operands.begin(),
+                        ARMOperand::CreateToken(MnemonicNoS, NameLoc));
+        Operands.insert(Operands.begin() + 1,
+                        ARMOperand::CreateCCOut(ARM::CPSR, NameLoc));
+        MatchResult2 = MatchInstructionImpl(Operands, Inst, ErrorInfo);
+        if (MatchResult2 == Match_Success)
+          MatchResult = Match_Success;
+        else {
+          ARMOperand *OldMnemonic = ((ARMOperand*)Operands[0]);
+          Operands.erase(Operands.begin());
+          delete OldMnemonic;
+          Operands.insert(Operands.begin(),
+                          ARMOperand::CreateToken(Mnemonic, NameLoc));
+          ARMOperand *CCOut = ((ARMOperand*)Operands[1]);
+          Operands.erase(Operands.begin() + 1);
+          delete CCOut;
+        }
+      }
+    }
+  }
+  switch (MatchResult) {
+  case Match_Success:
+    Out.EmitInstruction(Inst);
+    return false;
+  case Match_MissingFeature:
+    Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+    return true;
+  case Match_InvalidOperand: {
+    SMLoc ErrorLoc = IDLoc;
+    if (ErrorInfo != ~0U) {
+      if (ErrorInfo >= Operands.size())
+        return Error(IDLoc, "too few operands for instruction");
+
+      ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+    }
+
+    return Error(ErrorLoc, "invalid operand for instruction");
+  }
+  case Match_MnemonicFail:
+    return Error(IDLoc, "unrecognized instruction mnemonic");
+  case Match_ConversionFail:
+    return Error(IDLoc, "unable to convert operands to instruction");
+  }
+
+  llvm_unreachable("Implement any new match types added!");
+  return true;
+}
+
 /// ParseDirective parses the arm specific directives
 bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getIdentifier();
@@ -771,7 +1750,7 @@ bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
      if (getLexer().is(AsmToken::EndOfStatement))
        break;
-
+
      // FIXME: Improve diagnostic.
      if (getLexer().isNot(AsmToken::Comma))
        return Error(L, "unexpected token in directive");
@@ -801,16 +1780,16 @@ bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) {
 bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) {
  const AsmToken &Tok = Parser.getTok();
  if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
-    return Error(L, "unexpected token in .syntax directive");
-  StringRef ATTRIBUTE_UNUSED SymbolName = Parser.getTok().getIdentifier();
+    return Error(L, "unexpected token in .thumb_func directive");
+  StringRef Name = Tok.getString();
  Parser.Lex(); // Consume the identifier token.
-
  if (getLexer().isNot(AsmToken::EndOfStatement))
    return Error(L, "unexpected token in directive");
  Parser.Lex();

-  // TODO: mark symbol as a thumb symbol
-  // getParser().getStreamer().Emit???();
+  // Mark symbol as a thumb symbol.
+  MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name);
+  getParser().getStreamer().EmitThumbFunc(Func);
  return false;
}

@@ -824,7 +1803,7 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
  if (Mode == "unified" || Mode == "UNIFIED")
    Parser.Lex();
  else if (Mode == "divided" || Mode == "DIVIDED")
-    Parser.Lex();
+    return Error(L, "'.syntax divided' arm assembly not supported");
  else
    return Error(L, "unrecognized syntax mode in .syntax directive");

@@ -855,8 +1834,21 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
    return Error(Parser.getTok().getLoc(), "unexpected token in directive");
  Parser.Lex();

-  // TODO tell the MC streamer the mode
-  // getParser().getStreamer().Emit???();
+  // FIXME: We need to be able to switch subtargets at this point so that
+  // MatchInstructionImpl() will work when it gets the AvailableFeatures which
+  // includes Feature_IsThumb or not to match the right instructions. This is
+  // blocked on the FIXME in llvm-mc.cpp when creating the TargetMachine.
+  if (Val == 16) {
+    assert(TM.getSubtarget<ARMSubtarget>().isThumb() &&
+           "switching between arm/thumb not yet supported via .code 16");
+    getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+  } else {
+    assert(!TM.getSubtarget<ARMSubtarget>().isThumb() &&
+           "switching between thumb/arm not yet supported via .code 32");
+    getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
+  }
+
  return false;
}

@@ -869,4 +1861,6 @@ extern "C" void LLVMInitializeARMAsmParser() {
  LLVMInitializeARMAsmLexer();
}

+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
 #include "ARMGenAsmMatcher.inc"
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
deleted file mode 100644
index 8026e77..0000000
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ /dev/null
@@ -1,800 +0,0 @@
-//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an ARM MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "ARM.h" // FIXME: FACTOR ENUMS BETTER.
-#include "ARMInstPrinter.h"
-#include "ARMAddressingModes.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-// Include the auto-generated portion of the assembly writer.
-#define MachineInstr MCInst
-#define ARMAsmPrinter ARMInstPrinter // FIXME: REMOVE.
-#include "ARMGenAsmWriter.inc" -#undef MachineInstr -#undef ARMAsmPrinter - -static unsigned NextReg(unsigned Reg) { - switch (Reg) { - default: - assert(0 && "Unexpected register enum"); - - case ARM::D0: - return ARM::D1; - case ARM::D1: - return ARM::D2; - case ARM::D2: - return ARM::D3; - case ARM::D3: - return ARM::D4; - case ARM::D4: - return ARM::D5; - case ARM::D5: - return ARM::D6; - case ARM::D6: - return ARM::D7; - case ARM::D7: - return ARM::D8; - case ARM::D8: - return ARM::D9; - case ARM::D9: - return ARM::D10; - case ARM::D10: - return ARM::D11; - case ARM::D11: - return ARM::D12; - case ARM::D12: - return ARM::D13; - case ARM::D13: - return ARM::D14; - case ARM::D14: - return ARM::D15; - case ARM::D15: - return ARM::D16; - case ARM::D16: - return ARM::D17; - case ARM::D17: - return ARM::D18; - case ARM::D18: - return ARM::D19; - case ARM::D19: - return ARM::D20; - case ARM::D20: - return ARM::D21; - case ARM::D21: - return ARM::D22; - case ARM::D22: - return ARM::D23; - case ARM::D23: - return ARM::D24; - case ARM::D24: - return ARM::D25; - case ARM::D25: - return ARM::D26; - case ARM::D26: - return ARM::D27; - case ARM::D27: - return ARM::D28; - case ARM::D28: - return ARM::D29; - case ARM::D29: - return ARM::D30; - case ARM::D30: - return ARM::D31; - } -} - -void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { - // Check for MOVs and print canonical forms, instead. - if (MI->getOpcode() == ARM::MOVs) { - const MCOperand &Dst = MI->getOperand(0); - const MCOperand &MO1 = MI->getOperand(1); - const MCOperand &MO2 = MI->getOperand(2); - const MCOperand &MO3 = MI->getOperand(3); - - O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())); - printSBitModifierOperand(MI, 6, O); - printPredicateOperand(MI, 4, O); - - O << '\t' << getRegisterName(Dst.getReg()) - << ", " << getRegisterName(MO1.getReg()); - - if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx) - return; - - O << ", "; - - if (MO2.getReg()) { - O << getRegisterName(MO2.getReg()); - assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); - } else { - O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); - } - return; - } - - // A8.6.123 PUSH - if ((MI->getOpcode() == ARM::STM_UPD || MI->getOpcode() == ARM::t2STM_UPD) && - MI->getOperand(0).getReg() == ARM::SP) { - const MCOperand &MO1 = MI->getOperand(2); - if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) { - O << '\t' << "push"; - printPredicateOperand(MI, 3, O); - O << '\t'; - printRegisterList(MI, 5, O); - return; - } - } - - // A8.6.122 POP - if ((MI->getOpcode() == ARM::LDM_UPD || MI->getOpcode() == ARM::t2LDM_UPD) && - MI->getOperand(0).getReg() == ARM::SP) { - const MCOperand &MO1 = MI->getOperand(2); - if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) { - O << '\t' << "pop"; - printPredicateOperand(MI, 3, O); - O << '\t'; - printRegisterList(MI, 5, O); - return; - } - } - - // A8.6.355 VPUSH - if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) && - MI->getOperand(0).getReg() == ARM::SP) { - const MCOperand &MO1 = MI->getOperand(2); - if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) { - O << '\t' << "vpush"; - printPredicateOperand(MI, 3, O); - O << '\t'; - printRegisterList(MI, 5, O); - return; - } - } - - // A8.6.354 VPOP - if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) && - MI->getOperand(0).getReg() == ARM::SP) { - const MCOperand &MO1 = MI->getOperand(2); - if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) { - O << '\t' << "vpop"; - 
printPredicateOperand(MI, 3, O); - O << '\t'; - printRegisterList(MI, 5, O); - return; - } - } - - printInstruction(MI, O); - } - -void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O, const char *Modifier) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - if (Modifier && strcmp(Modifier, "dregpair") == 0) { - O << '{' << getRegisterName(Reg) << ", " - << getRegisterName(NextReg(Reg)) << '}'; -#if 0 - // FIXME: Breaks e.g. ARM/vmul.ll. - assert(0); - /* - unsigned DRegLo = TRI->getSubReg(Reg, ARM::dsub_0); - unsigned DRegHi = TRI->getSubReg(Reg, ARM::dsub_1); - O << '{' - << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) - << '}';*/ -#endif - } else if (Modifier && strcmp(Modifier, "lane") == 0) { - assert(0); - /* - unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); - unsigned DReg = TRI->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1, - &ARM::DPR_VFP2RegClass); - O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; - */ - } else { - O << getRegisterName(Reg); - } - } else if (Op.isImm()) { - assert((Modifier && !strcmp(Modifier, "call")) || - ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported")); - O << '#' << Op.getImm(); - } else { - if (Modifier && Modifier[0] != 0 && strcmp(Modifier, "call") != 0) - llvm_unreachable("Unsupported modifier"); - assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); - } -} - -static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm, - const MCAsmInfo *MAI) { - // Break it up into two parts that make up a shifter immediate. - V = ARM_AM::getSOImmVal(V); - assert(V != -1 && "Not a valid so_imm value!"); - - unsigned Imm = ARM_AM::getSOImmValImm(V); - unsigned Rot = ARM_AM::getSOImmValRot(V); - - // Print low-level immediate formation info, per - // A5.1.3: "Data-processing operands - Immediate". - if (Rot) { - O << "#" << Imm << ", " << Rot; - // Pretty printed version. - if (VerboseAsm) - O << ' ' << MAI->getCommentString() - << ' ' << (int)ARM_AM::rotr32(Imm, Rot); - } else { - O << "#" << Imm; - } -} - - -/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit -/// immediate in bits 0-7. -void ARMInstPrinter::printSOImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - assert(MO.isImm() && "Not a valid so_imm value!"); - printSOImm(O, MO.getImm(), VerboseAsm, &MAI); -} - -/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov' -/// followed by an 'orr' to materialize. -void ARMInstPrinter::printSOImm2PartOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - // FIXME: REMOVE this method. - abort(); -} - -// so_reg is a 4-operand unit corresponding to register forms of the A5.1 -// "Addressing Mode 1 - Data-processing operands" forms. This includes: -// REG 0 0 - e.g. R5 -// REG REG 0,SH_OPC - e.g. R5, ROR R3 -// REG 0 IMM,SH_OPC - e.g. R5, LSL #3 -void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); - - O << getRegisterName(MO1.getReg()); - - // Print the shift opc. 
- ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm()); - O << ", " << ARM_AM::getShiftOpcStr(ShOpc); - if (MO2.getReg()) { - O << ' ' << getRegisterName(MO2.getReg()); - assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); - } else if (ShOpc != ARM_AM::rrx) { - O << " #" << ARM_AM::getSORegOffset(MO3.getImm()); - } -} - - -void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - const MCOperand &MO3 = MI->getOperand(Op+2); - - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, Op, O); - return; - } - - O << "[" << getRegisterName(MO1.getReg()); - - if (!MO2.getReg()) { - if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. - O << ", #" - << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << ARM_AM::getAM2Offset(MO3.getImm()); - O << "]"; - return; - } - - O << ", " - << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << getRegisterName(MO2.getReg()); - - if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) - << " #" << ShImm; - O << "]"; -} - -void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - - if (!MO1.getReg()) { - unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - O << '#' - << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) - << ImmOffs; - return; - } - - O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) - << getRegisterName(MO1.getReg()); - - if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) - O << ", " - << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm())) - << " #" << ShImm; -} - -void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); - - O << '[' << getRegisterName(MO1.getReg()); - - if (MO2.getReg()) { - O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm()) - << getRegisterName(MO2.getReg()) << ']'; - return; - } - - if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) - O << ", #" - << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) - << ImmOffs; - O << ']'; -} - -void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - - if (MO1.getReg()) { - O << (char)ARM_AM::getAM3Op(MO2.getImm()) - << getRegisterName(MO1.getReg()); - return; - } - - unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - O << '#' - << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) - << ImmOffs; -} - - -void ARMInstPrinter::printAddrMode4Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, - const char *Modifier) { - const MCOperand &MO2 = MI->getOperand(OpNum+1); - ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); - if (Modifier && strcmp(Modifier, "submode") == 0) { - O << ARM_AM::getAMSubModeStr(Mode); - } else if (Modifier && strcmp(Modifier, "wide") == 0) { - ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); - if (Mode == ARM_AM::ia) - O << ".w"; - } else { - printOperand(MI, OpNum, O); - } -} - -void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, 
unsigned OpNum, - raw_ostream &O, - const char *Modifier) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, OpNum, O); - return; - } - - O << "[" << getRegisterName(MO1.getReg()); - - if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { - O << ", #" - << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) - << ImmOffs*4; - } - O << "]"; -} - -void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - - O << "[" << getRegisterName(MO1.getReg()); - if (MO2.getImm()) { - // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << (MO2.getImm() << 3); - } - O << "]"; -} - -void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - if (MO.getReg() == 0) - O << "!"; - else - O << ", " << getRegisterName(MO.getReg()); -} - -void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, - const char *Modifier) { - assert(0 && "FIXME: Implement printAddrModePCOperand"); -} - -void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - uint32_t v = ~MO.getImm(); - int32_t lsb = CountTrailingZeros_32(v); - int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb; - assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); - O << '#' << lsb << ", #" << width; -} - -void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned val = MI->getOperand(OpNum).getImm(); - O << ARM_MB::MemBOptToString(val); -} - -void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned ShiftOp = MI->getOperand(OpNum).getImm(); - ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); - switch (Opc) { - case ARM_AM::no_shift: - return; - case ARM_AM::lsl: - O << ", lsl #"; - break; - case ARM_AM::asr: - O << ", asr #"; - break; - default: - assert(0 && "unexpected shift opcode for shift immediate operand"); - } - O << ARM_AM::getSORegOffset(ShiftOp); -} - -void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "{"; - for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { - if (i != OpNum) O << ", "; - O << getRegisterName(MI->getOperand(i).getReg()); - } - O << "}"; -} - -void ARMInstPrinter::printCPSOptionOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - unsigned option = Op.getImm(); - unsigned mode = option & 31; - bool changemode = option >> 5 & 1; - unsigned AIF = option >> 6 & 7; - unsigned imod = option >> 9 & 3; - if (imod == 2) - O << "ie"; - else if (imod == 3) - O << "id"; - O << '\t'; - if (imod > 1) { - if (AIF & 4) O << 'a'; - if (AIF & 2) O << 'i'; - if (AIF & 1) O << 'f'; - if (AIF > 0 && changemode) O << ", "; - } - if (changemode) - O << '#' << mode; -} - -void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - unsigned Mask = Op.getImm(); - if (Mask) { - O << '_'; - if (Mask & 8) O << 'f'; - if (Mask & 4) O << 's'; - if (Mask & 2) O << 'x'; - if (Mask & 1) O << 'c'; - } -} - -void 
ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - O << '#'; - if (Op.getImm() < 0) - O << '-' << (-Op.getImm() - 1); - else - O << Op.getImm(); -} - -void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); - if (CC != ARMCC::AL) - O << ARMCondCodeToString(CC); -} - -void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); - O << ARMCondCodeToString(CC); -} - -void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - if (MI->getOperand(OpNum).getReg()) { - assert(MI->getOperand(OpNum).getReg() == ARM::CPSR && - "Expect ARM CPSR register!"); - O << 's'; - } -} - - - -void ARMInstPrinter::printCPInstOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, - const char *Modifier) { - // FIXME: remove this. - abort(); -} - -void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << MI->getOperand(OpNum).getImm(); -} - - -void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - // FIXME: remove this. - abort(); -} - -void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "#" << MI->getOperand(OpNum).getImm() * 4; -} - -void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - // (3 - the number of trailing zeros) is the number of then / else. - unsigned Mask = MI->getOperand(OpNum).getImm(); - unsigned CondBit0 = Mask >> 4 & 1; - unsigned NumTZ = CountTrailingZeros_32(Mask); - assert(NumTZ <= 3 && "Invalid IT mask!"); - for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { - bool T = ((Mask >> Pos) & 1) == CondBit0; - if (T) - O << 't'; - else - O << 'e'; - } -} - -void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - O << "[" << getRegisterName(MO1.getReg()); - O << ", " << getRegisterName(MO2.getReg()) << "]"; -} - -void ARMInstPrinter::printThumbAddrModeRI5Operand(const MCInst *MI, unsigned Op, - raw_ostream &O, - unsigned Scale) { - const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - const MCOperand &MO3 = MI->getOperand(Op+2); - - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
- printOperand(MI, Op, O); - return; - } - - O << "[" << getRegisterName(MO1.getReg()); - if (MO3.getReg()) - O << ", " << getRegisterName(MO3.getReg()); - else if (unsigned ImmOffs = MO2.getImm()) - O << ", #" << ImmOffs * Scale; - O << "]"; -} - -void ARMInstPrinter::printThumbAddrModeS1Operand(const MCInst *MI, unsigned Op, - raw_ostream &O) { - printThumbAddrModeRI5Operand(MI, Op, O, 1); -} - -void ARMInstPrinter::printThumbAddrModeS2Operand(const MCInst *MI, unsigned Op, - raw_ostream &O) { - printThumbAddrModeRI5Operand(MI, Op, O, 2); -} - -void ARMInstPrinter::printThumbAddrModeS4Operand(const MCInst *MI, unsigned Op, - raw_ostream &O) { - printThumbAddrModeRI5Operand(MI, Op, O, 4); -} - -void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - O << "[" << getRegisterName(MO1.getReg()); - if (unsigned ImmOffs = MO2.getImm()) - O << ", #" << ImmOffs*4; - O << "]"; -} - -void ARMInstPrinter::printTBAddrMode(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg()); - if (MI->getOpcode() == ARM::t2TBH) - O << ", lsl #1"; - O << ']'; -} - -// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 -// register with shift forms. -// REG 0 0 - e.g. R5 -// REG IMM, SH_OPC - e.g. R5, LSL #3 -void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - - unsigned Reg = MO1.getReg(); - O << getRegisterName(Reg); - - // Print the shift opc. - assert(MO2.isImm() && "Not a valid t2_so_reg value!"); - ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); - O << ", " << ARM_AM::getShiftOpcStr(ShOpc); - if (ShOpc != ARM_AM::rrx) - O << " #" << ARM_AM::getSORegOffset(MO2.getImm()); -} - -void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - - O << "[" << getRegisterName(MO1.getReg()); - - unsigned OffImm = MO2.getImm(); - if (OffImm) // Don't print +0. - O << ", #" << OffImm; - O << "]"; -} - -void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - - O << "[" << getRegisterName(MO1.getReg()); - - int32_t OffImm = (int32_t)MO2.getImm(); - // Don't print +0. - if (OffImm < 0) - O << ", #-" << -OffImm; - else if (OffImm > 0) - O << ", #" << OffImm; - O << "]"; -} - -void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - - O << "[" << getRegisterName(MO1.getReg()); - - int32_t OffImm = (int32_t)MO2.getImm() / 4; - // Don't print +0. - if (OffImm < 0) - O << ", #-" << -OffImm * 4; - else if (OffImm > 0) - O << ", #" << OffImm * 4; - O << "]"; -} - -void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - int32_t OffImm = (int32_t)MO1.getImm(); - // Don't print +0. 
- if (OffImm < 0) - O << "#-" << -OffImm; - else if (OffImm > 0) - O << "#" << OffImm; -} - -void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - int32_t OffImm = (int32_t)MO1.getImm() / 4; - // Don't print +0. - if (OffImm < 0) - O << "#-" << -OffImm * 4; - else if (OffImm > 0) - O << "#" << OffImm * 4; -} - -void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); - - O << "[" << getRegisterName(MO1.getReg()); - - assert(MO2.getReg() && "Invalid so_reg load / store address!"); - O << ", " << getRegisterName(MO2.getReg()); - - unsigned ShAmt = MO3.getImm(); - if (ShAmt) { - assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); - O << ", lsl #" << ShAmt; - } - O << "]"; -} - -void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '#' << MI->getOperand(OpNum).getImm(); -} - -void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '#' << MI->getOperand(OpNum).getImm(); -} - -void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned EncodedImm = MI->getOperand(OpNum).getImm(); - unsigned EltBits; - uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); - O << "#0x" << utohexstr(Val); -} diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h deleted file mode 100644 index e5ad0d0..0000000 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ /dev/null @@ -1,118 +0,0 @@ -//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an ARM MCInst to a .s file. -// -//===----------------------------------------------------------------------===// - -#ifndef ARMINSTPRINTER_H -#define ARMINSTPRINTER_H - -#include "llvm/MC/MCInstPrinter.h" - -namespace llvm { - class MCOperand; - -class ARMInstPrinter : public MCInstPrinter { - bool VerboseAsm; -public: - ARMInstPrinter(const MCAsmInfo &MAI, bool verboseAsm) - : MCInstPrinter(MAI), VerboseAsm(verboseAsm) {} - - virtual void printInst(const MCInst *MI, raw_ostream &O); - - // Autogenerated by tblgen. 
- void printInstruction(const MCInst *MI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - - - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, - const char *Modifier = 0); - - void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSOImm2PartOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printAddrMode4Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O, - const char *Modifier = 0); - void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O, - const char *Modifier = 0); - void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printAddrModePCOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O, - const char *Modifier = 0); - - void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, unsigned Scale); - void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printCPSOptionOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNegZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printSBitModifierOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCPInstOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O, - const char *Modifier); - void printJTBlockOperand(const 
MCInst *MI, unsigned OpNum, raw_ostream &O) {} - void printJT2BlockOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {} - void printTBAddrMode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); - // FIXME: Implement. - void PrintSpecial(const MCInst *MI, raw_ostream &O, const char *Kind) {} -}; - -} - -#endif diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 18645c0..0000000 --- a/lib/Target/ARM/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARMAsmPrinter - ARMInstPrinter.cpp - ) -add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen) diff --git a/lib/Target/ARM/AsmPrinter/Makefile b/lib/Target/ARM/AsmPrinter/Makefile deleted file mode 100644 index 65d372e..0000000 --- a/lib/Target/ARM/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARMAsmPrinter - -# Hack: we need to include 'main' arm target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 6b4dee5..d3b8b54 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -6,6 +6,7 @@ tablegen(ARMGenRegisterInfo.inc -gen-register-desc)
 tablegen(ARMGenInstrNames.inc -gen-instr-enums)
 tablegen(ARMGenInstrInfo.inc -gen-instr-desc)
 tablegen(ARMGenCodeEmitter.inc -gen-emitter)
+tablegen(ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
 tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
 tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(ARMGenDAGISel.inc -gen-dag-isel)
@@ -13,21 +14,28 @@ tablegen(ARMGenFastISel.inc -gen-fast-isel)
 tablegen(ARMGenCallingConv.inc -gen-callingconv)
 tablegen(ARMGenSubtarget.inc -gen-subtarget)
 tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
+tablegen(ARMGenDecoderTables.inc -gen-arm-decoder)
 
 add_llvm_target(ARMCodeGen
+  ARMAsmBackend.cpp
   ARMAsmPrinter.cpp
   ARMBaseInstrInfo.cpp
   ARMBaseRegisterInfo.cpp
   ARMCodeEmitter.cpp
   ARMConstantIslandPass.cpp
   ARMConstantPoolValue.cpp
+  ARMELFWriterInfo.cpp
   ARMExpandPseudoInsts.cpp
   ARMFastISel.cpp
+  ARMFrameLowering.cpp
   ARMGlobalMerge.cpp
+  ARMHazardRecognizer.cpp
   ARMISelDAGToDAG.cpp
   ARMISelLowering.cpp
   ARMInstrInfo.cpp
   ARMJITInfo.cpp
+  ARMMCCodeEmitter.cpp
+  ARMMCExpr.cpp
   ARMLoadStoreOptimizer.cpp
   ARMMCAsmInfo.cpp
   ARMMCInstLower.cpp
@@ -36,15 +44,26 @@ add_llvm_target(ARMCodeGen
   ARMSubtarget.cpp
   ARMTargetMachine.cpp
   ARMTargetObjectFile.cpp
+  MLxExpansionPass.cpp
   NEONMoveFix.cpp
-  NEONPreAllocPass.cpp
   Thumb1InstrInfo.cpp
+  Thumb1FrameLowering.cpp
   Thumb1RegisterInfo.cpp
-  Thumb2HazardRecognizer.cpp
   Thumb2ITBlockPass.cpp
   Thumb2InstrInfo.cpp
   Thumb2RegisterInfo.cpp
   Thumb2SizeReduction.cpp
   )
 
-target_link_libraries (LLVMARMCodeGen LLVMARMAsmPrinter LLVMSelectionDAG)
+# workaround for hanging compilation on MSVC10
+if( MSVC_VERSION EQUAL 1600 )
+set_property(
+  SOURCE ARMISelLowering.cpp
+  PROPERTY COMPILE_FLAGS "/Od"
+  )
+endif()
+
+add_subdirectory(TargetInfo)
+add_subdirectory(AsmParser)
+add_subdirectory(Disassembler)
+add_subdirectory(InstPrinter)
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index e220289..78d73d3 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -39,9 +39,9 @@
 /// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding
 /// function for a Thumb instruction.
 ///
-#include "../ARMGenDecoderTables.inc"
+#include "ARMGenDecoderTables.inc"
 
-#include "../ARMGenEDInfo.inc"
+#include "ARMGenEDInfo.inc"
 
 using namespace llvm;
 
@@ -89,7 +89,8 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
     return ARM::BFI;
   }
 
-  // Ditto for STRBT, which is a super-instruction for A8.6.199 Encoding A1 & A2.
+  // Ditto for STRBT, which is a super-instruction for A8.6.199 Encodings
+  // A1 & A2.
   // As a result, the decoder fails to decode USAT properly.
   if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
     return ARM::USAT;
@@ -252,9 +253,6 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
   default:
     return Opcode; // Return unmorphed opcode.
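// For context, the Thumb disassembler applies this morph right after
// decoding: loads whose base register Rn is the PC are rewritten to their
// load-literal ("pci") variants so the literal form is what gets printed.
// A minimal sketch of that call pattern, reusing this file's slice()
// helper (morphIfLoadLiteral is a hypothetical wrapper; the real check in
// decodeThumbSideEffect() is more involved):
static unsigned morphIfLoadLiteral(uint32_t insn, unsigned Opcode) {
  // Rn lives in Inst{19-16}; register number 15 is the PC.
  if (slice(insn, 19, 16) == 15)
    Opcode = T2Morph2LoadLiteral(Opcode);
  return Opcode;
}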
-  case ARM::t2LDRDi8:
-    return ARM::t2LDRDpci;
-
   case ARM::t2LDR_POST:  case ARM::t2LDR_PRE:
   case ARM::t2LDRi12:    case ARM::t2LDRi8:
   case ARM::t2LDRs:      case ARM::t2LDRT:
@@ -349,36 +347,6 @@ static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
   return decodeThumbInstruction(insn);
 }
 
-static inline bool Thumb2PreloadOpcodeNoPCI(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    return false;
-  case ARM::t2PLDi12:    case ARM::t2PLDi8:
-  case ARM::t2PLDr:      case ARM::t2PLDs:
-  case ARM::t2PLDWi12:   case ARM::t2PLDWi8:
-  case ARM::t2PLDWr:     case ARM::t2PLDWs:
-  case ARM::t2PLIi12:    case ARM::t2PLIi8:
-  case ARM::t2PLIr:      case ARM::t2PLIs:
-    return true;
-  }
-}
-
-static inline unsigned T2Morph2Preload2PCI(unsigned Opcode) {
-  switch (Opcode) {
-  default:
-    return 0;
-  case ARM::t2PLDi12:    case ARM::t2PLDi8:
-  case ARM::t2PLDr:      case ARM::t2PLDs:
-    return ARM::t2PLDpci;
-  case ARM::t2PLDWi12:   case ARM::t2PLDWi8:
-  case ARM::t2PLDWr:     case ARM::t2PLDWs:
-    return ARM::t2PLDWpci;
-  case ARM::t2PLIi12:    case ARM::t2PLIi8:
-  case ARM::t2PLIr:      case ARM::t2PLIs:
-    return ARM::t2PLIpci;
-  }
-}
-
 //
 // Public interface for the disassembler
 //
@@ -485,11 +453,6 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
   // instructions as well.
   unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn);
 
-  // A8.6.117/119/120/121.
-  // PLD/PLDW/PLI instructions with Rn==15 are transformed to the pci variant.
-  if (Thumb2PreloadOpcodeNoPCI(Opcode) && slice(insn, 19, 16) == 15)
-    Opcode = T2Morph2Preload2PCI(Opcode);
-
   ARMFormat Format = ARMFormats[Opcode];
   Size = IsThumb2 ? 4 : 2;
@@ -568,9 +531,9 @@ static MCDisassembler *createThumbDisassembler(const Target &T) {
   return new ThumbDisassembler;
 }
 
-extern "C" void LLVMInitializeARMDisassembler() { 
+extern "C" void LLVMInitializeARMDisassembler() {
   // Register the disassembler.
-  TargetRegistry::RegisterMCDisassembler(TheARMTarget, 
+  TargetRegistry::RegisterMCDisassembler(TheARMTarget,
                                          createARMDisassembler);
   TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
                                          createThumbDisassembler);
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index 9f493b9..bac68dd 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -79,22 +79,9 @@ const char *ARMUtils::OpcodeName(unsigned Opcode) {
 }
 
 // Return the register enum based on RegClass and the raw register number.
-// For DRegPair, see comments below.
 // FIXME: Auto-gened?
-static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister,
-                                bool DRegPair = false) {
-
-  if (DRegPair && RegClassID == ARM::QPRRegClassID) {
-    // LLVM expects { Dd, Dd+1 } to form a super register; this is not specified
-    // in the ARM Architecture Manual as far as I understand it (A8.6.307).
-    // Therefore, we morph the RegClassID to be the sub register class and don't
-    // subsequently transform the RawRegister encoding when calculating RegNum.
-    //
-    // See also ARMInstPrinter::printOperand() with regard to the "dregpair"
-    // modifier, which this workaround is meant for.
-    RegClassID = ARM::DPRRegClassID;
-  }
-
+static unsigned
+getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) {
   // For this purpose, we can treat rGPR as if it were GPR.
   if (RegClassID == ARM::rGPRRegClassID)
     RegClassID = ARM::GPRRegClassID;
@@ -704,8 +691,8 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
 // MSR/MSRsys: Rm mask=Inst{19-16}
 // BXJ: Rm
 // MSRi/MSRsysi: so_imm
-// SRSW/SRS: addrmode4:$addr mode_imm
-// RFEW/RFE: addrmode4:$addr Rn
+// SRSW/SRS: ldstm_mode:$amode mode_imm
+// RFEW/RFE: ldstm_mode:$amode Rn
 static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -733,35 +720,34 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     NumOpsAdded = 1;
     return true;
   }
-  // MSR and MSRsys take one GPR reg Rm, followed by the mask.
-  if (Opcode == ARM::MSR || Opcode == ARM::MSRsys) {
-    assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID &&
+  // MSR takes a mask, followed by one GPR reg Rm. The mask contains the R Bit
+  // in bit 4, and the special register fields in bits 3-0.
+  if (Opcode == ARM::MSR) {
+    assert(NumOps >= 1 && OpInfo[1].RegClass == ARM::GPRRegClassID &&
           "Reg operand expected");
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
+                                       slice(insn, 19, 16) /* Special Reg */ ));
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        decodeRm(insn))));
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
     NumOpsAdded = 2;
     return true;
   }
-  // MSRi and MSRsysi take one so_imm operand, followed by the mask.
-  if (Opcode == ARM::MSRi || Opcode == ARM::MSRsysi) {
+  // MSRi takes a mask, followed by one so_imm operand. The mask contains the
+  // R Bit in bit 4, and the special register fields in bits 3-0.
+  if (Opcode == ARM::MSRi) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
+                                       slice(insn, 19, 16) /* Special Reg */ ));
     // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
     // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}.
     // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot().
     unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF;
     unsigned Imm = insn & 0xFF;
     MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot)));
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
     NumOpsAdded = 2;
     return true;
   }
-  // SRSW and SRS require addrmode4:$addr for ${addr:submode}, followed by the
-  // mode immediate (Inst{4-0}).
   if (Opcode == ARM::SRSW || Opcode == ARM::SRS ||
       Opcode == ARM::RFEW || Opcode == ARM::RFE) {
-    // ARMInstPrinter::printAddrMode4Operand() prints special mode string
-    // if the base register is SP; so don't set ARM::SP.
-    MI.addOperand(MCOperand::CreateReg(0));
-
     ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
     MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
@@ -807,9 +793,8 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 }
 
 // Misc. Branch Instructions.
-// BR_JTadd, BR_JTr, BR_JTm
 // BLXr9, BXr9
-// BRIND, BX_RET
+// BX, BX_RET
 static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -820,12 +805,12 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   OpIdx = 0;
 
-  // BX_RET has only two predicate operands, do an early return.
-  if (Opcode == ARM::BX_RET)
+  // BX_RET and MOVPCLR have only two predicate operands; do an early return.
+  if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR)
     return true;
 
-  // BLXr9 and BRIND take one GPR reg.
- if (Opcode == ARM::BLXr9 || Opcode == ARM::BRIND) { + // BLXr9 and BX take one GPR reg. + if (Opcode == ARM::BLXr9 || Opcode == ARM::BX) { assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -834,72 +819,6 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } - // BR_JTadd is an ADD with Rd = PC, (Rn, Rm) as the target and index regs. - if (Opcode == ARM::BR_JTadd) { - // InOperandList with GPR:$target and GPR:$idx regs. - - assert(NumOps == 4 && "Expect 4 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - - // Fill in the two remaining imm operands to signify build completion. - MI.addOperand(MCOperand::CreateImm(0)); - MI.addOperand(MCOperand::CreateImm(0)); - - OpIdx = 4; - return true; - } - - // BR_JTr is a MOV with Rd = PC, and Rm as the source register. - if (Opcode == ARM::BR_JTr) { - // InOperandList with GPR::$target reg. - - assert(NumOps == 3 && "Expect 3 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - - // Fill in the two remaining imm operands to signify build completion. - MI.addOperand(MCOperand::CreateImm(0)); - MI.addOperand(MCOperand::CreateImm(0)); - - OpIdx = 3; - return true; - } - - // BR_JTm is an LDR with Rt = PC. - if (Opcode == ARM::BR_JTm) { - // This is the reg/reg form, with base reg followed by +/- reg shop imm. - // See also ARMAddressingModes.h (Addressing Mode #2). - - assert(NumOps == 5 && getIBit(insn) == 1 && "Expect 5 operands && I-bit=1"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); - - ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; - - // Disassemble the offset reg (Rm), shift type, and immediate shift length. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRm(insn)))); - // Inst{6-5} encodes the shift opcode. - ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); - // Inst{11-7} encodes the imm5 shift amount. - unsigned ShImm = slice(insn, 11, 7); - - // A8.4.1. Possible rrx or shift amount of 32... - getImmShiftSE(ShOp, ShImm); - MI.addOperand(MCOperand::CreateImm( - ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp))); - - // Fill in the two remaining imm operands to signify build completion. - MI.addOperand(MCOperand::CreateImm(0)); - MI.addOperand(MCOperand::CreateImm(0)); - - OpIdx = 5; - return true; - } - return false; } @@ -1324,30 +1243,28 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5"); - - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; + NumOpsAdded = 0; unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base, if necessary. - if (Opcode == ARM::LDM_UPD || Opcode == ARM::STM_UPD) { + if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::STMIA_UPD || + Opcode == ARM::LDMDA_UPD || Opcode == ARM::STMDA_UPD || + Opcode == ARM::LDMDB_UPD || Opcode == ARM::STMDB_UPD || + Opcode == ARM::LDMIB_UPD || Opcode == ARM::STMIB_UPD) { MI.addOperand(MCOperand::CreateReg(Base)); - ++OpIdx; + ++NumOpsAdded; } + // Add the base register operand. 
MI.addOperand(MCOperand::CreateReg(Base)); - ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); - MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode))); - // Handling the two predicate operands before the reglist. int64_t CondVal = insn >> ARMII::CondShift; MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 0xE : CondVal)); MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx += 4; + NumOpsAdded += 3; // Fill the variadic part of reglist. unsigned RegListBits = insn & ((1 << 16) - 1); @@ -1355,7 +1272,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if ((RegListBits >> i) & 1) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); - ++OpIdx; + ++NumOpsAdded; } } @@ -1586,8 +1503,7 @@ static unsigned decodeVFPRm(uint32_t insn, bool isSPVFP) { } // A7.5.1 -#if 0 -static uint64_t VFPExpandImm(unsigned char byte, unsigned N) { +static APInt VFPExpandImm(unsigned char byte, unsigned N) { assert(N == 32 || N == 64); uint64_t Result; @@ -1602,13 +1518,12 @@ static uint64_t VFPExpandImm(unsigned char byte, unsigned N) { Result = (uint64_t)slice(byte, 7, 7) << 63 | (uint64_t)slice(byte, 5, 0) << 48; if (bit6) - Result |= 0xffL << 54; + Result |= 0xffULL << 54; else - Result |= 0x1L << 62; + Result |= 0x1ULL << 62; } - return Result; + return APInt(N, Result); } -#endif // VFP Unary Format Instructions: // @@ -1902,8 +1817,10 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base, if necessary. - if (Opcode == ARM::VLDMD_UPD || Opcode == ARM::VLDMS_UPD || - Opcode == ARM::VSTMD_UPD || Opcode == ARM::VSTMS_UPD) { + if (Opcode == ARM::VLDMDIA_UPD || Opcode == ARM::VLDMSIA_UPD || + Opcode == ARM::VLDMDDB_UPD || Opcode == ARM::VLDMSDB_UPD || + Opcode == ARM::VSTMDIA_UPD || Opcode == ARM::VSTMSIA_UPD || + Opcode == ARM::VSTMDDB_UPD || Opcode == ARM::VSTMSDB_UPD) { MI.addOperand(MCOperand::CreateReg(Base)); ++OpIdx; } @@ -1926,8 +1843,10 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx += 4; - bool isSPVFP = (Opcode == ARM::VLDMS || Opcode == ARM::VLDMS_UPD || - Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD); + bool isSPVFP = (Opcode == ARM::VLDMSIA || Opcode == ARM::VLDMSDB || + Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD || + Opcode == ARM::VSTMSIA || Opcode == ARM::VSTMSDB || + Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD); unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; // Extract Dd/Sd. @@ -1985,10 +1904,14 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Extract/decode the f64/f32 immediate. if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - // The asm syntax specifies the before-expanded <imm>. - // Not VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0), - // Opcode == ARM::FCONSTD ? 64 : 32) - MI.addOperand(MCOperand::CreateImm(slice(insn,19,16)<<4 | slice(insn,3,0))); + // The asm syntax specifies the floating point value, not the 8-bit literal. + APInt immRaw = VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0), + Opcode == ARM::FCONSTD ? 64 : 32); + APFloat immFP = APFloat(immRaw, true); + double imm = Opcode == ARM::FCONSTD ? 
immFP.convertToDouble() : + immFP.convertToFloat(); + MI.addOperand(MCOperand::CreateFPImm(imm)); + ++OpIdx; } @@ -2201,22 +2124,6 @@ static unsigned decodeN3VImm(uint32_t insn) { return (insn >> 8) & 0xF; } -static bool UseDRegPair(unsigned Opcode) { - switch (Opcode) { - default: - return false; - case ARM::VLD1q8_UPD: - case ARM::VLD1q16_UPD: - case ARM::VLD1q32_UPD: - case ARM::VLD1q64_UPD: - case ARM::VST1q8_UPD: - case ARM::VST1q16_UPD: - case ARM::VST1q32_UPD: - case ARM::VST1q64_UPD: - return true; - } -} - // VLD* // D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] // VLD*LN* @@ -2243,10 +2150,9 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // We have homogeneous NEON registers for Load/Store. unsigned RegClass = 0; - bool DRegPair = UseDRegPair(Opcode); // Double-spaced registers have increments of 2. - unsigned Inc = (DblSpaced || DRegPair) ? 2 : 1; + unsigned Inc = DblSpaced ? 2 : 1; unsigned Rn = decodeRn(insn); unsigned Rm = decodeRm(insn); @@ -2292,7 +2198,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, RegClass = OpInfo[OpIdx].RegClass; while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, Rd, DRegPair))); + getRegisterEnum(B, RegClass, Rd))); Rd += Inc; ++OpIdx; } @@ -2311,7 +2217,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, RegClass, Rd, DRegPair))); + getRegisterEnum(B, RegClass, Rd))); Rd += Inc; ++OpIdx; } @@ -2771,8 +2677,8 @@ static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode, return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, N3V_VectorShift, B); } -static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO B) { +static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode, + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, N3V_VectorExtract, B); @@ -2959,9 +2865,9 @@ static inline bool MemBarrierInstr(uint32_t insn) { static inline bool PreLoadOpcode(unsigned Opcode) { switch(Opcode) { - case ARM::PLDi: case ARM::PLDr: - case ARM::PLDWi: case ARM::PLDWr: - case ARM::PLIi: case ARM::PLIr: + case ARM::PLDi12: case ARM::PLDrs: + case ARM::PLDWi12: case ARM::PLDWrs: + case ARM::PLIi12: case ARM::PLIrs: return true; default: return false; @@ -2971,18 +2877,21 @@ static inline bool PreLoadOpcode(unsigned Opcode) { static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - // Preload Data/Instruction requires either 2 or 4 operands. - // PLDi, PLDWi, PLIi: Rn [+/-]imm12 add = (U == '1') - // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: Rn Rm addrmode2_opc + // Preload Data/Instruction requires either 2 or 3 operands. + // PLDi, PLDWi, PLIi: addrmode_imm12 + // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: ldst_so_reg MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); - if (Opcode == ARM::PLDi || Opcode == ARM::PLDWi || Opcode == ARM::PLIi) { + if (Opcode == ARM::PLDi12 || Opcode == ARM::PLDWi12 + || Opcode == ARM::PLIi12) { unsigned Imm12 = slice(insn, 11, 0); bool Negative = getUBit(insn) == 0; - int Offset = Negative ? 
-1 - Imm12 : 1 * Imm12; - MI.addOperand(MCOperand::CreateImm(Offset)); + // -0 is represented specially. All other values are as normal. + if (Imm12 == 0 && Negative) + Imm12 = INT32_MIN; + MI.addOperand(MCOperand::CreateImm(Imm12)); NumOpsAdded = 2; } else { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -3026,22 +2935,36 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, case ARM::WFE: case ARM::WFI: case ARM::SEV: - case ARM::SETENDBE: - case ARM::SETENDLE: return true; default: break; } - // CPS has a singleton $opt operand that contains the following information: - // opt{4-0} = mode from Inst{4-0} - // opt{5} = changemode from Inst{17} - // opt{8-6} = AIF from Inst{8-6} - // opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable - if (Opcode == ARM::CPS) { - unsigned Option = slice(insn, 4, 0) | slice(insn, 17, 17) << 5 | - slice(insn, 8, 6) << 6 | slice(insn, 19, 18) << 9; - MI.addOperand(MCOperand::CreateImm(Option)); + if (Opcode == ARM::SETEND) { + NumOpsAdded = 1; + MI.addOperand(MCOperand::CreateImm(slice(insn, 9, 9))); + return true; + } + + // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different + // opcodes which match the same real instruction. This is needed since there's + // no current handling of optional arguments. Fix here when a better handling + // of optional arguments is implemented. + if (Opcode == ARM::CPS3p) { + MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod + MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags + MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode + NumOpsAdded = 3; + return true; + } + if (Opcode == ARM::CPS2p) { + MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod + MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags + NumOpsAdded = 2; + return true; + } + if (Opcode == ARM::CPS1p) { + MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode NumOpsAdded = 1; return true; } diff --git a/lib/Target/ARM/Disassembler/CMakeLists.txt b/lib/Target/ARM/Disassembler/CMakeLists.txt new file mode 100644 index 0000000..b23dd6b --- /dev/null +++ b/lib/Target/ARM/Disassembler/CMakeLists.txt @@ -0,0 +1,14 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMARMDisassembler + ARMDisassembler.cpp + ARMDisassemblerCore.cpp + ) +# workaround for hanging compilation on MSVC8, 9 and 10 +if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) +set_property( + SOURCE ARMDisassembler.cpp + PROPERTY COMPILE_FLAGS "/Od" + ) +endif() +add_dependencies(LLVMARMDisassembler ARMCodeGenTable_gen) diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 112817b..23372e0 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -564,6 +564,38 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, // t_addrmode_sp := sp + imm8 * 4 // +// A8.6.63 LDRB (literal) +// A8.6.79 LDRSB (literal) +// A8.6.75 LDRH (literal) +// A8.6.83 LDRSH (literal) +// A8.6.59 LDR (literal) +// +// These instrs calculate an address from the PC value and an immediate offset. 
+// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1)
+static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
+    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  if (!OpInfo) return false;
+
+  assert(NumOps >= 2 &&
+         OpInfo[0].RegClass == ARM::GPRRegClassID &&
+         OpInfo[1].RegClass < 0 &&
+         "Expect >= 2 operands, first as reg, and second as imm operand");
+
+  // Build the register operand, followed by the (+/-)imm12 immediate.
+
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+                                                     decodeRd(insn))));
+
+  MI.addOperand(MCOperand::CreateImm(decodeImm12(insn)));
+
+  NumOpsAdded = 2;
+
+  return true;
+}
+
+
 // A6.2.4 Load/store single data item
 //
 // Load/Store Register (reg|imm): tRd tRn imm5 tRm
@@ -796,14 +828,13 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
   }
 
   // CPS has a singleton $opt operand that contains the following information:
-  // opt{4-0} = don't care
-  // opt{5} = 0 (false)
-  // opt{8-6} = AIF from Inst{2-0}
-  // opt{10-9} = 1:imod from Inst{4} with 0b10 as enable and 0b11 as disable
+  // The first op would be 0b10 as enable and 0b11 as disable in regular ARM,
+  // but in Thumb it is 0 as enable and 1 as disable. So map it to ARM's
+  // default one. The second gets the AIF flags from Inst{2-0}.
   if (Opcode == ARM::tCPS) {
-    unsigned Option = slice(insn, 2, 0) << 6 | slice(insn, 4, 4) << 9 | 1 << 10;
-    MI.addOperand(MCOperand::CreateImm(Option));
-    NumOpsAdded = 1;
+    MI.addOperand(MCOperand::CreateImm(2 + slice(insn, 4, 4)));
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 2, 0)));
+    NumOpsAdded = 2;
     return true;
   }
@@ -833,40 +864,32 @@
 // A8.6.53 LDM / LDMIA
 // A8.6.189 STM / STMIA
 //
-// tLDM_UPD/tSTM_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list
-// tLDM: tRt AM4ModeImm Pred-Imm Pred-CCR register_list
+// tLDMIA_UPD/tSTMIA_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list
+// tLDMIA: tRt AM4ModeImm Pred-Imm Pred-CCR register_list
 static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
-    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  assert((Opcode == ARM::tLDM || Opcode == ARM::tLDM_UPD ||
-          Opcode == ARM::tSTM_UPD) && "Unexpected opcode");
-
-  unsigned &OpIdx = NumOpsAdded;
+                                     uint32_t insn, unsigned short NumOps,
+                                     unsigned &NumOpsAdded, BO B) {
+  assert((Opcode == ARM::tLDMIA || Opcode == ARM::tLDMIA_UPD ||
+          Opcode == ARM::tSTMIA_UPD) && "Unexpected opcode");
 
   unsigned tRt = getT1tRt(insn);
-
-  OpIdx = 0;
+  NumOpsAdded = 0;
 
   // WB register, if necessary.
-  if (Opcode == ARM::tLDM_UPD || Opcode == ARM::tSTM_UPD) {
+  if (Opcode == ARM::tLDMIA_UPD || Opcode == ARM::tSTMIA_UPD) {
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        tRt)));
-    ++OpIdx;
+    ++NumOpsAdded;
   }
 
   MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                      tRt)));
-  ++OpIdx;
-
-  // A8.6.53 LDM / LDMIA / LDMFD - Encoding T1
-  // A8.6.53 STM / STMIA / STMEA - Encoding T1
-  MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)));
-  ++OpIdx;
+  ++NumOpsAdded;
 
   // Handling the two predicate operands before the reglist.
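// (DoPredicateOperands() is expected to append the condition-code immediate
// and the CPSR register operand here, hence the two-operand bump below.)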
- if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) - OpIdx += 2; - else { + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) { + NumOpsAdded += 2; + } else { DEBUG(errs() << "Expected predicate operands not found.\n"); return false; } @@ -874,13 +897,12 @@ static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, unsigned RegListBits = slice(insn, 7, 0); // Fill the variadic part of reglist. - for (unsigned i = 0; i < 8; ++i) { + for (unsigned i = 0; i < 8; ++i) if ((RegListBits >> i) & 1) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, i))); - ++OpIdx; + ++NumOpsAdded; } - } return true; } @@ -959,22 +981,23 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, // corresponding to op. // // Table A6-1 16-bit Thumb instruction encoding (abridged) -// op Instruction or instruction class -// ------ -------------------------------------------------------------------- -// 00xxxx Shift (immediate), add, subtract, move, and compare on page A6-7 -// 010000 Data-processing on page A6-8 -// 010001 Special data instructions and branch and exchange on page A6-9 -// 01001x Load from Literal Pool, see LDR (literal) on page A8-122 -// 0101xx Load/store single data item on page A6-10 +// op Instruction or instruction class +// ------ -------------------------------------------------------------------- +// 00xxxx Shift (immediate), add, subtract, move, and compare on page A6-7 +// 010000 Data-processing on page A6-8 +// 010001 Special data instructions and branch and exchange on page A6-9 +// 01001x Load from Literal Pool, see LDR (literal) on page A8-122 +// 0101xx Load/store single data item on page A6-10 // 011xxx // 100xxx -// 10100x Generate PC-relative address, see ADR on page A8-32 -// 10101x Generate SP-relative address, see ADD (SP plus immediate) on page A8-28 -// 1011xx Miscellaneous 16-bit instructions on page A6-11 -// 11000x Store multiple registers, see STM / STMIA / STMEA on page A8-374 -// 11001x Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110 a -// 1101xx Conditional branch, and Supervisor Call on page A6-13 -// 11100x Unconditional Branch, see B on page A8-44 +// 10100x Generate PC-relative address, see ADR on page A8-32 +// 10101x Generate SP-relative address, see ADD (SP plus immediate) on +// page A8-28 +// 1011xx Miscellaneous 16-bit instructions on page A6-11 +// 11000x Store multiple registers, see STM / STMIA / STMEA on page A8-374 +// 11001x Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110 a +// 1101xx Conditional branch, and Supervisor Call on page A6-13 +// 11100x Unconditional Branch, see B on page A8-44 // static bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -1121,34 +1144,31 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, if (Thumb2RFEOpcode(Opcode)) return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B); - assert((Opcode == ARM::t2LDM || Opcode == ARM::t2LDM_UPD || - Opcode == ARM::t2STM || Opcode == ARM::t2STM_UPD) + assert((Opcode == ARM::t2LDMIA || Opcode == ARM::t2LDMIA_UPD || + Opcode == ARM::t2LDMDB || Opcode == ARM::t2LDMDB_UPD || + Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD || + Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD) && "Unexpected opcode"); assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5"); - unsigned &OpIdx = NumOpsAdded; - - OpIdx = 0; + NumOpsAdded = 0; unsigned Base = 
getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base. - if (Opcode == ARM::t2LDM_UPD || Opcode == ARM::t2STM_UPD) { + if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD || + Opcode == ARM::t2STMIA_UPD || Opcode == ARM::t2STMDB_UPD) { MI.addOperand(MCOperand::CreateReg(Base)); - ++OpIdx; + ++NumOpsAdded; } MI.addOperand(MCOperand::CreateReg(Base)); - ++OpIdx; - - ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); - MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode))); - ++OpIdx; + ++NumOpsAdded; // Handling the two predicate operands before the reglist. - if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) - OpIdx += 2; - else { + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) { + NumOpsAdded += 2; + } else { DEBUG(errs() << "Expected predicate operands not found.\n"); return false; } @@ -1156,13 +1176,12 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned RegListBits = insn & ((1 << 16) - 1); // Fill the variadic part of reglist. - for (unsigned i = 0; i < 16; ++i) { + for (unsigned i = 0; i < 16; ++i) if ((RegListBits >> i) & 1) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); - ++OpIdx; + ++NumOpsAdded; } - } return true; } @@ -1260,13 +1279,7 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, return true; } -// PC-based defined for Codegen, which do not get decoded by design: -// -// t2TBB, t2TBH: Rm immDontCare immDontCare -// -// Generic version defined for disassembly: -// -// t2TBBgen, t2TBHgen: Rn Rm Pred-Imm Pred-CCR +// t2TBB, t2TBH: Rn Rm Pred-Imm Pred-CCR static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -1401,7 +1414,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, // // Two register operands: Rs Rn ModImm // One register operands (Rs=0b1111 no explicit dest reg): Rn ModImm -// One register operands (Rn=0b1111 no explicit src reg): Rs ModImm - {t2MOVi, t2MVNi} +// One register operands (Rn=0b1111 no explicit src reg): Rs ModImm - +// {t2MOVi, t2MVNi} // // ModImm = ThumbExpandImm(i:imm3:imm8) static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, @@ -1644,15 +1658,25 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, break; } - // CPS has a singleton $opt operand that contains the following information: - // opt{4-0} = mode from Inst{4-0} - // opt{5} = changemode from Inst{8} - // opt{8-6} = AIF from Inst{7-5} - // opt{10-9} = imod from Inst{10-9} with 0b10 as enable and 0b11 as disable - if (Opcode == ARM::t2CPS) { - unsigned Option = slice(insn, 4, 0) | slice(insn, 8, 8) << 5 | - slice(insn, 7, 5) << 6 | slice(insn, 10, 9) << 9; - MI.addOperand(MCOperand::CreateImm(Option)); + // FIXME: To enable correct asm parsing and disasm of CPS we need 3 different + // opcodes which match the same real instruction. This is needed since there's + // no current handling of optional arguments. Fix here when a better handling + // of optional arguments is implemented. 
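// For illustration, the three opcodes line up with the assembly forms
// roughly as follows (a hedged sketch, not normative UAL syntax):
//   cpsie if, #mode   ->  t2CPS3p  (imod, iflags, mode)
//   cpsie if          ->  t2CPS2p  (imod, iflags)
//   cps   #mode       ->  t2CPS1p  (mode)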
+  if (Opcode == ARM::t2CPS3p) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9)));  // imod
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5)));   // iflags
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));   // mode
+    NumOpsAdded = 3;
+    return true;
+  }
+  if (Opcode == ARM::t2CPS2p) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 10, 9)));  // imod
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 5)));   // iflags
+    NumOpsAdded = 2;
+    return true;
+  }
+  if (Opcode == ARM::t2CPS1p) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0)));   // mode
     NumOpsAdded = 1;
     return true;
   }
@@ -1678,11 +1702,13 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
     NumOpsAdded = 1;
     return true;
   }
-  // MSR and MSRsys take one GPR reg Rn, followed by the mask.
-  if (Opcode == ARM::t2MSR || Opcode == ARM::t2MSRsys || Opcode == ARM::t2BXJ) {
+  // MSR takes a mask, followed by one GPR reg Rn. The mask contains the R Bit
+  // in bit 4, and the special register fields in bits 3-0.
+  if (Opcode == ARM::t2MSR) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 20) << 4 /* R Bit */ |
+                                       slice(insn, 11, 8) /* Special Reg */));
     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        decodeRn(insn))));
-    MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8)));
     NumOpsAdded = 2;
     return true;
   }
@@ -1728,12 +1754,12 @@ static inline bool Thumb2PreloadOpcode(unsigned Opcode) {
   switch (Opcode) {
   default:
     return false;
-  case ARM::t2PLDi12:   case ARM::t2PLDi8:   case ARM::t2PLDpci:
-  case ARM::t2PLDr:     case ARM::t2PLDs:
-  case ARM::t2PLDWi12:  case ARM::t2PLDWi8:  case ARM::t2PLDWpci:
-  case ARM::t2PLDWr:    case ARM::t2PLDWs:
-  case ARM::t2PLIi12:   case ARM::t2PLIi8:   case ARM::t2PLIpci:
-  case ARM::t2PLIr:     case ARM::t2PLIs:
+  case ARM::t2PLDi12:   case ARM::t2PLDi8:
+  case ARM::t2PLDs:
+  case ARM::t2PLDWi12:  case ARM::t2PLDWi8:
+  case ARM::t2PLDWs:
+  case ARM::t2PLIi12:   case ARM::t2PLIi8:
+  case ARM::t2PLIs:
     return true;
   }
 }
@@ -1769,11 +1795,10 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
          && !OpInfo[OpIdx].isOptionalDef()
          && "Pure imm operand expected");
   int Offset = 0;
-  if (Opcode == ARM::t2PLDpci || Opcode == ARM::t2PLDWpci ||
-      Opcode == ARM::t2PLIpci) {
+  if (slice(insn, 19, 16) == 0xFF) {
     bool Negative = slice(insn, 23, 23) == 0;
     unsigned Imm12 = getImm12(insn);
-    Offset = Negative ? -1 - Imm12 : 1 * Imm12;
+    Offset = Negative ? -1 - Imm12 : 1 * Imm12;
   } else if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 ||
              Opcode == ARM::t2PLIi8) {
     // A8.6.117 Encoding T2: add = FALSE
@@ -1795,37 +1820,6 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
   return true;
 }
 
-// A8.6.63 LDRB (literal)
-// A8.6.79 LDRSB (literal)
-// A8.6.75 LDRH (literal)
-// A8.6.83 LDRSH (literal)
-// A8.6.59 LDR (literal)
-//
-// These instrs calculate an address from the PC value and an immediate offset.
-// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1)
-static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
-    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
-
-  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
-  if (!OpInfo) return false;
-
-  assert(NumOps >= 2 &&
-         OpInfo[0].RegClass == ARM::GPRRegClassID &&
-         OpInfo[1].RegClass < 0 &&
-         "Expect >= 2 operands, first as reg, and second as imm operand");
-
-  // Build the register operand, followed by the (+/-)imm12 immediate.
- - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); - - MI.addOperand(MCOperand::CreateImm(decodeImm12(insn))); - - NumOpsAdded = 2; - - return true; -} - // A6.3.10 Store single data item // A6.3.9 Load byte, memory hints // A6.3.8 Load halfword, memory hints @@ -1835,13 +1829,15 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, // // t2LDRi12: Rd Rn (+)imm12 // t2LDRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1) -// t2LDRs: Rd Rn Rm ConstantShiftSpecifier (see also DisassembleThumb2DPSoReg) +// t2LDRs: Rd Rn Rm ConstantShiftSpecifier (see also +// DisassembleThumb2DPSoReg) // t2LDR_POST: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) // t2LDR_PRE: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) // // t2STRi12: Rd Rn (+)imm12 // t2STRi8: Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1) -// t2STRs: Rd Rn Rm ConstantShiftSpecifier (see also DisassembleThumb2DPSoReg) +// t2STRs: Rd Rn Rm ConstantShiftSpecifier (see also +// DisassembleThumb2DPSoReg) // t2STR_POST: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) // t2STR_PRE: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) // @@ -1862,7 +1858,6 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, // See, for example, A6.3.7 Load word: Table A6-18 Load word. if (Load && Rn == 15) return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B); - const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1909,7 +1904,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, else Imm = decodeImm8(insn); } - + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, R0))); ++OpIdx; @@ -2081,25 +2076,29 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, // corresponding to (op1, op2, op). 
// // Table A6-9 32-bit Thumb instruction encoding -// op1 op2 op Instruction class, see -// --- ------- -- ------------------------------------------------------------ -// 01 00xx0xx - Load/store multiple on page A6-23 -// 00xx1xx - Load/store dual, load/store exclusive, table branch on page A6-24 -// 01xxxxx - Data-processing (shifted register) on page A6-31 -// 1xxxxxx - Coprocessor instructions on page A6-40 -// 10 x0xxxxx 0 Data-processing (modified immediate) on page A6-15 -// x1xxxxx 0 Data-processing (plain binary immediate) on page A6-19 -// - 1 Branches and miscellaneous control on page A6-20 -// 11 000xxx0 - Store single data item on page A6-30 -// 001xxx0 - Advanced SIMD element or structure load/store instructions on page A7-27 -// 00xx001 - Load byte, memory hints on page A6-28 -// 00xx011 - Load halfword, memory hints on page A6-26 -// 00xx101 - Load word on page A6-25 -// 00xx111 - UNDEFINED -// 010xxxx - Data-processing (register) on page A6-33 -// 0110xxx - Multiply, multiply accumulate, and absolute difference on page A6-38 -// 0111xxx - Long multiply, long multiply accumulate, and divide on page A6-39 -// 1xxxxxx - Coprocessor instructions on page A6-40 +// op1 op2 op Instruction class, see +// --- ------- -- ----------------------------------------------------------- +// 01 00xx0xx - Load/store multiple on page A6-23 +// 00xx1xx - Load/store dual, load/store exclusive, table branch on +// page A6-24 +// 01xxxxx - Data-processing (shifted register) on page A6-31 +// 1xxxxxx - Coprocessor instructions on page A6-40 +// 10 x0xxxxx 0 Data-processing (modified immediate) on page A6-15 +// x1xxxxx 0 Data-processing (plain binary immediate) on page A6-19 +// - 1 Branches and miscellaneous control on page A6-20 +// 11 000xxx0 - Store single data item on page A6-30 +// 001xxx0 - Advanced SIMD element or structure load/store instructions +// on page A7-27 +// 00xx001 - Load byte, memory hints on page A6-28 +// 00xx011 - Load halfword, memory hints on page A6-26 +// 00xx101 - Load word on page A6-25 +// 00xx111 - UNDEFINED +// 010xxxx - Data-processing (register) on page A6-33 +// 0110xxx - Multiply, multiply accumulate, and absolute difference on +// page A6-38 +// 0111xxx - Long multiply, long multiply accumulate, and divide on +// page A6-39 +// 1xxxxxx - Coprocessor instructions on page A6-40 // static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, @@ -2130,7 +2129,7 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded, B); } - if (Opcode == ARM::t2TBBgen || Opcode == ARM::t2TBHgen) { + if (Opcode == ARM::t2TBB || Opcode == ARM::t2TBH) { // Table branch. 
return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B); } @@ -2175,7 +2174,8 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, } } else { // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word - return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded, B); + return DisassembleThumb2LdSt(true, MI, Opcode, insn, NumOps, + NumOpsAdded, B); } break; case 1: @@ -2229,7 +2229,7 @@ static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // A6.3 32-bit Thumb instruction encoding - + uint16_t op1 = slice(HalfWord, 12, 11); uint16_t op2 = slice(HalfWord, 10, 4); uint16_t op = slice(insn, 15, 15); diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp new file mode 100644 index 0000000..1499da0 --- /dev/null +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -0,0 +1,711 @@ +//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an ARM MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "ARMBaseInfo.h" +#include "ARMInstPrinter.h" +#include "ARMAddressingModes.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define GET_INSTRUCTION_NAME +#include "ARMGenAsmWriter.inc" + +StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + + +void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { + unsigned Opcode = MI->getOpcode(); + + // Check for MOVs and print canonical forms, instead. + if (Opcode == ARM::MOVs) { + // FIXME: Thumb variants? 
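  // Worked example (register choices here are illustrative only): with this
  // canonicalization a shifted-register move prints as
  //   MOVs r0, r1, lsl #3   ->   lsl r0, r1, #3
  //   MOVs r0, r1, rrx      ->   rrx r0, r1      (rrx takes no shift amount)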
+ const MCOperand &Dst = MI->getOperand(0); + const MCOperand &MO1 = MI->getOperand(1); + const MCOperand &MO2 = MI->getOperand(2); + const MCOperand &MO3 = MI->getOperand(3); + + O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())); + printSBitModifierOperand(MI, 6, O); + printPredicateOperand(MI, 4, O); + + O << '\t' << getRegisterName(Dst.getReg()) + << ", " << getRegisterName(MO1.getReg()); + + if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx) + return; + + O << ", "; + + if (MO2.getReg()) { + O << getRegisterName(MO2.getReg()); + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); + } else { + O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } + return; + } + + // A8.6.123 PUSH + if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + O << '\t' << "push"; + printPredicateOperand(MI, 2, O); + if (Opcode == ARM::t2STMDB_UPD) + O << ".w"; + O << '\t'; + printRegisterList(MI, 4, O); + return; + } + + // A8.6.122 POP + if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + O << '\t' << "pop"; + printPredicateOperand(MI, 2, O); + if (Opcode == ARM::t2LDMIA_UPD) + O << ".w"; + O << '\t'; + printRegisterList(MI, 4, O); + return; + } + + // A8.6.355 VPUSH + if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + O << '\t' << "vpush"; + printPredicateOperand(MI, 2, O); + O << '\t'; + printRegisterList(MI, 4, O); + return; + } + + // A8.6.354 VPOP + if ((Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMDIA_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + O << '\t' << "vpop"; + printPredicateOperand(MI, 2, O); + O << '\t'; + printRegisterList(MI, 4, O); + return; + } + + printInstruction(MI, O); +} + +void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + O << getRegisterName(Reg); + } else if (Op.isImm()) { + O << '#' << Op.getImm(); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + O << *Op.getExpr(); + } +} + +static void printSOImm(raw_ostream &O, int64_t V, raw_ostream *CommentStream, + const MCAsmInfo *MAI) { + // Break it up into two parts that make up a shifter immediate. + V = ARM_AM::getSOImmVal(V); + assert(V != -1 && "Not a valid so_imm value!"); + + unsigned Imm = ARM_AM::getSOImmValImm(V); + unsigned Rot = ARM_AM::getSOImmValRot(V); + + // Print low-level immediate formation info, per + // A5.1.3: "Data-processing operands - Immediate". + if (Rot) { + O << "#" << Imm << ", " << Rot; + // Pretty printed version. + if (CommentStream) + *CommentStream << (int)ARM_AM::rotr32(Imm, Rot) << "\n"; + } else { + O << "#" << Imm; + } +} + + +/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit +/// immediate in bits 0-7. +void ARMInstPrinter::printSOImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + assert(MO.isImm() && "Not a valid so_imm value!"); + printSOImm(O, MO.getImm(), CommentStream, &MAI); +} + +// so_reg is a 4-operand unit corresponding to register forms of the A5.1 +// "Addressing Mode 1 - Data-processing operands" forms. This includes: +// REG 0 0 - e.g. R5 +// REG REG 0,SH_OPC - e.g. R5, ROR R3 +// REG 0 IMM,SH_OPC - e.g. 
R5, LSL #3 +void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); + + O << getRegisterName(MO1.getReg()); + + // Print the shift opc. + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); + if (MO2.getReg()) { + O << ' ' << getRegisterName(MO2.getReg()); + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); + } else if (ShOpc != ARM_AM::rrx) { + O << " #" << ARM_AM::getSORegOffset(MO3.getImm()); + } +} + + +void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op, O); + return; + } + + O << "[" << getRegisterName(MO1.getReg()); + + if (!MO2.getReg()) { + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. + O << ", #" + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << ARM_AM::getAM2Offset(MO3.getImm()); + O << "]"; + return; + } + + O << ", " + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << getRegisterName(MO2.getReg()); + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) + << " #" << ShImm; + O << "]"; +} + +void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + if (!MO1.getReg()) { + unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) + << ImmOffs; + return; + } + + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) + << getRegisterName(MO1.getReg()); + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm())) + << " #" << ShImm; +} + +void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); + + O << '[' << getRegisterName(MO1.getReg()); + + if (MO2.getReg()) { + O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm()) + << getRegisterName(MO2.getReg()) << ']'; + return; + } + + if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) + O << ", #" + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) + << ImmOffs; + O << ']'; +} + +void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + if (MO1.getReg()) { + O << (char)ARM_AM::getAM3Op(MO2.getImm()) + << getRegisterName(MO1.getReg()); + return; + } + + unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) + << ImmOffs; +} + +void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum) + .getImm()); + O << ARM_AM::getAMSubModeStr(Mode); +} + +void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, 
unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, OpNum, O); + return; + } + + O << "[" << getRegisterName(MO1.getReg()); + + if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { + O << ", #" + << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) + << ImmOffs * 4; + } + O << "]"; +} + +void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + if (MO2.getImm()) { + // FIXME: Both darwin as and GNU as violate ARM docs here. + O << ", :" << (MO2.getImm() << 3); + } + O << "]"; +} + +void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.getReg() == 0) + O << "!"; + else + O << ", " << getRegisterName(MO.getReg()); +} + +void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + uint32_t v = ~MO.getImm(); + int32_t lsb = CountTrailingZeros_32(v); + int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb; + assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); + O << '#' << lsb << ", #" << width; +} + +void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned val = MI->getOperand(OpNum).getImm(); + O << ARM_MB::MemBOptToString(val); +} + +void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned ShiftOp = MI->getOperand(OpNum).getImm(); + ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); + switch (Opc) { + case ARM_AM::no_shift: + return; + case ARM_AM::lsl: + O << ", lsl #"; + break; + case ARM_AM::asr: + O << ", asr #"; + break; + default: + assert(0 && "unexpected shift opcode for shift immediate operand"); + } + O << ARM_AM::getSORegOffset(ShiftOp); +} + +void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "{"; + for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { + if (i != OpNum) O << ", "; + O << getRegisterName(MI->getOperand(i).getReg()); + } + O << "}"; +} + +void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNum); + if (Op.getImm()) + O << "be"; + else + O << "le"; +} + +void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNum); + O << ARM_PROC::IModToString(Op.getImm()); +} + +void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNum); + unsigned IFlags = Op.getImm(); + for (int i=2; i >= 0; --i) + if (IFlags & (1 << i)) + O << ARM_PROC::IFlagsToString(1 << i); +} + +void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNum); + unsigned SpecRegRBit = Op.getImm() >> 4; + unsigned Mask = Op.getImm() & 0xf; + + if (SpecRegRBit) + O << "spsr"; + else + O << "cpsr"; + + if (Mask) { + O << '_'; + if (Mask & 8) O << 'f'; + if (Mask & 4) O << 's'; + if (Mask & 2) O << 'x'; + if (Mask & 1) O << 'c'; + } +} + +void 
ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNum); + O << '#'; + if (Op.getImm() < 0) + O << '-' << (-Op.getImm() - 1); + else + O << Op.getImm(); +} + +void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); + if (CC != ARMCC::AL) + O << ARMCondCodeToString(CC); +} + +void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); + O << ARMCondCodeToString(CC); +} + +void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + if (MI->getOperand(OpNum).getReg()) { + assert(MI->getOperand(OpNum).getReg() == ARM::CPSR && + "Expect ARM CPSR register!"); + O << 's'; + } +} + +void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << MI->getOperand(OpNum).getImm(); +} + +void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "p" << MI->getOperand(OpNum).getImm(); +} + +void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "c" << MI->getOperand(OpNum).getImm(); +} + +void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); +} + +void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#" << MI->getOperand(OpNum).getImm() * 4; +} + +void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + // (3 - the number of trailing zeros) is the number of then / else. + unsigned Mask = MI->getOperand(OpNum).getImm(); + unsigned CondBit0 = Mask >> 4 & 1; + unsigned NumTZ = CountTrailingZeros_32(Mask); + assert(NumTZ <= 3 && "Invalid IT mask!"); + for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { + bool T = ((Mask >> Pos) & 1) == CondBit0; + if (T) + O << 't'; + else + O << 'e'; + } +} + +void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op + 1); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op, O); + return; + } + + O << "[" << getRegisterName(MO1.getReg()); + if (unsigned RegNum = MO2.getReg()) + O << ", " << getRegisterName(RegNum); + O << "]"; +} + +void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, + unsigned Op, + raw_ostream &O, + unsigned Scale) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op + 1); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, Op, O); + return; + } + + O << "[" << getRegisterName(MO1.getReg()); + if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs * Scale; + O << "]"; +} + +void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI, + unsigned Op, + raw_ostream &O) { + printThumbAddrModeImm5SOperand(MI, Op, O, 1); +} + +void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI, + unsigned Op, + raw_ostream &O) { + printThumbAddrModeImm5SOperand(MI, Op, O, 2); +} + +void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI, + unsigned Op, + raw_ostream &O) { + printThumbAddrModeImm5SOperand(MI, Op, O, 4); +} + +void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op, + raw_ostream &O) { + printThumbAddrModeImm5SOperand(MI, Op, O, 4); +} + +// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 +// register with shift forms. +// REG 0 0 - e.g. R5 +// REG IMM, SH_OPC - e.g. R5, LSL #3 +void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + unsigned Reg = MO1.getReg(); + O << getRegisterName(Reg); + + // Print the shift opc. + assert(MO2.isImm() && "Not a valid t2_so_reg value!"); + ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm()); + O << ", " << ARM_AM::getShiftOpcStr(ShOpc); + if (ShOpc != ARM_AM::rrx) + O << " #" << ARM_AM::getSORegOffset(MO2.getImm()); +} + +void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, OpNum, O); + return; + } + + O << "[" << getRegisterName(MO1.getReg()); + + int32_t OffImm = (int32_t)MO2.getImm(); + bool isSub = OffImm < 0; + // Special value for #-0. All others are normal. + if (OffImm == INT32_MIN) + OffImm = 0; + if (isSub) + O << ", #-" << -OffImm; + else if (OffImm > 0) + O << ", #" << OffImm; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + int32_t OffImm = (int32_t)MO2.getImm(); + // Don't print +0. + if (OffImm < 0) + O << ", #-" << -OffImm; + else if (OffImm > 0) + O << ", #" << OffImm; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + int32_t OffImm = (int32_t)MO2.getImm() / 4; + // Don't print +0. + if (OffImm < 0) + O << ", #-" << -OffImm * 4; + else if (OffImm > 0) + O << ", #" << OffImm * 4; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + int32_t OffImm = (int32_t)MO1.getImm(); + // Don't print +0. + if (OffImm < 0) + O << "#-" << -OffImm; + else if (OffImm > 0) + O << "#" << OffImm; +} + +void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + int32_t OffImm = (int32_t)MO1.getImm() / 4; + // Don't print +0. 
+ if (OffImm < 0) + O << "#-" << -OffImm * 4; + else if (OffImm > 0) + O << "#" << OffImm * 4; +} + +void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); + + O << "[" << getRegisterName(MO1.getReg()); + + assert(MO2.getReg() && "Invalid so_reg load / store address!"); + O << ", " << getRegisterName(MO2.getReg()); + + unsigned ShAmt = MO3.getImm(); + if (ShAmt) { + assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); + O << ", lsl #" << ShAmt; + } + O << "]"; +} + +void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + O << '#'; + if (MO.isFPImm()) { + O << (float)MO.getFPImm(); + } else { + union { + uint32_t I; + float F; + } FPUnion; + + FPUnion.I = MO.getImm(); + O << FPUnion.F; + } +} + +void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + O << '#'; + if (MO.isFPImm()) { + O << MO.getFPImm(); + } else { + // We expect the binary encoding of a floating point number here. + union { + uint64_t I; + double D; + } FPUnion; + + FPUnion.I = MO.getImm(); + O << FPUnion.D; + } +} + +void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned EncodedImm = MI->getOperand(OpNum).getImm(); + unsigned EltBits; + uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); + O << "#0x" << utohexstr(Val); +} diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h new file mode 100644 index 0000000..679d313 --- /dev/null +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -0,0 +1,111 @@ +//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an ARM MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMINSTPRINTER_H +#define ARMINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + class MCOperand; + +class ARMInstPrinter : public MCInstPrinter { +public: + ARMInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} + + virtual void printInst(const MCInst *MI, raw_ostream &O); + virtual StringRef getOpcodeName(unsigned Opcode) const; + + static const char *getInstructionName(unsigned Opcode); + + // Autogenerated by tblgen. 
+ void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O, unsigned Scale); + void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printCPSOptionOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printNegZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printSBitModifierOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void 
printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+  void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/InstPrinter/CMakeLists.txt b/lib/Target/ARM/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..18645c0
--- /dev/null
+++ b/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMARMAsmPrinter
+  ARMInstPrinter.cpp
+  )
+add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
diff --git a/lib/Target/ARM/InstPrinter/Makefile b/lib/Target/ARM/InstPrinter/Makefile
new file mode 100644
index 0000000..65d372e
--- /dev/null
+++ b/lib/Target/ARM/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/ARM/InstPrinter/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMAsmPrinter
+
+# Hack: we need to include 'main' arm target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
new file mode 100644
index 0000000..f9e86eb
--- /dev/null
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -0,0 +1,321 @@
+//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand VFP / NEON floating point MLA / MLS instructions (each to a pair of
+// multiply and add / sub instructions) when special VMLx hazards are detected.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mlx-expansion" +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static cl::opt<bool> +ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden); +static cl::opt<unsigned> +ExpandLimit("expand-limit", cl::init(~0U), cl::Hidden); + +STATISTIC(NumExpand, "Number of fp MLA / MLS instructions expanded"); + +namespace { + struct MLxExpansion : public MachineFunctionPass { + static char ID; + MLxExpansion() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM MLA / MLS expansion pass"; + } + + private: + const ARMBaseInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + + unsigned MIIdx; + MachineInstr* LastMIs[4]; + + void clearStack(); + void pushStack(MachineInstr *MI); + MachineInstr *getAccDefMI(MachineInstr *MI) const; + unsigned getDefReg(MachineInstr *MI) const; + bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const; + bool FindMLxHazard(MachineInstr *MI) const; + void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned MulOpc, unsigned AddSubOpc, + bool NegAcc, bool HasLane); + bool ExpandFPMLxInstructions(MachineBasicBlock &MBB); + }; + char MLxExpansion::ID = 0; +} + +void MLxExpansion::clearStack() { + std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0); + MIIdx = 0; +} + +void MLxExpansion::pushStack(MachineInstr *MI) { + LastMIs[MIIdx] = MI; + if (++MIIdx == 4) + MIIdx = 0; +} + +MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { + // Look past COPY and INSERT_SUBREG instructions to find the + // real definition MI. This is important for _sfp instructions. 
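  // A sketch of the kind of chain this walk looks through (the virtual
  // register numbers are invented for the example):
  //   %reg1 = <real definition>
  //   %reg2 = COPY %reg1
  //   %reg3 = INSERT_SUBREG %reg4, %reg2, dsub_0
  //   MI reads %reg3 as its accumulator (operand 1); the walk returns the
  //   instruction defining %reg1.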
+  unsigned Reg = MI->getOperand(1).getReg();
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return 0;
+
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineInstr *DefMI = MRI->getVRegDef(Reg);
+  while (true) {
+    if (DefMI->getParent() != MBB)
+      break;
+    if (DefMI->isCopyLike()) {
+      Reg = DefMI->getOperand(1).getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        DefMI = MRI->getVRegDef(Reg);
+        continue;
+      }
+    } else if (DefMI->isInsertSubreg()) {
+      Reg = DefMI->getOperand(2).getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        DefMI = MRI->getVRegDef(Reg);
+        continue;
+      }
+    }
+    break;
+  }
+  return DefMI;
+}
+
+unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
+  unsigned Reg = MI->getOperand(0).getReg();
+  if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+      !MRI->hasOneNonDBGUse(Reg))
+    return Reg;
+
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineInstr *UseMI = &*MRI->use_nodbg_begin(Reg);
+  if (UseMI->getParent() != MBB)
+    return Reg;
+
+  while (UseMI->isCopy() || UseMI->isInsertSubreg()) {
+    Reg = UseMI->getOperand(0).getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+        !MRI->hasOneNonDBGUse(Reg))
+      return Reg;
+    UseMI = &*MRI->use_nodbg_begin(Reg);
+    if (UseMI->getParent() != MBB)
+      return Reg;
+  }
+
+  return Reg;
+}
+
+bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
+  const TargetInstrDesc &TID = MI->getDesc();
+  // FIXME: Detect integer instructions properly.
+  unsigned Domain = TID.TSFlags & ARMII::DomainMask;
+  if (Domain == ARMII::DomainVFP) {
+    unsigned Opcode = TID.getOpcode();
+    if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
+        Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+      return false;
+  } else if (Domain == ARMII::DomainNEON) {
+    if (TID.mayStore() || TID.mayLoad())
+      return false;
+  } else {
+    return false;
+  }
+
+  return MI->readsRegister(Reg, TRI);
+}
+
+
+bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const {
+  if (NumExpand >= ExpandLimit)
+    return false;
+
+  if (ForceExapnd)
+    return true;
+
+  MachineInstr *DefMI = getAccDefMI(MI);
+  if (TII->isFpMLxInstruction(DefMI->getOpcode()))
+    // r0 = vmla
+    // r3 = vmla r0, r1, r2
+    // takes 16 - 17 cycles
+    //
+    // r0 = vmla
+    // r4 = vmul r1, r2
+    // r3 = vadd r0, r4
+    // takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
+    return true;
+
+  // If a VMLA.F is followed by a VADD.F or VMUL.F with no RAW hazard, the
+  // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall
+  // preserves the in-order retirement of the instructions.
+  // Look at the next few instructions; if *most* of them can cause hazards,
+  // then the scheduler can't *fix* this, and we'd better break up the VMLA.
+  for (unsigned i = 1; i <= 4; ++i) {
+    int Idx = ((int)MIIdx - i + 4) % 4;
+    MachineInstr *NextMI = LastMIs[Idx];
+    if (!NextMI)
+      continue;
+
+    if (TII->canCauseFpMLxStall(NextMI->getOpcode()))
+      return true;
+
+    // Look for VMLx RAW hazard.
+    if (hasRAWHazard(getDefReg(MI), NextMI))
+      return true;
+  }
+
+  return false;
+}
+
+/// ExpandFPMLxInstruction - Expand an MLA / MLS instruction into a pair
+/// of MUL + ADD / SUB instructions.
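  // By way of example (register choices are illustrative only):
  //   vmla.f32 d0, d1, d2        ; Dst/Acc = d0, Src1 = d1, Src2 = d2
  // becomes
  //   vmul.f32 d16, d1, d2       ; TmpReg = Src1 * Src2
  //   vadd.f32 d0, d0, d16       ; Dst = Acc + Tmp
  // For the NegAcc forms the add/sub operands are swapped, giving Tmp - Acc.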
+void +MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned MulOpc, unsigned AddSubOpc, + bool NegAcc, bool HasLane) { + unsigned DstReg = MI->getOperand(0).getReg(); + bool DstDead = MI->getOperand(0).isDead(); + unsigned AccReg = MI->getOperand(1).getReg(); + unsigned Src1Reg = MI->getOperand(2).getReg(); + unsigned Src2Reg = MI->getOperand(3).getReg(); + bool Src1Kill = MI->getOperand(2).isKill(); + bool Src2Kill = MI->getOperand(3).isKill(); + unsigned LaneImm = HasLane ? MI->getOperand(4).getImm() : 0; + unsigned NextOp = HasLane ? 5 : 4; + ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm(); + unsigned PredReg = MI->getOperand(++NextOp).getReg(); + + const TargetInstrDesc &TID1 = TII->get(MulOpc); + const TargetInstrDesc &TID2 = TII->get(AddSubOpc); + unsigned TmpReg = MRI->createVirtualRegister(TID1.getRegClass(0, TRI)); + + MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID1, TmpReg) + .addReg(Src1Reg, getKillRegState(Src1Kill)) + .addReg(Src2Reg, getKillRegState(Src2Kill)); + if (HasLane) + MIB.addImm(LaneImm); + MIB.addImm(Pred).addReg(PredReg); + + MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID2) + .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)); + + if (NegAcc) { + bool AccKill = MRI->hasOneNonDBGUse(AccReg); + MIB.addReg(TmpReg, getKillRegState(true)) + .addReg(AccReg, getKillRegState(AccKill)); + } else { + MIB.addReg(AccReg).addReg(TmpReg, getKillRegState(true)); + } + MIB.addImm(Pred).addReg(PredReg); + + DEBUG({ + dbgs() << "Expanding: " << *MI; + dbgs() << " to:\n"; + MachineBasicBlock::iterator MII = MI; + MII = llvm::prior(MII); + MachineInstr &MI2 = *MII; + MII = llvm::prior(MII); + MachineInstr &MI1 = *MII; + dbgs() << " " << MI1; + dbgs() << " " << MI2; + }); + + MI->eraseFromParent(); + ++NumExpand; +} + +bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { + bool Changed = false; + + clearStack(); + + unsigned Skip = 0; + MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend(); + while (MII != E) { + MachineInstr *MI = &*MII; + + if (MI->isLabel() || MI->isImplicitDef() || MI->isCopy()) { + ++MII; + continue; + } + + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.isBarrier()) { + clearStack(); + Skip = 0; + ++MII; + continue; + } + + unsigned Domain = TID.TSFlags & ARMII::DomainMask; + if (Domain == ARMII::DomainGeneral) { + if (++Skip == 2) + // Assume dual issues of non-VFP / NEON instructions. + pushStack(0); + } else { + Skip = 0; + + unsigned MulOpc, AddSubOpc; + bool NegAcc, HasLane; + if (!TII->isFpMLxInstruction(TID.getOpcode(), + MulOpc, AddSubOpc, NegAcc, HasLane) || + !FindMLxHazard(MI)) + pushStack(MI); + else { + ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane); + E = MBB.rend(); // May have changed if MI was the 1st instruction. 
+ Changed = true; + continue; + } + } + + ++MII; + } + + return Changed; +} + +bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { + TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); + TRI = Fn.getTarget().getRegisterInfo(); + MRI = &Fn.getRegInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + Modified |= ExpandFPMLxInstructions(MBB); + } + + return Modified; +} + +FunctionPass *llvm::createMLxExpansionPass() { + return new MLxExpansion(); +} diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index b3fcfaf6..65a6494 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -18,8 +18,8 @@ BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ ARMGenDAGISel.inc ARMGenSubtarget.inc \ ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ ARMGenDecoderTables.inc ARMGenEDInfo.inc \ - ARMGenFastISel.inc + ARMGenFastISel.inc ARMGenMCCodeEmitter.inc -DIRS = AsmPrinter AsmParser Disassembler TargetInfo +DIRS = InstPrinter AsmParser Disassembler TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp deleted file mode 100644 index 3407ac6..0000000 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ /dev/null @@ -1,406 +0,0 @@ -//===-- NEONPreAllocPass.cpp - Allocate adjacent NEON registers--*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "neon-prealloc" -#include "ARM.h" -#include "ARMInstrInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -using namespace llvm; - -namespace { - class NEONPreAllocPass : public MachineFunctionPass { - const TargetInstrInfo *TII; - MachineRegisterInfo *MRI; - - public: - static char ID; - NEONPreAllocPass() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const { - return "NEON register pre-allocation pass"; - } - - private: - bool FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs, - unsigned Offset, unsigned Stride) const; - bool PreAllocNEONRegisters(MachineBasicBlock &MBB); - }; - - char NEONPreAllocPass::ID = 0; -} - -static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, - unsigned &Offset, unsigned &Stride) { - // Default to unit stride with no offset. 
- Stride = 1; - Offset = 0; - - switch (Opcode) { - default: - break; - - case ARM::VLD2LNd8: - case ARM::VLD2LNd16: - case ARM::VLD2LNd32: - FirstOpnd = 0; - NumRegs = 2; - return true; - - case ARM::VLD2LNq16: - case ARM::VLD2LNq32: - FirstOpnd = 0; - NumRegs = 2; - Offset = 0; - Stride = 2; - return true; - - case ARM::VLD2LNq16odd: - case ARM::VLD2LNq32odd: - FirstOpnd = 0; - NumRegs = 2; - Offset = 1; - Stride = 2; - return true; - - case ARM::VLD3LNd8: - case ARM::VLD3LNd16: - case ARM::VLD3LNd32: - FirstOpnd = 0; - NumRegs = 3; - return true; - - case ARM::VLD3LNq16: - case ARM::VLD3LNq32: - FirstOpnd = 0; - NumRegs = 3; - Offset = 0; - Stride = 2; - return true; - - case ARM::VLD3LNq16odd: - case ARM::VLD3LNq32odd: - FirstOpnd = 0; - NumRegs = 3; - Offset = 1; - Stride = 2; - return true; - - case ARM::VLD4LNd8: - case ARM::VLD4LNd16: - case ARM::VLD4LNd32: - FirstOpnd = 0; - NumRegs = 4; - return true; - - case ARM::VLD4LNq16: - case ARM::VLD4LNq32: - FirstOpnd = 0; - NumRegs = 4; - Offset = 0; - Stride = 2; - return true; - - case ARM::VLD4LNq16odd: - case ARM::VLD4LNq32odd: - FirstOpnd = 0; - NumRegs = 4; - Offset = 1; - Stride = 2; - return true; - - case ARM::VST2LNd8: - case ARM::VST2LNd16: - case ARM::VST2LNd32: - FirstOpnd = 2; - NumRegs = 2; - return true; - - case ARM::VST2LNq16: - case ARM::VST2LNq32: - FirstOpnd = 2; - NumRegs = 2; - Offset = 0; - Stride = 2; - return true; - - case ARM::VST2LNq16odd: - case ARM::VST2LNq32odd: - FirstOpnd = 2; - NumRegs = 2; - Offset = 1; - Stride = 2; - return true; - - case ARM::VST3LNd8: - case ARM::VST3LNd16: - case ARM::VST3LNd32: - FirstOpnd = 2; - NumRegs = 3; - return true; - - case ARM::VST3LNq16: - case ARM::VST3LNq32: - FirstOpnd = 2; - NumRegs = 3; - Offset = 0; - Stride = 2; - return true; - - case ARM::VST3LNq16odd: - case ARM::VST3LNq32odd: - FirstOpnd = 2; - NumRegs = 3; - Offset = 1; - Stride = 2; - return true; - - case ARM::VST4LNd8: - case ARM::VST4LNd16: - case ARM::VST4LNd32: - FirstOpnd = 2; - NumRegs = 4; - return true; - - case ARM::VST4LNq16: - case ARM::VST4LNq32: - FirstOpnd = 2; - NumRegs = 4; - Offset = 0; - Stride = 2; - return true; - - case ARM::VST4LNq16odd: - case ARM::VST4LNq32odd: - FirstOpnd = 2; - NumRegs = 4; - Offset = 1; - Stride = 2; - return true; - - case ARM::VTBL2: - FirstOpnd = 1; - NumRegs = 2; - return true; - - case ARM::VTBL3: - FirstOpnd = 1; - NumRegs = 3; - return true; - - case ARM::VTBL4: - FirstOpnd = 1; - NumRegs = 4; - return true; - - case ARM::VTBX2: - FirstOpnd = 2; - NumRegs = 2; - return true; - - case ARM::VTBX3: - FirstOpnd = 2; - NumRegs = 3; - return true; - - case ARM::VTBX4: - FirstOpnd = 2; - NumRegs = 4; - return true; - } - - return false; -} - -bool -NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs, - unsigned Offset, unsigned Stride) const { - MachineOperand &FMO = MI->getOperand(FirstOpnd); - assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand"); - unsigned VirtReg = FMO.getReg(); - (void)VirtReg; - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "expected a virtual register"); - - unsigned LastSubIdx = 0; - if (FMO.isDef()) { - MachineInstr *RegSeq = 0; - for (unsigned R = 0; R < NumRegs; ++R) { - const MachineOperand &MO = MI->getOperand(FirstOpnd + R); - assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); - unsigned VirtReg = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "expected a virtual register"); - // Feeding into a REG_SEQUENCE. 
- if (!MRI->hasOneNonDBGUse(VirtReg)) - return false; - MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg); - if (!UseMI->isRegSequence()) - return false; - if (RegSeq && RegSeq != UseMI) - return false; - unsigned OpIdx = 1 + (Offset + R * Stride) * 2; - if (UseMI->getOperand(OpIdx).getReg() != VirtReg) - llvm_unreachable("Malformed REG_SEQUENCE instruction!"); - unsigned SubIdx = UseMI->getOperand(OpIdx + 1).getImm(); - if (LastSubIdx) { - if (LastSubIdx != SubIdx-Stride) - return false; - } else { - // Must start from dsub_0 or qsub_0. - if (SubIdx != (ARM::dsub_0+Offset) && - SubIdx != (ARM::qsub_0+Offset)) - return false; - } - RegSeq = UseMI; - LastSubIdx = SubIdx; - } - - // In the case of vld3, etc., make sure the trailing operand of - // REG_SEQUENCE is an undef. - if (NumRegs == 3) { - unsigned OpIdx = 1 + (Offset + 3 * Stride) * 2; - const MachineOperand &MO = RegSeq->getOperand(OpIdx); - unsigned VirtReg = MO.getReg(); - MachineInstr *DefMI = MRI->getVRegDef(VirtReg); - if (!DefMI || !DefMI->isImplicitDef()) - return false; - } - return true; - } - - unsigned LastSrcReg = 0; - SmallVector<unsigned, 4> SubIds; - for (unsigned R = 0; R < NumRegs; ++R) { - const MachineOperand &MO = MI->getOperand(FirstOpnd + R); - assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); - unsigned VirtReg = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "expected a virtual register"); - // Extracting from a Q or QQ register. - MachineInstr *DefMI = MRI->getVRegDef(VirtReg); - if (!DefMI || !DefMI->isCopy() || !DefMI->getOperand(1).getSubReg()) - return false; - VirtReg = DefMI->getOperand(1).getReg(); - if (LastSrcReg && LastSrcReg != VirtReg) - return false; - LastSrcReg = VirtReg; - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - if (RC != ARM::QPRRegisterClass && - RC != ARM::QQPRRegisterClass && - RC != ARM::QQQQPRRegisterClass) - return false; - unsigned SubIdx = DefMI->getOperand(1).getSubReg(); - if (LastSubIdx) { - if (LastSubIdx != SubIdx-Stride) - return false; - } else { - // Must start from dsub_0 or qsub_0. - if (SubIdx != (ARM::dsub_0+Offset) && - SubIdx != (ARM::qsub_0+Offset)) - return false; - } - SubIds.push_back(SubIdx); - LastSubIdx = SubIdx; - } - - // FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is - // currently required for correctness. e.g. - // %reg1041<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6 - // %reg1042<def> = EXTRACT_SUBREG %reg1041, 6 - // %reg1043<def> = EXTRACT_SUBREG %reg1041, 5 - // VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>, - // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5 - // respectively. - // We need to change how we model uses of REG_SEQUENCE. - for (unsigned R = 0; R < NumRegs; ++R) { - MachineOperand &MO = MI->getOperand(FirstOpnd + R); - unsigned OldReg = MO.getReg(); - MachineInstr *DefMI = MRI->getVRegDef(OldReg); - assert(DefMI->isCopy()); - MO.setReg(LastSrcReg); - MO.setSubReg(SubIds[R]); - MO.setIsKill(false); - // Delete the EXTRACT_SUBREG if its result is now dead. 
- if (MRI->use_empty(OldReg)) - DefMI->eraseFromParent(); - } - - return true; -} - -bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { - bool Modified = false; - - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - for (; MBBI != E; ++MBBI) { - MachineInstr *MI = &*MBBI; - unsigned FirstOpnd, NumRegs, Offset, Stride; - if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) - continue; - if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) - continue; - - MachineBasicBlock::iterator NextI = llvm::next(MBBI); - for (unsigned R = 0; R < NumRegs; ++R) { - MachineOperand &MO = MI->getOperand(FirstOpnd + R); - assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); - unsigned VirtReg = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "expected a virtual register"); - - // For now, just assign a fixed set of adjacent registers. - // This leaves plenty of room for future improvements. - static const unsigned NEONDRegs[] = { - ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7 - }; - MO.setReg(NEONDRegs[Offset + R * Stride]); - - if (MO.isUse()) { - // Insert a copy from VirtReg. - BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),MO.getReg()) - .addReg(VirtReg, getKillRegState(MO.isKill())); - MO.setIsKill(); - } else if (MO.isDef() && !MO.isDead()) { - // Add a copy to VirtReg. - BuildMI(MBB, NextI, DebugLoc(), TII->get(TargetOpcode::COPY), VirtReg) - .addReg(MO.getReg()); - } - } - } - - return Modified; -} - -bool NEONPreAllocPass::runOnMachineFunction(MachineFunction &MF) { - TII = MF.getTarget().getInstrInfo(); - MRI = &MF.getRegInfo(); - - bool Modified = false; - for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; - ++MFI) { - MachineBasicBlock &MBB = *MFI; - Modified |= PreAllocNEONRegisters(MBB); - } - - return Modified; -} - -/// createNEONPreAllocPass - returns an instance of the NEON register -/// pre-allocation pass. -FunctionPass *llvm::createNEONPreAllocPass() { - return new NEONPreAllocPass(); -} diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index 6b605bb..463c440 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -68,7 +68,7 @@ LPCRELL0: //===---------------------------------------------------------------------===// -We compiles the following: +We compile the following: define i16 @func_entry_2E_ce(i32 %i) { switch i32 %i, label %bb12.exitStub [ @@ -246,3 +246,22 @@ Thumb2. Rather than having tBR_JTr print a ".align 2" and constant island pass pad it, add a target specific ALIGN instruction instead. That way, GetInstSizeInBytes won't have to over-estimate. It can also be used for loop alignment pass. + +//===---------------------------------------------------------------------===// + +We generate conditional code for icmp when we don't need to. This code: + + int foo(int s) { + return s == 1; + } + +produces: + +foo: + cmp r0, #1 + mov.w r0, #0 + it eq + moveq r0, #1 + bx lr + +when it could use subs + adcs. This is GCC PR46975. diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp new file mode 100644 index 0000000..233e165 --- /dev/null +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -0,0 +1,352 @@ +//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb1 implementation of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Thumb1FrameLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+  const MachineFrameInfo *FFI = MF.getFrameInfo();
+  unsigned CFSize = FFI->getMaxCallFrameSize();
+  // It's not always a good idea to include the call frame as part of the
+  // stack frame. ARM (especially Thumb) has only small immediate offsets for
+  // addressing the stack frame, so a large call frame can cause poor codegen
+  // and may even make it impossible to scavenge a register.
+  if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of the imm8 * 4 range (510)
+    return false;
+
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static void emitSPUpdate(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &MBBI,
+                         const TargetInstrInfo &TII, DebugLoc dl,
+                         const Thumb1RegisterInfo &MRI,
+                         int NumBytes) {
+  emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
+                            MRI, dl);
+}
+
+void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const Thumb1RegisterInfo *RegInfo =
+    static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo());
+  const Thumb1InstrInfo &TII =
+    *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
+
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned NumBytes = MFI->getStackSize();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+  unsigned FramePtr = RegInfo->getFrameRegister(MF);
+  unsigned BasePtr = RegInfo->getBaseRegister();
+
+  // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+  NumBytes = (NumBytes + 3) & ~3;
+  MFI->setStackSize(NumBytes);
+
+  // Determine the size of each callee-save spill area and record which
+  // frame index belongs to which area.
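+  // Illustrative example (editorial, not from the original source): for a
+  // Darwin function whose callee-saved set is {r4, r7, r8, d8, lr}, the loop
+  // below files r4/r7/lr into area 1 (GPRCS1Size = 12), r8 into area 2
+  // (GPRCS2Size = 4, Darwin only), and d8 into the DPR area (DPRCSSize = 8).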
+  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+  int FramePtrSpillFI = 0;
+
+  if (VARegSaveSize)
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize);
+
+  if (!AFI->hasStackFrame()) {
+    if (NumBytes != 0)
+      emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
+    return;
+  }
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    int FI = CSI[i].getFrameIdx();
+    switch (Reg) {
+    case ARM::R4:
+    case ARM::R5:
+    case ARM::R6:
+    case ARM::R7:
+    case ARM::LR:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      AFI->addGPRCalleeSavedArea1Frame(FI);
+      GPRCS1Size += 4;
+      break;
+    case ARM::R8:
+    case ARM::R9:
+    case ARM::R10:
+    case ARM::R11:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      if (STI.isTargetDarwin()) {
+        AFI->addGPRCalleeSavedArea2Frame(FI);
+        GPRCS2Size += 4;
+      } else {
+        AFI->addGPRCalleeSavedArea1Frame(FI);
+        GPRCS1Size += 4;
+      }
+      break;
+    default:
+      AFI->addDPRCalleeSavedAreaFrame(FI);
+      DPRCSSize += 8;
+    }
+  }
+
+  if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+    ++MBBI;
+    if (MBBI != MBB.end())
+      dl = MBBI->getDebugLoc();
+  }
+
+  // Determine starting offsets of spill areas.
+  unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+  AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+  NumBytes = DPRCSOffset;
+
+  // Adjust FP so it points to the stack slot that contains the previous FP.
+  if (hasFP(MF)) {
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+      .addFrameIndex(FramePtrSpillFI).addImm(0);
+    if (NumBytes > 7)
+      // If the offset is > 7, sp cannot be adjusted in a single instruction;
+      // try restoring from fp instead.
+      AFI->setShouldRestoreSPFromFP(true);
+  }
+
+  if (NumBytes)
+    // Insert it after all the callee-save spills.
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
+
+  if (STI.isTargetELF() && hasFP(MF))
+    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+                             AFI->getFramePtrSpillOffset());
+
+  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+  // If we need a base pointer, set it up here. It's whatever the value
+  // of the stack pointer is at this point. Any variable size objects
+  // will be allocated after this, so we can still use the base pointer
+  // to reference locals.
+  if (RegInfo->hasBasePointer(MF))
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
+
+  // If the frame has variable sized objects then the epilogue must restore
+  // the sp from fp. We can assume there's an FP here since hasFP already
+  // checks for hasVarSizedObjects.
+  if (MFI->hasVarSizedObjects())
+    AFI->setShouldRestoreSPFromFP(true);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    if (Reg == CSRegs[i])
+      return true;
+  return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+  if (MI->getOpcode() == ARM::tRestore &&
+      MI->getOperand(1).isFI() &&
+      isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
+    return true;
+  else if (MI->getOpcode() == ARM::tPOP) {
+    // The first two operands are predicates. The last two are
+    // imp-def and imp-use of SP. Check everything in between.
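+    // For example (operand layout illustrative, not from the original
+    // source), a restore such as
+    //   tPOP pred:14, pred:%noreg, %r4<def>, %r7<def>, %sp<imp-def>, %sp<imp-use>
+    // is only checked at the register operands in positions 2 and 3.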
+ for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i) + if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) + return false; + return true; + } + return false; +} + +void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + assert((MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::tPOP_RET) && + "Can only insert epilog into returning blocks"); + DebugLoc dl = MBBI->getDebugLoc(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const Thumb1RegisterInfo *RegInfo = + static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); + const Thumb1InstrInfo &TII = + *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); + + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + int NumBytes = (int)MFI->getStackSize(); + const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); + } else { + // Unwind MBBI to point to first LDR / VLDRD. + if (MBBI != MBB.begin()) { + do + --MBBI; + while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); + if (!isCSRestore(MBBI, CSRegs)) + ++MBBI; + } + + // Move SP to start of FP callee save spill area. + NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedAreaSize()); + + if (AFI->shouldRestoreSPFromFP()) { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot, the target is ELF and the function has FP, or + // the target uses var sized objects. + if (NumBytes) { + assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && + "No scratch register to restore SP from FP!"); + emitThumbRegPlusImmediate(MBB, MBBI, ARM::R4, FramePtr, -NumBytes, + TII, *RegInfo, dl); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) + .addReg(ARM::R4); + } else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) + .addReg(FramePtr); + } else { + if (MBBI->getOpcode() == ARM::tBX_RET && + &MBB.front() != MBBI && + prior(MBBI)->getOpcode() == ARM::tPOP) { + MachineBasicBlock::iterator PMBBI = prior(MBBI); + emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); + } else + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes); + } + } + + if (VARegSaveSize) { + // Unlike T2 and ARM mode, the T1 pop instruction cannot restore + // to LR, and we can't pop the value directly to the PC since + // we need to update the SP after popping the value. Therefore, we + // pop the old LR into R3 as a temporary. + + // Move back past the callee-saved register restoration + while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs)) + ++MBBI; + // Epilogue for vararg functions: pop LR to R3 and branch off it. 
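+    // The rewritten return sequence is roughly (editorial sketch, assuming a
+    // 4-byte vararg register save area):
+    //   pop {r3}      ; old LR
+    //   add sp, #4    ; deallocate the vararg register save area
+    //   bx  r3        ; return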
+    AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+      .addReg(ARM::R3, RegState::Define);
+
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
+
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+      .addReg(ARM::R3, RegState::Kill);
+    // Erase the old tBX_RET instruction.
+    MBB.erase(MBBI);
+  }
+}
+
+bool Thumb1FrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MI,
+                          const std::vector<CalleeSavedInfo> &CSI,
+                          const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL;
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
+  AddDefaultPred(MIB);
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    bool isKill = true;
+
+    // Add the callee-saved register as live-in unless it's LR and
+    // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+    // then it's already added to the function and entry block live-in sets.
+    if (Reg == ARM::LR) {
+      MachineFunction &MF = *MBB.getParent();
+      if (MF.getFrameInfo()->isReturnAddressTaken() &&
+          MF.getRegInfo().isLiveIn(Reg))
+        isKill = false;
+    }
+
+    if (isKill)
+      MBB.addLiveIn(Reg);
+
+    MIB.addReg(Reg, getKillRegState(isKill));
+  }
+  return true;
+}
+
+bool Thumb1FrameLowering::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            const std::vector<CalleeSavedInfo> &CSI,
+                            const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+  DebugLoc DL = MI->getDebugLoc();
+  MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
+  AddDefaultPred(MIB);
+
+  bool NumRegs = false;
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    if (Reg == ARM::LR) {
+      // Special epilogue for vararg functions. See emitEpilogue.
+      if (isVarArg)
+        continue;
+      Reg = ARM::PC;
+      (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+      MI = MBB.erase(MI);
+    }
+    MIB.addReg(Reg, getDefRegState(true));
+    NumRegs = true;
+  }
+
+  // It's illegal to emit a pop instruction without operands.
+  if (NumRegs)
+    MBB.insert(MI, &*MIB);
+  else
+    MF.DeleteMachineInstr(MIB);
+
+  return true;
+}
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
new file mode 100644
index 0000000..c592e12
--- /dev/null
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -0,0 +1,52 @@
+//===-- Thumb1FrameLowering.h - Thumb1-specific frame info stuff --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb1 declaration of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __THUMB_FRAMEINFO_H_
+#define __THUMB_FRAMEINFO_H_
+
+#include "ARM.h"
+#include "ARMFrameLowering.h"
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class ARMSubtarget;
+
+class Thumb1FrameLowering : public ARMFrameLowering {
+public:
+  explicit Thumb1FrameLowering(const ARMSubtarget &sti)
+    : ARMFrameLowering(sti) {
+  }
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const;
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
+
+  bool hasReservedCallFrame(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index af630ac..3fbb433 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -71,8 +71,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = *MF.getFrameInfo();
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                            MachineMemOperand::MOStore, 0,
+    MF.getMachineMemOperand(
+                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                            MachineMemOperand::MOStore,
                             MFI.getObjectSize(FI),
                             MFI.getObjectAlignment(FI));
   AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSpill))
@@ -99,85 +100,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = *MF.getFrameInfo();
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
-                            MachineMemOperand::MOLoad, 0,
+    MF.getMachineMemOperand(
+                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                            MachineMemOperand::MOLoad,
                             MFI.getObjectSize(FI),
                             MFI.getObjectAlignment(FI));
   AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
                  .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
   }
 }
-
-bool Thumb1InstrInfo::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator MI,
-                          const std::vector<CalleeSavedInfo> &CSI,
-                          const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
-  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
-  AddDefaultPred(MIB);
-  for (unsigned i = CSI.size(); i != 0; --i) {
-    unsigned Reg = CSI[i-1].getReg();
-    bool isKill = true;
-
-    // Add the callee-saved register as live-in unless it's LR and
-    // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
-    // then it's already added to the function and entry block live-in sets.
- if (Reg == ARM::LR) { - MachineFunction &MF = *MBB.getParent(); - if (MF.getFrameInfo()->isReturnAddressTaken() && - MF.getRegInfo().isLiveIn(Reg)) - isKill = false; - } - - if (isKill) - MBB.addLiveIn(Reg); - - MIB.addReg(Reg, getKillRegState(isKill)); - } - return true; -} - -bool Thumb1InstrInfo:: -restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - if (CSI.empty()) - return false; - - bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; - DebugLoc DL = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::tPOP)); - AddDefaultPred(MIB); - - bool NumRegs = false; - for (unsigned i = CSI.size(); i != 0; --i) { - unsigned Reg = CSI[i-1].getReg(); - if (Reg == ARM::LR) { - // Special epilogue for vararg functions. See emitEpilogue - if (isVarArg) - continue; - Reg = ARM::PC; - (*MIB).setDesc(get(ARM::tPOP_RET)); - MI = MBB.erase(MI); - } - MIB.addReg(Reg, getDefRegState(true)); - NumRegs = true; - } - - // It's illegal to emit pop instruction without operands. - if (NumRegs) - MBB.insert(MI, &*MIB); - else - MF.DeleteMachineInstr(MIB); - - return true; -} diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 555135a..17ef2f7 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -37,28 +37,19 @@ public: /// const Thumb1RegisterInfo &getRegisterInfo() const { return RI; } - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const; void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index a21a3da..f62a13e 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -29,7 +29,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" @@ -63,24 +63,11 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRcp)) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci)) .addReg(DestReg, getDefRegState(true), SubIdx) 
.addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg); } -bool Thumb1RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - unsigned CFSize = FFI->getMaxCallFrameSize(); - // It's not always a good idea to include the call frame as part of the - // stack frame. ARM (especially Thumb) has small immediate offset to - // address the stack frame. So a large call frame can cause poor codegen - // and may even makes it impossible to scavenge a register. - if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 - return false; - - return !MF.getFrameInfo()->hasVarSizedObjects(); -} - /// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize /// a destreg = basereg + immediate in Thumb code. Materialize the immediate @@ -92,7 +79,7 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned BaseReg, int NumBytes, bool CanChangeCC, const TargetInstrInfo &TII, - const Thumb1RegisterInfo& MRI, + const ARMBaseRegisterInfo& MRI, DebugLoc dl) { MachineFunction &MF = *MBB.getParent(); bool isHigh = !isARMLowRegister(DestReg) || @@ -162,13 +149,12 @@ static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, /// emitThumbRegPlusImmediate - Emits a series of instructions to materialize /// a destreg = basereg + immediate in Thumb code. -static -void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, unsigned BaseReg, - int NumBytes, const TargetInstrInfo &TII, - const Thumb1RegisterInfo& MRI, - DebugLoc dl) { +void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, const TargetInstrInfo &TII, + const ARMBaseRegisterInfo& MRI, + DebugLoc dl) { bool isSub = NumBytes < 0; unsigned Bytes = (unsigned)NumBytes; if (isSub) Bytes = -NumBytes; @@ -304,7 +290,9 @@ static void emitSPUpdate(MachineBasicBlock &MBB, void Thumb1RegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (!hasReservedCallFrame(MF)) { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (!TFI->hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount // ADJCALLSTACKUP -> add, sp, sp, amount @@ -315,7 +303,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned Align = TFI->getStackAlignment(); Amount = (Amount+Align-1)/Align*Align; // Replace the pseudo instruction with a new instruction... @@ -363,6 +351,22 @@ static void removeOperands(MachineInstr &MI, unsigned i) { MI.RemoveOperand(Op); } +/// convertToNonSPOpcode - Change the opcode to the non-SP version, because +/// we're replacing the frame index with a non-SP register. +static unsigned convertToNonSPOpcode(unsigned Opcode) { + switch (Opcode) { + case ARM::tLDRspi: + case ARM::tRestore: // FIXME: Should this opcode be here? + return ARM::tLDRi; + + case ARM::tSTRspi: + case ARM::tSpill: // FIXME: Should this opcode be here? 
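+    // Editorial note: tRestore / tSpill are the SP-relative spill forms;
+    // once rewriteFrameIndex below has switched the base to a non-SP
+    // register, the plain immediate opcodes (tLDRi / tSTRi) are the ones
+    // that apply.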
+ return ARM::tSTRi; + } + + return Opcode; +} + bool Thumb1RegisterInfo:: rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, @@ -464,55 +468,51 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, } return true; } else { - unsigned ImmIdx = 0; - int InstrOffs = 0; - unsigned NumBits = 0; - unsigned Scale = 1; - switch (AddrMode) { - case ARMII::AddrModeT1_s: { - ImmIdx = FrameRegIdx+1; - InstrOffs = MI.getOperand(ImmIdx).getImm(); - NumBits = (FrameReg == ARM::SP) ? 8 : 5; - Scale = 4; - break; - } - default: + if (AddrMode != ARMII::AddrModeT1_s) llvm_unreachable("Unsupported addressing mode!"); - break; - } + + unsigned ImmIdx = FrameRegIdx + 1; + int InstrOffs = MI.getOperand(ImmIdx).getImm(); + unsigned NumBits = (FrameReg == ARM::SP) ? 8 : 5; + unsigned Scale = 4; Offset += InstrOffs * Scale; - assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); + assert((Offset & (Scale - 1)) == 0 && "Can't encode this offset!"); // Common case: small offset, fits into instruction. MachineOperand &ImmOp = MI.getOperand(ImmIdx); int ImmedOffset = Offset / Scale; unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) { - // Replace the FrameIndex with sp + // Replace the FrameIndex with the frame register (e.g., sp). MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); ImmOp.ChangeToImmediate(ImmedOffset); + + // If we're using a register where sp was stored, convert the instruction + // to the non-SP version. + unsigned NewOpc = convertToNonSPOpcode(Opcode); + if (NewOpc != Opcode && FrameReg != ARM::SP) + MI.setDesc(TII.get(NewOpc)); + return true; } - bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill; - if (AddrMode == ARMII::AddrModeT1_s) { - // Thumb tLDRspi, tSTRspi. These will change to instructions that use - // a different base register. - NumBits = 5; - Mask = (1 << NumBits) - 1; - } + NumBits = 5; + Mask = (1 << NumBits) - 1; + // If this is a thumb spill / restore, we will be using a constpool load to // materialize the offset. - if (AddrMode == ARMII::AddrModeT1_s && isThumSpillRestore) + if (Opcode == ARM::tRestore || Opcode == ARM::tSpill) { ImmOp.ChangeToImmediate(0); - else { + } else { // Otherwise, it didn't fit. Pull in what we can to simplify the immed. ImmedOffset = ImmedOffset & Mask; ImmOp.ChangeToImmediate(ImmedOffset); - Offset &= ~(Mask*Scale); + Offset &= ~(Mask * Scale); } } + return Offset == 0; } @@ -602,7 +602,8 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) Offset -= AFI->getGPRCalleeSavedArea2Offset(); else if (MF.getFrameInfo()->hasVarSizedObjects()) { - assert(SPAdj == 0 && hasFP(MF) && "Unexpected"); + assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) && + "Unexpected"); // There are alloca()'s in this function, must reference off the frame // pointer or base pointer instead. if (!hasBasePointer(MF)) { @@ -655,13 +656,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, *this, dl); } - MI.setDesc(TII.get(ARM::tLDR)); + MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi)); MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); if (UseRR) - // Use [reg, reg] addrmode. - MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); - else // tLDR has an extra register operand. - MI.addOperand(MachineOperand::CreateReg(0, false)); + // Use [reg, reg] addrmode. 
Replace the immediate operand w/ the frame + // register. The offset is already handled in the vreg value. + MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); } else if (Desc.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); bool UseRR = false; @@ -677,14 +677,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } else emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII, *this, dl); - MI.setDesc(TII.get(ARM::tSTR)); + MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi)); MI.getOperand(i).ChangeToRegister(VReg, false, false, true); - if (UseRR) // Use [reg, reg] addrmode. - MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); - else // tSTR has an extra register operand. - MI.addOperand(MachineOperand::CreateReg(0, false)); - } else + if (UseRR) + // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame + // register. The offset is already handled in the vreg value. + MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); + } else { assert(false && "Unexpected opcode!"); + } // Add predicate back if it's needed. if (MI.getDesc().isPredicable()) { @@ -692,206 +693,3 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, AddDefaultPred(MIB); } } - -void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); - unsigned NumBytes = MFI->getStackSize(); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - - // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. - NumBytes = (NumBytes + 3) & ~3; - MFI->setStackSize(NumBytes); - - // Determine the sizes of each callee-save spill areas and record which frame - // belongs to which callee-save spill areas. - unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; - int FramePtrSpillFI = 0; - - if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, TII, dl, *this, -VARegSaveSize); - - if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); - return; - } - - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - int FI = CSI[i].getFrameIdx(); - switch (Reg) { - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - AFI->addGPRCalleeSavedArea1Frame(FI); - GPRCS1Size += 4; - break; - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - if (STI.isTargetDarwin()) { - AFI->addGPRCalleeSavedArea2Frame(FI); - GPRCS2Size += 4; - } else { - AFI->addGPRCalleeSavedArea1Frame(FI); - GPRCS1Size += 4; - } - break; - default: - AFI->addDPRCalleeSavedAreaFrame(FI); - DPRCSSize += 8; - } - } - - if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { - ++MBBI; - if (MBBI != MBB.end()) - dl = MBBI->getDebugLoc(); - } - - // Adjust FP so it point to the stack slot that contains the previous FP. - if (hasFP(MF)) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0); - AFI->setShouldRestoreSPFromFP(true); - } - - // Determine starting offsets of spill areas. 
- unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); - unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; - unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); - AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); - AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); - AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); - - NumBytes = DPRCSOffset; - if (NumBytes) { - // Insert it after all the callee-save spills. - emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); - } - - if (STI.isTargetELF() && hasFP(MF)) - MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - - AFI->getFramePtrSpillOffset()); - - AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); - AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); - AFI->setDPRCalleeSavedAreaSize(DPRCSSize); - - // If we need a base pointer, set it up here. It's whatever the value - // of the stack pointer is at this point. Any variable size objects - // will be allocated after this, so we can still use the base pointer - // to reference locals. - if (hasBasePointer(MF)) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP); -} - -static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { - for (unsigned i = 0; CSRegs[i]; ++i) - if (Reg == CSRegs[i]) - return true; - return false; -} - -static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { - if (MI->getOpcode() == ARM::tRestore && - MI->getOperand(1).isFI() && - isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) - return true; - else if (MI->getOpcode() == ARM::tPOP) { - // The first two operands are predicates. The last two are - // imp-def and imp-use of SP. Check everything in between. - for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i) - if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) - return false; - return true; - } - return false; -} - -void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - assert((MBBI->getOpcode() == ARM::tBX_RET || - MBBI->getOpcode() == ARM::tPOP_RET) && - "Can only insert epilog into returning blocks"); - DebugLoc dl = MBBI->getDebugLoc(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); - int NumBytes = (int)MFI->getStackSize(); - const unsigned *CSRegs = getCalleeSavedRegs(); - - if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); - } else { - // Unwind MBBI to point to first LDR / VLDRD. - if (MBBI != MBB.begin()) { - do - --MBBI; - while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); - if (!isCSRestore(MBBI, CSRegs)) - ++MBBI; - } - - // Move SP to start of FP callee save spill area. - NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + - AFI->getGPRCalleeSavedArea2Size() + - AFI->getDPRCalleeSavedAreaSize()); - - if (AFI->shouldRestoreSPFromFP()) { - NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; - // Reset SP based on frame pointer only if the stack frame extends beyond - // frame pointer stack slot or target is ELF and the function has FP. 
- if (NumBytes) - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, - TII, *this, dl); - else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) - .addReg(FramePtr); - } else { - if (MBBI->getOpcode() == ARM::tBX_RET && - &MBB.front() != MBBI && - prior(MBBI)->getOpcode() == ARM::tPOP) { - MachineBasicBlock::iterator PMBBI = prior(MBBI); - emitSPUpdate(MBB, PMBBI, TII, dl, *this, NumBytes); - } else - emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); - } - } - - if (VARegSaveSize) { - // Unlike T2 and ARM mode, the T1 pop instruction cannot restore - // to LR, and we can't pop the value directly to the PC since - // we need to update the SP after popping the value. Therefore, we - // pop the old LR into R3 as a temporary. - - // Move back past the callee-saved register restoration - while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs)) - ++MBBI; - // Epilogue for vararg functions: pop LR to R3 and branch off it. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(ARM::R3, RegState::Define); - - emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize); - - BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) - .addReg(ARM::R3, RegState::Kill); - // erase the old tBX_RET instruction - MBB.erase(MBBI); - } -} diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index c578054..8a87cc5 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -38,8 +38,6 @@ public: unsigned PredReg = 0) const; /// Code Generation virtual methods... - bool hasReservedCallFrame(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; @@ -59,9 +57,6 @@ public: unsigned Reg) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; }; } diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.cpp b/lib/Target/ARM/Thumb2HazardRecognizer.cpp deleted file mode 100644 index 172908d..0000000 --- a/lib/Target/ARM/Thumb2HazardRecognizer.cpp +++ /dev/null @@ -1,53 +0,0 @@ -//===-- Thumb2HazardRecognizer.cpp - Thumb2 postra hazard recognizer ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "ARM.h" -#include "Thumb2HazardRecognizer.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/ScheduleDAG.h" -using namespace llvm; - -ScheduleHazardRecognizer::HazardType -Thumb2HazardRecognizer::getHazardType(SUnit *SU) { - if (ITBlockSize) { - MachineInstr *MI = SU->getInstr(); - if (!MI->isDebugValue() && MI != ITBlockMIs[ITBlockSize-1]) - return Hazard; - } - - return PostRAHazardRecognizer::getHazardType(SU); -} - -void Thumb2HazardRecognizer::Reset() { - ITBlockSize = 0; - PostRAHazardRecognizer::Reset(); -} - -void Thumb2HazardRecognizer::EmitInstruction(SUnit *SU) { - MachineInstr *MI = SU->getInstr(); - unsigned Opcode = MI->getOpcode(); - if (ITBlockSize) { - --ITBlockSize; - } else if (Opcode == ARM::t2IT) { - unsigned Mask = MI->getOperand(1).getImm(); - unsigned NumTZ = CountTrailingZeros_32(Mask); - assert(NumTZ <= 3 && "Invalid IT mask!"); - ITBlockSize = 4 - NumTZ; - MachineBasicBlock::iterator I = MI; - for (unsigned i = 0; i < ITBlockSize; ++i) { - // Advance to the next instruction, skipping any dbg_value instructions. - do { - ++I; - } while (I->isDebugValue()); - ITBlockMIs[ITBlockSize-1-i] = &*I; - } - } - - PostRAHazardRecognizer::EmitInstruction(SU); -} diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.h b/lib/Target/ARM/Thumb2HazardRecognizer.h deleted file mode 100644 index 4726658..0000000 --- a/lib/Target/ARM/Thumb2HazardRecognizer.h +++ /dev/null @@ -1,40 +0,0 @@ -//===-- Thumb2HazardRecognizer.h - Thumb2 Hazard Recognizers ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines hazard recognizers for scheduling Thumb2 functions on -// ARM processors. -// -//===----------------------------------------------------------------------===// - -#ifndef THUMB2HAZARDRECOGNIZER_H -#define THUMB2HAZARDRECOGNIZER_H - -#include "llvm/CodeGen/PostRAHazardRecognizer.h" - -namespace llvm { - -class MachineInstr; - -class Thumb2HazardRecognizer : public PostRAHazardRecognizer { - unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled. 
- MachineInstr *ITBlockMIs[4]; - -public: - Thumb2HazardRecognizer(const InstrItineraryData &ItinData) : - PostRAHazardRecognizer(ItinData) {} - - virtual HazardType getHazardType(SUnit *SU); - virtual void Reset(); - virtual void EmitInstruction(SUnit *SU); -}; - - -} // end namespace llvm - -#endif // THUMB2HAZARDRECOGNIZER_H diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 442f41d..2f67257 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -17,7 +17,6 @@ #include "ARMAddressingModes.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" -#include "Thumb2HazardRecognizer.h" #include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -28,15 +27,10 @@ using namespace llvm; -static cl::opt<unsigned> -IfCvtLimit("thumb2-ifcvt-limit", cl::Hidden, - cl::desc("Thumb2 if-conversion limit (default 3)"), - cl::init(3)); - -static cl::opt<unsigned> -IfCvtDiamondLimit("thumb2-ifcvt-diamond-limit", cl::Hidden, - cl::desc("Thumb2 diamond if-conversion limit (default 3)"), - cl::init(3)); +static cl::opt<bool> +OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden, + cl::desc("Use old-style Thumb2 if-conversion heuristics"), + cl::init(false)); Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI(*this, STI) { @@ -105,21 +99,6 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; } -bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumInstrs) const { - return NumInstrs && NumInstrs <= IfCvtLimit; -} - -bool Thumb2InstrInfo:: -isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, - MachineBasicBlock &FMBB, unsigned NumF) const { - // FIXME: Catch optimization such as: - // r0 = movne - // r0 = moveq - return NumT && NumF && - NumT <= (IfCvtDiamondLimit) && NumF <= (IfCvtDiamondLimit); -} - void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -155,8 +134,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), - MachineMemOperand::MOStore, 0, + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MachineMemOperand::MOStore, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12)) @@ -181,8 +161,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), - MachineMemOperand::MOLoad, 0, + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MachineMemOperand::MOLoad, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg) @@ -193,11 +174,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI); } -ScheduleHazardRecognizer *Thumb2InstrInfo:: -CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { - return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II); -} - void 
llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 3a9f8b1..f2637d7 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -38,11 +38,6 @@ public: bool isLegalToSplitMBBAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const; - bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const; - - bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs, - MachineBasicBlock &FMBB, unsigned NumFInstrs) const; - void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -70,9 +65,6 @@ public: /// always be able to get register info as well (through this method). /// const Thumb2RegisterInfo &getRegisterInfo() const { return RI; } - - ScheduleHazardRecognizer * - CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const; }; /// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 07dd0be..099b8f7 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -29,7 +29,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 0c3962d..cc8f61c 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -58,7 +58,7 @@ namespace { { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 }, { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 }, // Note: immediate scale is 4. - { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 0 }, + { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 1 }, { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 1 }, { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 1 }, { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 0 }, @@ -68,9 +68,7 @@ namespace { //FIXME: Disable CMN, as CCodes are backwards from compare expectations //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0 }, { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0 }, - { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0 }, - { ARM::t2CMPzri,ARM::tCMPzi8, 0, 8, 0, 1, 0, 2,0, 0 }, - { ARM::t2CMPzrr,ARM::tCMPzhir,0, 0, 0, 0, 0, 2,0, 0 }, + { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 1 }, { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 }, // FIXME: adr.n immediate offset must be multiple of 4. //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0 }, @@ -106,26 +104,27 @@ namespace { // FIXME: Clean this up after splitting each Thumb load / store opcode // into multiple ones. 
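 // Editorial annotation: read against the ReduceEntry struct, the ten columns
 // in each row are WideOpc, NarrowOpc1, NarrowOpc2, Imm1Limit, Imm2Limit,
 // LowRegs1, LowRegs2, PredCC1, PredCC2, and Special.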
- { ARM::t2LDRi12,ARM::tLDR, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 1 }, - { ARM::t2LDRs, ARM::tLDR, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRBi12,ARM::tLDRB, 0, 5, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRBs, ARM::tLDRB, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRHi12,ARM::tLDRH, 0, 5, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRHs, ARM::tLDRH, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 1 }, + { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 1 }, { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 }, { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRi12,ARM::tSTR, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 1 }, - { ARM::t2STRs, ARM::tSTR, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRBi12,ARM::tSTRB, 0, 5, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRBs, ARM::tSTRB, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRHi12,ARM::tSTRH, 0, 5, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRHs, ARM::tSTRH, 0, 0, 0, 1, 0, 0,0, 1 }, - - { ARM::t2LDM, ARM::tLDM, 0, 0, 0, 1, 1, 1,1, 1 }, - { ARM::t2LDM_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 }, - { ARM::t2LDM_UPD,ARM::tLDM_UPD,ARM::tPOP, 0, 0, 1, 1, 1,1, 1 }, + { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 1 }, + { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 1 }, + + { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 1 }, + { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 }, + { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 1 }, // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent - { ARM::t2STM_UPD,ARM::tSTM_UPD,ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 }, + { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 1 }, + { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 }, }; class Thumb2SizeReduce : public MachineFunctionPass { @@ -217,8 +216,8 @@ Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, /// Old opcode has an optional def of CPSR. if (HasCC) return true; - // If both old opcode does not implicit CPSR def, then it's not ok since - // these new opcodes CPSR def is not meant to be thrown away. e.g. CMP. + // If old opcode does not implicitly define CPSR, then it's not ok since + // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP. 
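+  // For instance, tCMPi8 always writes CPSR, so narrowing t2CMPri to it is
+  // only valid because t2CMPri itself carries an implicit CPSR def for that
+  // write to map onto (editorial example).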
if (!HasImplicitCPSRDef(MI->getDesc())) return false; HasCC = true; @@ -233,9 +232,10 @@ Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, static bool VerifyLowRegs(MachineInstr *MI) { unsigned Opc = MI->getOpcode(); - bool isPCOk = (Opc == ARM::t2LDM_RET || Opc == ARM::t2LDM || - Opc == ARM::t2LDM_UPD); - bool isLROk = (Opc == ARM::t2STM_UPD); + bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA || + Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD || + Opc == ARM::t2LDMDB_UPD); + bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD); bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); @@ -275,29 +275,32 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, unsigned Opc = Entry.NarrowOpc1; unsigned OpNum = 3; // First 'rest' of operands. uint8_t ImmLimit = Entry.Imm1Limit; + switch (Entry.WideOpc) { default: llvm_unreachable("Unexpected Thumb2 load / store opcode!"); case ARM::t2LDRi12: - case ARM::t2STRi12: { - unsigned BaseReg = MI->getOperand(1).getReg(); - if (BaseReg == ARM::SP) { + case ARM::t2STRi12: + if (MI->getOperand(1).getReg() == ARM::SP) { Opc = Entry.NarrowOpc2; ImmLimit = Entry.Imm2Limit; HasOffReg = false; } + Scale = 4; HasImmOffset = true; + HasOffReg = false; break; - } case ARM::t2LDRBi12: case ARM::t2STRBi12: HasImmOffset = true; + HasOffReg = false; break; case ARM::t2LDRHi12: case ARM::t2STRHi12: Scale = 2; HasImmOffset = true; + HasOffReg = false; break; case ARM::t2LDRs: case ARM::t2LDRBs: @@ -310,11 +313,12 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, HasShift = true; OpNum = 4; break; - case ARM::t2LDM: { + case ARM::t2LDMIA: + case ARM::t2LDMDB: { unsigned BaseReg = MI->getOperand(0).getReg(); - ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); - if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia) + if (!isARMLowRegister(BaseReg) || Entry.WideOpc != ARM::t2LDMIA) return false; + // For the non-writeback version (this one), the base register must be // one of the registers being loaded. 
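     // e.g. "t2LDMIA r3, {r1, r3, r5}" may become "tLDMIA r3, {r1, r3, r5}":
     // the Thumb1 encoding suppresses base writeback only when the base is
     // also in the register list, so this check preserves the semantics of
     // the non-writeback wide form (editorial example).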
bool isOK = false; @@ -324,6 +328,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, break; } } + if (!isOK) return false; @@ -331,28 +336,33 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, isLdStMul = true; break; } - case ARM::t2LDM_RET: { + case ARM::t2LDMIA_RET: { unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg != ARM::SP) return false; Opc = Entry.NarrowOpc2; // tPOP_RET - OpNum = 3; + OpNum = 2; isLdStMul = true; break; } - case ARM::t2LDM_UPD: - case ARM::t2STM_UPD: { + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB_UPD: + case ARM::t2STMIA_UPD: + case ARM::t2STMDB_UPD: { OpNum = 0; + unsigned BaseReg = MI->getOperand(1).getReg(); - ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(2).getImm()); if (BaseReg == ARM::SP && - ((Entry.WideOpc == ARM::t2LDM_UPD && Mode == ARM_AM::ia) || - (Entry.WideOpc == ARM::t2STM_UPD && Mode == ARM_AM::db))) { + (Entry.WideOpc == ARM::t2LDMIA_UPD || + Entry.WideOpc == ARM::t2STMDB_UPD)) { Opc = Entry.NarrowOpc2; // tPOP or tPUSH - OpNum = 3; - } else if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia) { + OpNum = 2; + } else if (!isARMLowRegister(BaseReg) || + (Entry.WideOpc != ARM::t2LDMIA_UPD && + Entry.WideOpc != ARM::t2STMIA_UPD)) { return false; } + isLdStMul = true; break; } @@ -363,6 +373,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, if (HasShift) { OffsetReg = MI->getOperand(2).getReg(); OffsetKill = MI->getOperand(2).isKill(); + if (MI->getOperand(3).getImm()) // Thumb1 addressing mode doesn't support shift. return false; @@ -372,23 +383,22 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, if (HasImmOffset) { OffsetImm = MI->getOperand(2).getImm(); unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale; - if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset) + + if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset) // Make sure the immediate field fits. return false; } // Add the 16-bit load / store instruction. - // FIXME: Thumb1 addressing mode encode both immediate and register offset. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc)); if (!isLdStMul) { - MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1)); - if (Opc != ARM::tLDRSB && Opc != ARM::tLDRSH) { - // tLDRSB and tLDRSH do not have an immediate offset field. On the other - // hand, it must have an offset register. - // FIXME: Remove this special case. - MIB.addImm(OffsetImm/Scale); - } + MIB.addOperand(MI->getOperand(0)); + MIB.addOperand(MI->getOperand(1)); + + if (HasImmOffset) + MIB.addImm(OffsetImm / Scale); + assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); if (HasOffReg) @@ -423,7 +433,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, unsigned Opc = MI->getOpcode(); switch (Opc) { default: break; - case ARM::t2ADDSri: + case ARM::t2ADDSri: case ARM::t2ADDSrr: { unsigned PredReg = 0; if (getInstrPredicate(MI, PredReg) == ARMCC::AL) { @@ -451,6 +461,25 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, if (MI->getOperand(1).isImm()) return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); break; + case ARM::t2CMPrr: { + // Try to reduce to the lo-reg only version first. Why there are two + // versions of the instruction is a mystery. + // It would be nice to just have two entries in the master table that + // are prioritized, but the table assumes a unique entry for each + // source insn opcode. 
So for now, we hack a local entry record to use. + static const ReduceEntry NarrowEntry = + { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 1 }; + if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR)) + return true; + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + } + case ARM::t2ADDrSPi: { + static const ReduceEntry NarrowEntry = + { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 1 }; + if (MI->getOperand(0).getReg() == ARM::SP) + return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + } } return false; } diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h index 5cf4866..2c359da 100644 --- a/lib/Target/Alpha/Alpha.h +++ b/lib/Target/Alpha/Alpha.h @@ -18,6 +18,13 @@ #include "llvm/Target/TargetMachine.h" namespace llvm { + namespace Alpha { + // These describe LDAx + + static const int IMM_LOW = -32768; + static const int IMM_HIGH = 32767; + static const int IMM_MULT = 65536; + } class AlphaTargetMachine; class FunctionPass; diff --git a/lib/Target/Alpha/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AlphaAsmPrinter.cpp new file mode 100644 index 0000000..46ae286 --- /dev/null +++ b/lib/Target/Alpha/AlphaAsmPrinter.cpp @@ -0,0 +1,166 @@ +//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format Alpha assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "Alpha.h" +#include "AlphaInstrInfo.h" +#include "AlphaTargetMachine.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + struct AlphaAsmPrinter : public AsmPrinter { + /// Unique incrementer for label values for referencing Global values. 
+ /// + + explicit AlphaAsmPrinter(TargetMachine &tm, MCStreamer &Streamer) + : AsmPrinter(tm, Streamer) {} + + virtual const char *getPassName() const { + return "Alpha Assembly Printer"; + } + void printInstruction(const MachineInstr *MI, raw_ostream &O); + void EmitInstruction(const MachineInstr *MI) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + printInstruction(MI, OS); + OutStreamer.EmitRawText(OS.str()); + } + static const char *getRegisterName(unsigned RegNo); + + void printOp(const MachineOperand &MO, raw_ostream &O); + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd(); + void EmitStartOfAsmFile(Module &M); + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + bool PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O); + }; +} // end of anonymous namespace + +#include "AlphaGenAsmWriter.inc" + +void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isReg()) { + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + "Not physreg??"); + O << getRegisterName(MO.getReg()); + } else if (MO.isImm()) { + O << MO.getImm(); + assert(MO.getImm() < (1 << 30)); + } else { + printOp(MO, O); + } +} + + +void AlphaAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << getRegisterName(MO.getReg()); + return; + + case MachineOperand::MO_Immediate: + assert(0 && "printOp() does not handle immediate values"); + return; + + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" + << MO.getIndex(); + return; + + case MachineOperand::MO_ExternalSymbol: + O << MO.getSymbolName(); + return; + + case MachineOperand::MO_GlobalAddress: + O << *Mang->getSymbol(MO.getGlobal()); + return; + + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + return; + + default: + O << "<unknown operand type: " << MO.getType() << ">"; + return; + } +} + +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void AlphaAsmPrinter::EmitFunctionBodyStart() { + OutStreamer.EmitRawText("\t.ent " + Twine(CurrentFnSym->getName())); +} + +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void AlphaAsmPrinter::EmitFunctionBodyEnd() { + OutStreamer.EmitRawText("\t.end " + Twine(CurrentFnSym->getName())); +} + +void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) { + OutStreamer.EmitRawText(StringRef("\t.arch ev6")); + OutStreamer.EmitRawText(StringRef("\t.set noat")); +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. 
+/// +bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + printOperand(MI, OpNo, O); + return false; +} + +bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + O << "0("; + printOperand(MI, OpNo, O); + O << ")"; + return false; +} + +// Force static initialization. +extern "C" void LLVMInitializeAlphaAsmPrinter() { + RegisterAsmPrinter<AlphaAsmPrinter> X(TheAlphaTarget); +} diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp deleted file mode 100644 index 3aec070..0000000 --- a/lib/Target/Alpha/AlphaCodeEmitter.cpp +++ /dev/null @@ -1,222 +0,0 @@ -//===-- Alpha/AlphaCodeEmitter.cpp - Convert Alpha code to machine code ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the pass that transforms the Alpha machine instructions -// into relocatable machine code. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "alpha-emitter" -#include "AlphaTargetMachine.h" -#include "AlphaRelocations.h" -#include "Alpha.h" -#include "llvm/PassManager.h" -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Function.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - class AlphaCodeEmitter : public MachineFunctionPass { - JITCodeEmitter &MCE; - const AlphaInstrInfo *II; - public: - static char ID; - - AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(ID), - MCE(mce) {} - - /// getBinaryCodeForInstr - This function, generated by the - /// CodeEmitterGenerator using TableGen, produces the binary encoding for - /// machine instructions. - - unsigned getBinaryCodeForInstr(const MachineInstr &MI); - - /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr - - unsigned getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO); - - bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const { - return "Alpha Machine Code Emitter"; - } - - private: - void emitBasicBlock(MachineBasicBlock &MBB); - }; -} - -char AlphaCodeEmitter::ID = 0; - - -/// createAlphaCodeEmitterPass - Return a pass that emits the collected Alpha -/// code to the specified MCE object. 
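// Note the retry loop in runOnMachineFunction below:
// JITCodeEmitter::finishFunction() reports whether the output buffer
// overflowed, and a true result makes the pass re-emit the whole function
// into a larger buffer. A minimal mock of that contract (MockEmitter and
// its buffer sizes are hypothetical, for illustration only):
#include <cassert>
#include <cstddef>

struct MockEmitter {
  std::size_t Capacity = 16, Used = 0;
  void startFunction() { Used = 0; }
  void emitWordLE(unsigned) { ++Used; }      // count emitted 4-byte words
  bool finishFunction() {                    // true => grow buffer, retry
    if (Used <= Capacity) return false;
    Capacity *= 2;
    return true;
  }
};

int main() {
  MockEmitter MCE;
  do {
    MCE.startFunction();
    for (int i = 0; i < 40; ++i)             // pretend: 40 instructions
      MCE.emitWordLE(0);
  } while (MCE.finishFunction());
  assert(MCE.Capacity >= 40);                // grew until the body fit
  return 0;
}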
- -FunctionPass *llvm::createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM, - JITCodeEmitter &JCE) { - return new AlphaCodeEmitter(JCE); -} - -bool AlphaCodeEmitter::runOnMachineFunction(MachineFunction &MF) { - II = ((AlphaTargetMachine&)MF.getTarget()).getInstrInfo(); - - do { - MCE.startFunction(MF); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - emitBasicBlock(*I); - } while (MCE.finishFunction(MF)); - - return false; -} - -void AlphaCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { - MCE.StartMachineBasicBlock(&MBB); - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - const MachineInstr &MI = *I; - MCE.processDebugLoc(MI.getDebugLoc(), true); - switch(MI.getOpcode()) { - default: - MCE.emitWordLE(getBinaryCodeForInstr(*I)); - break; - case Alpha::ALTENT: - case Alpha::PCLABEL: - case Alpha::MEMLABEL: - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - break; //skip these - } - MCE.processDebugLoc(MI.getDebugLoc(), false); - } -} - -static unsigned getAlphaRegNumber(unsigned Reg) { - switch (Reg) { - case Alpha::R0 : case Alpha::F0 : return 0; - case Alpha::R1 : case Alpha::F1 : return 1; - case Alpha::R2 : case Alpha::F2 : return 2; - case Alpha::R3 : case Alpha::F3 : return 3; - case Alpha::R4 : case Alpha::F4 : return 4; - case Alpha::R5 : case Alpha::F5 : return 5; - case Alpha::R6 : case Alpha::F6 : return 6; - case Alpha::R7 : case Alpha::F7 : return 7; - case Alpha::R8 : case Alpha::F8 : return 8; - case Alpha::R9 : case Alpha::F9 : return 9; - case Alpha::R10 : case Alpha::F10 : return 10; - case Alpha::R11 : case Alpha::F11 : return 11; - case Alpha::R12 : case Alpha::F12 : return 12; - case Alpha::R13 : case Alpha::F13 : return 13; - case Alpha::R14 : case Alpha::F14 : return 14; - case Alpha::R15 : case Alpha::F15 : return 15; - case Alpha::R16 : case Alpha::F16 : return 16; - case Alpha::R17 : case Alpha::F17 : return 17; - case Alpha::R18 : case Alpha::F18 : return 18; - case Alpha::R19 : case Alpha::F19 : return 19; - case Alpha::R20 : case Alpha::F20 : return 20; - case Alpha::R21 : case Alpha::F21 : return 21; - case Alpha::R22 : case Alpha::F22 : return 22; - case Alpha::R23 : case Alpha::F23 : return 23; - case Alpha::R24 : case Alpha::F24 : return 24; - case Alpha::R25 : case Alpha::F25 : return 25; - case Alpha::R26 : case Alpha::F26 : return 26; - case Alpha::R27 : case Alpha::F27 : return 27; - case Alpha::R28 : case Alpha::F28 : return 28; - case Alpha::R29 : case Alpha::F29 : return 29; - case Alpha::R30 : case Alpha::F30 : return 30; - case Alpha::R31 : case Alpha::F31 : return 31; - default: - llvm_unreachable("Unhandled reg"); - } -} - -unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) { - - unsigned rv = 0; // Return value; defaults to 0 for unhandled cases - // or things that get fixed up later by the JIT. 
- - if (MO.isReg()) { - rv = getAlphaRegNumber(MO.getReg()); - } else if (MO.isImm()) { - rv = MO.getImm(); - } else if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) { - DEBUG(errs() << MO << " is a relocated op for " << MI << "\n"); - unsigned Reloc = 0; - int Offset = 0; - bool useGOT = false; - switch (MI.getOpcode()) { - case Alpha::BSR: - Reloc = Alpha::reloc_bsr; - break; - case Alpha::LDLr: - case Alpha::LDQr: - case Alpha::LDBUr: - case Alpha::LDWUr: - case Alpha::LDSr: - case Alpha::LDTr: - case Alpha::LDAr: - case Alpha::STQr: - case Alpha::STLr: - case Alpha::STWr: - case Alpha::STBr: - case Alpha::STSr: - case Alpha::STTr: - Reloc = Alpha::reloc_gprellow; - break; - case Alpha::LDAHr: - Reloc = Alpha::reloc_gprelhigh; - break; - case Alpha::LDQl: - Reloc = Alpha::reloc_literal; - useGOT = true; - break; - case Alpha::LDAg: - case Alpha::LDAHg: - Reloc = Alpha::reloc_gpdist; - Offset = MI.getOperand(3).getImm(); - break; - default: - llvm_unreachable("unknown relocatable instruction"); - } - if (MO.isGlobal()) - MCE.addRelocation(MachineRelocation::getGV( - MCE.getCurrentPCOffset(), - Reloc, - const_cast<GlobalValue *>(MO.getGlobal()), - Offset, - isa<Function>(MO.getGlobal()), - useGOT)); - else if (MO.isSymbol()) - MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), - Reloc, MO.getSymbolName(), - Offset, true)); - else - MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), - Reloc, MO.getIndex(), Offset)); - } else if (MO.isMBB()) { - MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), - Alpha::reloc_bsr, MO.getMBB())); - } else { -#ifndef NDEBUG - errs() << "ERROR: Unknown type of MachineOperand: " << MO << "\n"; -#endif - llvm_unreachable(0); - } - - return rv; -} - -#include "AlphaGenCodeEmitter.inc" diff --git a/lib/Target/Alpha/AlphaFrameLowering.cpp b/lib/Target/Alpha/AlphaFrameLowering.cpp new file mode 100644 index 0000000..690cd1d --- /dev/null +++ b/lib/Target/Alpha/AlphaFrameLowering.cpp @@ -0,0 +1,143 @@ +//=====- AlphaFrameLowering.cpp - Alpha Frame Information ------*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Alpha implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "AlphaFrameLowering.h" +#include "AlphaInstrInfo.h" +#include "AlphaMachineFunctionInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/ADT/Twine.h" + +using namespace llvm; + +static long getUpper16(long l) { + long y = l / Alpha::IMM_MULT; + if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH) + ++y; + return y; +} + +static long getLower16(long l) { + long h = getUpper16(l); + return l - h * Alpha::IMM_MULT; +} + +// hasFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas or +// if frame pointer elimination is disabled. 
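// Concretely, hasFP keys off MachineFrameInfo::hasVarSizedObjects(): once
// the stack pointer moves by a runtime-determined amount, fixed SP-relative
// offsets no longer reach the locals, so a frame pointer (R15) is kept. A
// source-level sketch of a function that trips this (assumes a platform
// providing <alloca.h>):
#include <alloca.h>
#include <cstring>

int needs_frame_pointer(unsigned n) {
  char *buf = static_cast<char *>(alloca(n)); // variable-sized stack object
  std::memset(buf, 0, n);                     // SP offset of buf is dynamic,
  return buf[0];                              // so locals go through the FP
}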
+// +bool AlphaFrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->hasVarSizedObjects(); +} + +void AlphaFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc()); + bool FP = hasFP(MF); + + // Handle GOP offset + BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29) + .addGlobalAddress(MF.getFunction()).addReg(Alpha::R27).addImm(++curgpdist); + BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29) + .addGlobalAddress(MF.getFunction()).addReg(Alpha::R29).addImm(curgpdist); + + BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT)) + .addGlobalAddress(MF.getFunction()); + + // Get the number of bytes to allocate from the FrameInfo + long NumBytes = MFI->getStackSize(); + + if (FP) + NumBytes += 8; //reserve space for the old FP + + // Do we need to allocate space on the stack? + if (NumBytes == 0) return; + + unsigned Align = getStackAlignment(); + NumBytes = (NumBytes+Align-1)/Align*Align; + + // Update frame info to pretend that this is part of the stack... + MFI->setStackSize(NumBytes); + + // adjust stack pointer: r30 -= numbytes + NumBytes = -NumBytes; + if (NumBytes >= Alpha::IMM_LOW) { + BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes) + .addReg(Alpha::R30); + } else if (getUpper16(NumBytes) >= Alpha::IMM_LOW) { + BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30) + .addImm(getUpper16(NumBytes)).addReg(Alpha::R30); + BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30) + .addImm(getLower16(NumBytes)).addReg(Alpha::R30); + } else { + report_fatal_error("Too big a stack frame at " + Twine(NumBytes)); + } + + // Now if we need to, save the old FP and set the new + if (FP) { + BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ)) + .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30); + // This must be the last instr in the prolog + BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15) + .addReg(Alpha::R30).addReg(Alpha::R30); + } + +} + +void AlphaFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + assert((MBBI->getOpcode() == Alpha::RETDAG || + MBBI->getOpcode() == Alpha::RETDAGp) + && "Can only insert epilog into returning blocks"); + DebugLoc dl = MBBI->getDebugLoc(); + + bool FP = hasFP(MF); + + // Get the number of bytes allocated from the FrameInfo... 
+ long NumBytes = MFI->getStackSize(); + + //now if we need to, restore the old FP + if (FP) { + //copy the FP into the SP (discards allocas) + BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15) + .addReg(Alpha::R15); + //restore the FP + BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15) + .addImm(0).addReg(Alpha::R15); + } + + if (NumBytes != 0) { + if (NumBytes <= Alpha::IMM_HIGH) { + BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes) + .addReg(Alpha::R30); + } else if (getUpper16(NumBytes) <= Alpha::IMM_HIGH) { + BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30) + .addImm(getUpper16(NumBytes)).addReg(Alpha::R30); + BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30) + .addImm(getLower16(NumBytes)).addReg(Alpha::R30); + } else { + report_fatal_error("Too big a stack frame at " + Twine(NumBytes)); + } + } +} diff --git a/lib/Target/Alpha/AlphaFrameLowering.h b/lib/Target/Alpha/AlphaFrameLowering.h new file mode 100644 index 0000000..ebd9e1b --- /dev/null +++ b/lib/Target/Alpha/AlphaFrameLowering.h @@ -0,0 +1,43 @@ +//==-- AlphaFrameLowering.h - Define frame lowering for Alpha --*- C++ -*---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef ALPHA_FRAMEINFO_H +#define ALPHA_FRAMEINFO_H + +#include "Alpha.h" +#include "AlphaSubtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + class AlphaSubtarget; + +class AlphaFrameLowering : public TargetFrameLowering { + const AlphaSubtarget &STI; + // FIXME: This should end in MachineFunctionInfo, not here! + mutable int curgpdist; +public: + explicit AlphaFrameLowering(const AlphaSubtarget &sti) + : TargetFrameLowering(StackGrowsDown, 16, 0), STI(sti), curgpdist(0) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. 
+ void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool hasFP(const MachineFunction &MF) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index d197bd1..7b91fea 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -130,19 +130,6 @@ namespace { return (x - y) == r; } - static bool isFPZ(SDValue N) { - ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N); - return (CN && (CN->getValueAPF().isZero())); - } - static bool isFPZn(SDValue N) { - ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N); - return (CN && CN->getValueAPF().isNegZero()); - } - static bool isFPZp(SDValue N) { - ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N); - return (CN && CN->getValueAPF().isPosZero()); - } - public: explicit AlphaDAGToDAGISel(AlphaTargetMachine &TM) : SelectionDAGISel(TM) @@ -253,7 +240,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDNode *N) { Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0, Chain.getValue(1)); SDNode *CNode = - CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Flag, + CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Glue, Chain, Chain.getValue(1)); Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64, SDValue(CNode, 1)); @@ -416,13 +403,13 @@ void AlphaDAGToDAGISel::SelectCALL(SDNode *N) { Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag); InFlag = Chain.getValue(1); Chain = SDValue(CurDAG->getMachineNode(Alpha::BSR, dl, MVT::Other, - MVT::Flag, Addr.getOperand(0), + MVT::Glue, Addr.getOperand(0), Chain, InFlag), 0); } else { Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag); InFlag = Chain.getValue(1); Chain = SDValue(CurDAG->getMachineNode(Alpha::JSR, dl, MVT::Other, - MVT::Flag, Chain, InFlag), 0); + MVT::Glue, Chain, InFlag), 0); } InFlag = Chain.getValue(1); diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index ea78bf3..9137d65 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -27,6 +27,7 @@ #include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/Intrinsics.h" +#include "llvm/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -124,7 +125,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) setOperationAction(ISD::SETCC, MVT::f32, Promote); - setOperationAction(ISD::BIT_CONVERT, MVT::f32, Promote); + setOperationAction(ISD::BITCAST, MVT::f32, Promote); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); @@ -284,8 +285,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, DAG.getIntPtrConstant(VA.getLocMemOffset())); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), 0, - false, false, 0)); + MachinePointerInfo(),false, false, 0)); } } @@ -306,7 +306,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, } // Returns a chain & a flag for retval copy to use. 
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); Ops.push_back(Callee); @@ -431,7 +431,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i64); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), false, false, 0); } InVals.push_back(ArgVal); @@ -448,7 +448,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true); if (i == 0) FuncInfo->setVarArgsBase(FI); SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, + LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(), false, false, 0)); if (TargetRegisterInfo::isPhysicalRegister(args_float[i])) @@ -456,7 +456,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64); FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true); SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, + LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(), false, false, 0)); } @@ -537,12 +537,14 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue(); DebugLoc dl = N->getDebugLoc(); - SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0, + SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, + MachinePointerInfo(VAListS), false, false, 0); SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); - SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Base.getValue(1), - Tmp, NULL, 0, MVT::i32, false, false, 0); + SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), + Tmp, MachinePointerInfo(), + MVT::i32, false, false, 0); DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); if (N->getValueType(0).isFloatingPoint()) { @@ -556,7 +558,8 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset, DAG.getConstant(8, MVT::i64)); - Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, NULL, 0, + Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, + MachinePointerInfo(), MVT::i32, false, false, 0); } @@ -613,7 +616,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, "Unhandled SINT_TO_FP type in custom expander!"); SDValue LD; bool isDouble = Op.getValueType() == MVT::f64; - LD = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op.getOperand(0)); + LD = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0)); SDValue FP = DAG.getNode(isDouble?AlphaISD::CVTQT_:AlphaISD::CVTQS_, dl, isDouble?MVT::f64:MVT::f32, LD); return FP; @@ -627,7 +630,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, src); + return DAG.getNode(ISD::BITCAST, dl, MVT::i64, src); } case ISD::ConstantPool: { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); @@ -645,11 +648,11 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, case ISD::GlobalAddress: { GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV 
= GSDN->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64, + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64, GSDN->getOffset()); // FIXME there isn't really any debug info here - // if (!GV->hasWeakLinkage() && !GV->isDeclaration() + // if (!GV->hasWeakLinkage() && !GV->isDeclaration() // && !GV->hasLinkOnceLinkage()) { if (GV->hasLocalLinkage()) { SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA, @@ -706,10 +709,11 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SDValue Result; if (Op.getValueType() == MVT::i32) - Result = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Chain, DataPtr, - NULL, 0, MVT::i32, false, false, 0); + Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, + MachinePointerInfo(), MVT::i32, false, false, 0); else - Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0, + Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, + MachinePointerInfo(), false, false, 0); return Result; } @@ -720,17 +724,20 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0, + SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, + MachinePointerInfo(SrcS), false, false, 0); - SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0, + SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, + MachinePointerInfo(DestS), false, false, 0); SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, DAG.getConstant(8, MVT::i64)); - Val = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Result, - NP, NULL,0, MVT::i32, false, false, 0); + Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, + NP, MachinePointerInfo(), MVT::i32, false, false, 0); SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, DAG.getConstant(8, MVT::i64)); - return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32, + return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, + MachinePointerInfo(), MVT::i32, false, false, 0); } case ISD::VASTART: { @@ -743,14 +750,15 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, // vastart stores the address of the VarArgsBase and VarArgsOffset SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64); - SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0, - false, false, 0); + SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, + MachinePointerInfo(VAListS), false, false, 0); SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); return DAG.getTruncStore(S1, dl, DAG.getConstant(FuncInfo->getVarArgsOffset(), MVT::i64), - SA2, NULL, 0, MVT::i32, false, false, 0); + SA2, MachinePointerInfo(), + MVT::i32, false, false, 0); } case ISD::RETURNADDR: return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc(), MVT::i64); @@ -771,7 +779,8 @@ void AlphaTargetLowering::ReplaceNodeResults(SDNode *N, SDValue Chain, DataPtr; LowerVAARG(N, Chain, DataPtr, DAG); - SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0, + SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, + MachinePointerInfo(), false, false, 0); Results.push_back(Res); Results.push_back(SDValue(Res.getNode(), 1)); @@ -795,6 +804,30 @@ AlphaTargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } +/// Examine constraint type and 
operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight +AlphaTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + case 'f': + weight = CW_Register; + break; + } + return weight; +} + std::vector<unsigned> AlphaTargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index 46e0c7d..b429e9f 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ b/lib/Target/Alpha/AlphaISelLowering.h @@ -87,6 +87,11 @@ namespace llvm { ConstraintType getConstraintType(const std::string &Constraint) const; + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. + ConstraintWeight getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const; + std::vector<unsigned> getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index 92de78a..099d715 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -27,7 +27,7 @@ def Alpha_gprelhi : SDNode<"AlphaISD::GPRelHi", SDTIntBinOp, []>; def Alpha_rellit : SDNode<"AlphaISD::RelLit", SDTIntBinOp, [SDNPMayLoad]>; def retflag : SDNode<"AlphaISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInGlue]>; // These are target-independent nodes, but have target-specific formats. def SDT_AlphaCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>; @@ -35,9 +35,9 @@ def SDT_AlphaCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>, SDTCisVT<1, i64> ]>; def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AlphaCallSeqStart, - [SDNPHasChain, SDNPOutFlag]>; + [SDNPHasChain, SDNPOutGlue]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AlphaCallSeqEnd, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; //******************** //Paterns for matching diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp deleted file mode 100644 index 12685ed..0000000 --- a/lib/Target/Alpha/AlphaJITInfo.cpp +++ /dev/null @@ -1,310 +0,0 @@ -//===-- AlphaJITInfo.cpp - Implement the JIT interfaces for the Alpha ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the JIT interfaces for the Alpha target. 
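// The BUILD_* macros below assemble Alpha memory-format words by hand:
// opcode in bits 31..26, Ra in bits 25..21, Rb in bits 20..16, and a 16-bit
// displacement in the low half. A standalone sanity sketch of that layout
// (the field positions are restated here so the snippet compiles on its
// own):
#include <cassert>
#include <cstdint>

static uint32_t buildLDA(unsigned RD, unsigned RS, int IMM16) {
  return (0x08u << 26) | (RD << 21) | (RS << 16) |
         (static_cast<uint32_t>(IMM16) & 0xFFFFu);
}

int main() {
  const uint32_t W = buildLDA(30, 30, -16);          // lda $30, -16($30)
  assert((W >> 26) == 0x08u);                        // opcode
  assert(((W >> 21) & 31u) == 30u);                  // Ra
  assert(((W >> 16) & 31u) == 30u);                  // Rb
  assert(static_cast<int16_t>(W & 0xFFFFu) == -16);  // displacement
  return 0;
}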
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "jit" -#include "AlphaJITInfo.h" -#include "AlphaRelocations.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include <cstdlib> -using namespace llvm; - -#define BUILD_OFormatI(Op, RA, LIT, FUN, RC) \ - ((Op << 26) | (RA << 21) | (LIT << 13) | (1 << 12) | (FUN << 5) | (RC)) -#define BUILD_OFormat(Op, RA, RB, FUN, RC) \ - ((Op << 26) | (RA << 21) | (RB << 16) | (FUN << 5) | (RC)) - -#define BUILD_LDA(RD, RS, IMM16) \ - ((0x08 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535)) -#define BUILD_LDAH(RD, RS, IMM16) \ - ((0x09 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535)) - -#define BUILD_LDQ(RD, RS, IMM16) \ - ((0x29 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 0xFFFF)) - -#define BUILD_JMP(RD, RS, IMM16) \ - ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x00 << 14) | ((IMM16) & 0x3FFF)) -#define BUILD_JSR(RD, RS, IMM16) \ - ((0x1A << 26) | ((RD) << 21) | ((RS) << 16) | (0x01 << 14) | ((IMM16) & 0x3FFF)) - -#define BUILD_SLLi(RD, RS, IMM8) \ - (BUILD_OFormatI(0x12, RS, IMM8, 0x39, RD)) - -#define BUILD_ORi(RD, RS, IMM8) \ - (BUILD_OFormatI(0x11, RS, IMM8, 0x20, RD)) - -#define BUILD_OR(RD, RS, RT) \ - (BUILD_OFormat(0x11, RS, RT, 0x20, RD)) - - - -static void EmitBranchToAt(void *At, void *To) { - unsigned long Fn = (unsigned long)To; - - unsigned *AtI = (unsigned*)At; - - AtI[0] = BUILD_OR(0, 27, 27); - - DEBUG(errs() << "Stub targeting " << To << "\n"); - - for (int x = 1; x <= 8; ++x) { - AtI[2*x - 1] = BUILD_SLLi(27,27,8); - unsigned d = (Fn >> (64 - 8 * x)) & 0x00FF; - //DEBUG(errs() << "outputing " << hex << d << dec << "\n"); - AtI[2*x] = BUILD_ORi(27, 27, d); - } - AtI[17] = BUILD_JMP(31,27,0); //jump, preserving ra, and setting pv - AtI[18] = 0x00FFFFFF; //mark this as a stub -} - -void AlphaJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - //FIXME - llvm_unreachable(0); -} - -static TargetJITInfo::JITCompilerFn JITCompilerFunction; -//static AlphaJITInfo* AlphaJTI; - -extern "C" { -#ifdef __alpha - - void AlphaCompilationCallbackC(long* oldpv, void* CameFromStub) - { - void* Target = JITCompilerFunction(CameFromStub); - - //rewrite the stub to an unconditional branch - if (((unsigned*)CameFromStub)[18] == 0x00FFFFFF) { - DEBUG(errs() << "Came from a stub, rewriting\n"); - EmitBranchToAt(CameFromStub, Target); - } else { - DEBUG(errs() << "confused, didn't come from stub at " << CameFromStub - << " old jump vector " << oldpv - << " new jump vector " << Target << "\n"); - } - - //Change pv to new Target - *oldpv = (long)Target; - } - - void AlphaCompilationCallback(void); - - asm( - ".text\n" - ".globl AlphaCompilationCallbackC\n" - ".align 4\n" - ".globl AlphaCompilationCallback\n" - ".ent AlphaCompilationCallback\n" -"AlphaCompilationCallback:\n" - // //get JIT's GOT - "ldgp $29, 0($27)\n" - //Save args, callee saved, and perhaps others? 
- //args: $16-$21 $f16-$f21 (12) - //callee: $9-$14 $f2-$f9 (14) - //others: fp:$15 ra:$26 pv:$27 (3) - "lda $30, -232($30)\n" - "stq $16, 0($30)\n" - "stq $17, 8($30)\n" - "stq $18, 16($30)\n" - "stq $19, 24($30)\n" - "stq $20, 32($30)\n" - "stq $21, 40($30)\n" - "stt $f16, 48($30)\n" - "stt $f17, 56($30)\n" - "stt $f18, 64($30)\n" - "stt $f19, 72($30)\n" - "stt $f20, 80($30)\n" - "stt $f21, 88($30)\n" - "stq $9, 96($30)\n" - "stq $10, 104($30)\n" - "stq $11, 112($30)\n" - "stq $12, 120($30)\n" - "stq $13, 128($30)\n" - "stq $14, 136($30)\n" - "stt $f2, 144($30)\n" - "stt $f3, 152($30)\n" - "stt $f4, 160($30)\n" - "stt $f5, 168($30)\n" - "stt $f6, 176($30)\n" - "stt $f7, 184($30)\n" - "stt $f8, 192($30)\n" - "stt $f9, 200($30)\n" - "stq $15, 208($30)\n" - "stq $26, 216($30)\n" - "stq $27, 224($30)\n" - - "addq $30, 224, $16\n" //pass the addr of saved pv as the first arg - "bis $0, $0, $17\n" //pass the roughly stub addr in second arg - "jsr $26, AlphaCompilationCallbackC\n" //call without saving ra - - "ldq $16, 0($30)\n" - "ldq $17, 8($30)\n" - "ldq $18, 16($30)\n" - "ldq $19, 24($30)\n" - "ldq $20, 32($30)\n" - "ldq $21, 40($30)\n" - "ldt $f16, 48($30)\n" - "ldt $f17, 56($30)\n" - "ldt $f18, 64($30)\n" - "ldt $f19, 72($30)\n" - "ldt $f20, 80($30)\n" - "ldt $f21, 88($30)\n" - "ldq $9, 96($30)\n" - "ldq $10, 104($30)\n" - "ldq $11, 112($30)\n" - "ldq $12, 120($30)\n" - "ldq $13, 128($30)\n" - "ldq $14, 136($30)\n" - "ldt $f2, 144($30)\n" - "ldt $f3, 152($30)\n" - "ldt $f4, 160($30)\n" - "ldt $f5, 168($30)\n" - "ldt $f6, 176($30)\n" - "ldt $f7, 184($30)\n" - "ldt $f8, 192($30)\n" - "ldt $f9, 200($30)\n" - "ldq $15, 208($30)\n" - "ldq $26, 216($30)\n" - "ldq $27, 224($30)\n" //this was updated in the callback with the target - - "lda $30, 232($30)\n" //restore sp - "jmp $31, ($27)\n" //jump to the new function - ".end AlphaCompilationCallback\n" - ); -#else - void AlphaCompilationCallback() { - llvm_unreachable("Cannot call AlphaCompilationCallback() on a non-Alpha arch!"); - } -#endif -} - -TargetJITInfo::StubLayout AlphaJITInfo::getStubLayout() { - // The stub contains 19 4-byte instructions, aligned at 4 bytes: - // R0 = R27 - // 8 x "R27 <<= 8; R27 |= 8-bits-of-Target" == 16 instructions - // JMP R27 - // Magic number so the compilation callback can recognize the stub. - StubLayout Result = {19 * 4, 4}; - return Result; -} - -void *AlphaJITInfo::emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE) { - //assert(Fn == AlphaCompilationCallback && "Where are you going?\n"); - //Do things in a stupid slow way! 
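// EmitBranchToAt above splices the 64-bit target address into the stub one
// byte at a time: each of the eight SLL/OR pairs shifts R27 left by eight
// and ORs in the next-most-significant byte of the target, so after 64
// bits of shifting every stale bit of R27 is gone and R27 equals the
// target. A host-side check of that arithmetic (the example addresses are
// arbitrary):
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Fn = 0x000003ff8001234cULL; // example callee address
  uint64_t R27 = 0xdeadbeefcafef00dULL;      // stale pv, shifted out below
  for (int x = 1; x <= 8; ++x) {
    R27 <<= 8;                               // SLLi $27, $27, 8
    R27 |= (Fn >> (64 - 8 * x)) & 0xFF;      // ORi  $27, $27, byte
  }
  assert(R27 == Fn);                         // JMP $31, ($27) now lands
  return 0;
}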
- void* Addr = (void*)(intptr_t)JCE.getCurrentPCValue(); - for (int x = 0; x < 19; ++ x) - JCE.emitWordLE(0); - EmitBranchToAt(Addr, Fn); - DEBUG(errs() << "Emitting Stub to " << Fn << " at [" << Addr << "]\n"); - return Addr; -} - -TargetJITInfo::LazyResolverFn -AlphaJITInfo::getLazyResolverFunction(JITCompilerFn F) { - JITCompilerFunction = F; - // setZerothGOTEntry((void*)AlphaCompilationCallback); - return AlphaCompilationCallback; -} - -//These describe LDAx -static const int IMM_LOW = -32768; -static const int IMM_HIGH = 32767; -static const int IMM_MULT = 65536; - -static long getUpper16(long l) -{ - long y = l / IMM_MULT; - if (l % IMM_MULT > IMM_HIGH) - ++y; - if (l % IMM_MULT < IMM_LOW) - --y; - assert((short)y == y && "displacement out of range"); - return y; -} - -static long getLower16(long l) -{ - long h = getUpper16(l); - long y = l - h * IMM_MULT; - assert(y == (short)y && "Displacement out of range"); - return y; -} - -void AlphaJITInfo::relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase) { - for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { - unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4; - long idx = 0; - bool doCommon = true; - switch ((Alpha::RelocationType)MR->getRelocationType()) { - default: llvm_unreachable("Unknown relocation type!"); - case Alpha::reloc_literal: - //This is a LDQl - idx = MR->getGOTIndex(); - DEBUG(errs() << "Literal relocation to slot " << idx); - idx = (idx - GOToffset) * 8; - DEBUG(errs() << " offset " << idx << "\n"); - break; - case Alpha::reloc_gprellow: - idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8]; - idx = getLower16(idx); - DEBUG(errs() << "gprellow relocation offset " << idx << "\n"); - DEBUG(errs() << " Pointer is " << (void*)MR->getResultPointer() - << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n"); - break; - case Alpha::reloc_gprelhigh: - idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8]; - idx = getUpper16(idx); - DEBUG(errs() << "gprelhigh relocation offset " << idx << "\n"); - DEBUG(errs() << " Pointer is " << (void*)MR->getResultPointer() - << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n"); - break; - case Alpha::reloc_gpdist: - switch (*RelocPos >> 26) { - case 0x09: //LDAH - idx = &GOTBase[GOToffset * 8] - (unsigned char*)RelocPos; - idx = getUpper16(idx); - DEBUG(errs() << "LDAH: " << idx << "\n"); - //add the relocation to the map - gpdistmap[std::make_pair(Function, MR->getConstantVal())] = RelocPos; - break; - case 0x08: //LDA - assert(gpdistmap[std::make_pair(Function, MR->getConstantVal())] && - "LDAg without seeing LDAHg"); - idx = &GOTBase[GOToffset * 8] - - (unsigned char*)gpdistmap[std::make_pair(Function, MR->getConstantVal())]; - idx = getLower16(idx); - DEBUG(errs() << "LDA: " << idx << "\n"); - break; - default: - llvm_unreachable("Cannot handle gpdist yet"); - } - break; - case Alpha::reloc_bsr: { - idx = (((unsigned char*)MR->getResultPointer() - - (unsigned char*)RelocPos) >> 2) + 1; //skip first 2 inst of fun - *RelocPos |= (idx & ((1 << 21)-1)); - doCommon = false; - break; - } - } - if (doCommon) { - short x = (short)idx; - assert(x == idx); - *(short*)RelocPos = x; - } - } -} diff --git a/lib/Target/Alpha/AlphaJITInfo.h b/lib/Target/Alpha/AlphaJITInfo.h deleted file mode 100644 index bd358a4..0000000 --- a/lib/Target/Alpha/AlphaJITInfo.h +++ /dev/null @@ -1,53 +0,0 @@ -//===- AlphaJITInfo.h - Alpha impl. 
of the JIT interface ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Alpha implementation of the TargetJITInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef ALPHA_JITINFO_H -#define ALPHA_JITINFO_H - -#include "llvm/Target/TargetJITInfo.h" -#include <map> - -namespace llvm { - class TargetMachine; - - class AlphaJITInfo : public TargetJITInfo { - protected: - TargetMachine &TM; - - //because gpdist are paired and relative to the pc of the first inst, - //we need to have some state - std::map<std::pair<void*, int>, void*> gpdistmap; - public: - explicit AlphaJITInfo(TargetMachine &tm) : TM(tm) - { useGOT = true; } - - virtual StubLayout getStubLayout(); - virtual void *emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE); - virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); - virtual void relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase); - - /// replaceMachineCodeForFunction - Make it so that calling the function - /// whose machine code is at OLD turns into a call to NEW, perhaps by - /// overwriting OLD with a branch to NEW. This is used for self-modifying - /// code. - /// - virtual void replaceMachineCodeForFunction(void *Old, void *New); - private: - static const unsigned GOToffset = 4096; - - }; -} - -#endif diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index 327ddb4..7667fd8 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLocation.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetInstrInfo.h" @@ -35,29 +35,21 @@ #include <cstdlib> using namespace llvm; -//These describe LDAx -static const int IMM_LOW = -32768; -static const int IMM_HIGH = 32767; -static const int IMM_MULT = 65536; +AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii) + : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP), + TII(tii) { +} -static long getUpper16(long l) -{ - long y = l / IMM_MULT; - if (l % IMM_MULT > IMM_HIGH) +static long getUpper16(long l) { + long y = l / Alpha::IMM_MULT; + if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH) ++y; return y; } -static long getLower16(long l) -{ +static long getLower16(long l) { long h = getUpper16(l); - return l - h * IMM_MULT; -} - -AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii) - : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP), - TII(tii), curgpdist(0) -{ + return l - h * Alpha::IMM_MULT; } const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) @@ -86,19 +78,12 @@ BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// -// hasFP - Return true if the specified function should have a dedicated frame -// pointer register. 
This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. -// -bool AlphaRegisterInfo::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->hasVarSizedObjects(); -} - void AlphaRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (hasFP(MF)) { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (TFI->hasFP(MF)) { // If we have a frame pointer, turn the adjcallstackup instruction into a // 'sub ESP, <amt>' and the adjcallstackdown instruction into 'add ESP, // <amt>' @@ -108,7 +93,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned Align = TFI->getStackAlignment(); Amount = (Amount+Align-1)/Align*Align; MachineInstr *New; @@ -146,7 +131,9 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - bool FP = hasFP(MF); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + bool FP = TFI->hasFP(MF); while (!MI.getOperand(i).isFI()) { ++i; @@ -168,7 +155,7 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, DEBUG(errs() << "Corrected Offset " << Offset << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n"); - if (Offset > IMM_HIGH || Offset < IMM_LOW) { + if (Offset > Alpha::IMM_HIGH || Offset < Alpha::IMM_LOW) { DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: " << Offset << "\n"); //so in this case, we need to use a temporary register, and move the @@ -186,111 +173,14 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } - -void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc()); - bool FP = hasFP(MF); - - //handle GOP offset - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29) - .addGlobalAddress(MF.getFunction()) - .addReg(Alpha::R27).addImm(++curgpdist); - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29) - .addGlobalAddress(MF.getFunction()) - .addReg(Alpha::R29).addImm(curgpdist); - - BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT)) - .addGlobalAddress(MF.getFunction()); - - // Get the number of bytes to allocate from the FrameInfo - long NumBytes = MFI->getStackSize(); - - if (FP) - NumBytes += 8; //reserve space for the old FP - - // Do we need to allocate space on the stack? - if (NumBytes == 0) return; - - unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); - NumBytes = (NumBytes+Align-1)/Align*Align; - - // Update frame info to pretend that this is part of the stack... 
- MFI->setStackSize(NumBytes); - - // adjust stack pointer: r30 -= numbytes - NumBytes = -NumBytes; - if (NumBytes >= IMM_LOW) { - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes) - .addReg(Alpha::R30); - } else if (getUpper16(NumBytes) >= IMM_LOW) { - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30) - .addImm(getUpper16(NumBytes)).addReg(Alpha::R30); - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30) - .addImm(getLower16(NumBytes)).addReg(Alpha::R30); - } else { - report_fatal_error("Too big a stack frame at " + Twine(NumBytes)); - } - - //now if we need to, save the old FP and set the new - if (FP) - { - BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ)) - .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30); - //this must be the last instr in the prolog - BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15) - .addReg(Alpha::R30).addReg(Alpha::R30); - } - -} - -void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - assert((MBBI->getOpcode() == Alpha::RETDAG || - MBBI->getOpcode() == Alpha::RETDAGp) - && "Can only insert epilog into returning blocks"); - DebugLoc dl = MBBI->getDebugLoc(); - - bool FP = hasFP(MF); - - // Get the number of bytes allocated from the FrameInfo... - long NumBytes = MFI->getStackSize(); - - //now if we need to, restore the old FP - if (FP) { - //copy the FP into the SP (discards allocas) - BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15) - .addReg(Alpha::R15); - //restore the FP - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15) - .addImm(0).addReg(Alpha::R15); - } - - if (NumBytes != 0) { - if (NumBytes <= IMM_HIGH) { - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes) - .addReg(Alpha::R30); - } else if (getUpper16(NumBytes) <= IMM_HIGH) { - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30) - .addImm(getUpper16(NumBytes)).addReg(Alpha::R30); - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30) - .addImm(getLower16(NumBytes)).addReg(Alpha::R30); - } else { - report_fatal_error("Too big a stack frame at " + Twine(NumBytes)); - } - } -} - unsigned AlphaRegisterInfo::getRARegister() const { return Alpha::R26; } unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - return hasFP(MF) ? Alpha::R15 : Alpha::R30; + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + return TFI->hasFP(MF) ? Alpha::R15 : Alpha::R30; } unsigned AlphaRegisterInfo::getEHExceptionRegister() const { diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h index b164979..b0d4dd0 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.h +++ b/lib/Target/Alpha/AlphaRegisterInfo.h @@ -32,8 +32,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const; - bool hasFP(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; @@ -41,11 +39,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo { void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - //void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; - - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - // Debug information queries. 
unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; @@ -57,9 +50,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo { int getDwarfRegNum(unsigned RegNum, bool isEH) const; static std::string getPrettyName(unsigned reg); - -private: - mutable int curgpdist; }; } // end namespace llvm diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td index 4dc04b8..3703dd4 100644 --- a/lib/Target/Alpha/AlphaSchedule.td +++ b/lib/Target/Alpha/AlphaSchedule.td @@ -50,11 +50,11 @@ def s_ftoi : InstrItinClass; def s_itof : InstrItinClass; def s_pseudo : InstrItinClass; -//Table 2�4 Instruction Class Latency in Cycles +//Table 2-4 Instruction Class Latency in Cycles //modified some def Alpha21264Itineraries : ProcessorItineraries< - [L0, L1, FST0, FST1, U0, U1, FA, FM], [ + [L0, L1, FST0, FST1, U0, U1, FA, FM], [], [ InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>, InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>, InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>, diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp index fc9be03..b53533b 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "Alpha.h" -#include "AlphaJITInfo.h" #include "AlphaMCAsmInfo.h" #include "AlphaTargetMachine.h" #include "llvm/PassManager.h" @@ -29,8 +28,7 @@ AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : LLVMTargetMachine(T, TT), DataLayout("e-f128:128:128-n64"), - FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), - JITInfo(*this), + FrameLowering(Subtarget), Subtarget(TT, FS), TLInfo(*this), TSInfo(*this) { @@ -54,9 +52,3 @@ bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM, PM.add(createAlphaLLRPPass(*this)); return false; } -bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE) { - PM.add(createAlphaJITCodeEmitterPass(*this, JCE)); - return false; -} diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index 153944e..26238fb 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -14,14 +14,14 @@ #ifndef ALPHA_TARGETMACHINE_H #define ALPHA_TARGETMACHINE_H -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "AlphaInstrInfo.h" -#include "AlphaJITInfo.h" #include "AlphaISelLowering.h" +#include "AlphaFrameLowering.h" #include "AlphaSelectionDAGInfo.h" #include "AlphaSubtarget.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -30,8 +30,7 @@ class GlobalValue; class AlphaTargetMachine : public LLVMTargetMachine { const TargetData DataLayout; // Calculates type size & alignment AlphaInstrInfo InstrInfo; - TargetFrameInfo FrameInfo; - AlphaJITInfo JITInfo; + AlphaFrameLowering FrameLowering; AlphaSubtarget Subtarget; AlphaTargetLowering TLInfo; AlphaSelectionDAGInfo TSInfo; @@ -41,7 +40,9 @@ public: const std::string &FS); virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual const TargetFrameLowering *getFrameLowering() const { + return &FrameLowering; + } virtual const AlphaSubtarget 
*getSubtargetImpl() const{ return &Subtarget; } virtual const AlphaRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); @@ -53,15 +54,10 @@ public: return &TSInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } - virtual AlphaJITInfo* getJITInfo() { - return &JITInfo; - } // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE); }; } // end namespace llvm diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp deleted file mode 100644 index 5428cb9..0000000 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ /dev/null @@ -1,166 +0,0 @@ -//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to GAS-format Alpha assembly language. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "Alpha.h" -#include "AlphaInstrInfo.h" -#include "AlphaTargetMachine.h" -#include "llvm/Module.h" -#include "llvm/Type.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - struct AlphaAsmPrinter : public AsmPrinter { - /// Unique incrementer for label values for referencing Global values. 
- /// - - explicit AlphaAsmPrinter(TargetMachine &tm, MCStreamer &Streamer) - : AsmPrinter(tm, Streamer) {} - - virtual const char *getPassName() const { - return "Alpha Assembly Printer"; - } - void printInstruction(const MachineInstr *MI, raw_ostream &O); - void EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); - } - static const char *getRegisterName(unsigned RegNo); - - void printOp(const MachineOperand &MO, raw_ostream &O); - void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - virtual void EmitFunctionBodyStart(); - virtual void EmitFunctionBodyEnd(); - void EmitStartOfAsmFile(Module &M); - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - bool PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O); - }; -} // end of anonymous namespace - -#include "AlphaGenAsmWriter.inc" - -void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(opNum); - if (MO.isReg()) { - assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && - "Not physreg??"); - O << getRegisterName(MO.getReg()); - } else if (MO.isImm()) { - O << MO.getImm(); - assert(MO.getImm() < (1 << 30)); - } else { - printOp(MO, O); - } -} - - -void AlphaAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { - switch (MO.getType()) { - case MachineOperand::MO_Register: - O << getRegisterName(MO.getReg()); - return; - - case MachineOperand::MO_Immediate: - llvm_unreachable("printOp() does not handle immediate values"); - return; - - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" - << MO.getIndex(); - return; - - case MachineOperand::MO_ExternalSymbol: - O << MO.getSymbolName(); - return; - - case MachineOperand::MO_GlobalAddress: - O << *Mang->getSymbol(MO.getGlobal()); - return; - - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - return; - - default: - O << "<unknown operand type: " << MO.getType() << ">"; - return; - } -} - -/// EmitFunctionBodyStart - Targets can override this to emit stuff before -/// the first basic block in the function. -void AlphaAsmPrinter::EmitFunctionBodyStart() { - OutStreamer.EmitRawText("\t.ent " + Twine(CurrentFnSym->getName())); -} - -/// EmitFunctionBodyEnd - Targets can override this to emit stuff after -/// the last basic block in the function. -void AlphaAsmPrinter::EmitFunctionBodyEnd() { - OutStreamer.EmitRawText("\t.end " + Twine(CurrentFnSym->getName())); -} - -void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) { - OutStreamer.EmitRawText(StringRef("\t.arch ev6")); - OutStreamer.EmitRawText(StringRef("\t.set noat")); -} - -/// PrintAsmOperand - Print out an operand for an inline asm expression. 
-/// -bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O) { - printOperand(MI, OpNo, O); - return false; -} - -bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. - O << "0("; - printOperand(MI, OpNo, O); - O << ")"; - return false; -} - -// Force static initialization. -extern "C" void LLVMInitializeAlphaAsmPrinter() { - RegisterAsmPrinter<AlphaAsmPrinter> X(TheAlphaTarget); -} diff --git a/lib/Target/Alpha/AsmPrinter/CMakeLists.txt b/lib/Target/Alpha/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 992c218..0000000 --- a/lib/Target/Alpha/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMAlphaAsmPrinter - AlphaAsmPrinter.cpp - ) -add_dependencies(LLVMAlphaAsmPrinter AlphaCodeGenTable_gen) diff --git a/lib/Target/Alpha/AsmPrinter/Makefile b/lib/Target/Alpha/AsmPrinter/Makefile deleted file mode 100644 index ea13c38..0000000 --- a/lib/Target/Alpha/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/Alpha/AsmPrinter/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMAlphaAsmPrinter - -# Hack: we need to include 'main' alpha target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt index fbf7f3a..454262a 100644 --- a/lib/Target/Alpha/CMakeLists.txt +++ b/lib/Target/Alpha/CMakeLists.txt @@ -5,19 +5,18 @@ tablegen(AlphaGenRegisterNames.inc -gen-register-enums) tablegen(AlphaGenRegisterInfo.inc -gen-register-desc) tablegen(AlphaGenInstrNames.inc -gen-instr-enums) tablegen(AlphaGenInstrInfo.inc -gen-instr-desc) -tablegen(AlphaGenCodeEmitter.inc -gen-emitter) tablegen(AlphaGenAsmWriter.inc -gen-asm-writer) tablegen(AlphaGenDAGISel.inc -gen-dag-isel) tablegen(AlphaGenCallingConv.inc -gen-callingconv) tablegen(AlphaGenSubtarget.inc -gen-subtarget) add_llvm_target(AlphaCodeGen + AlphaAsmPrinter.cpp AlphaBranchSelector.cpp - AlphaCodeEmitter.cpp AlphaInstrInfo.cpp AlphaISelDAGToDAG.cpp AlphaISelLowering.cpp - AlphaJITInfo.cpp + AlphaFrameLowering.cpp AlphaLLRP.cpp AlphaMCAsmInfo.cpp AlphaRegisterInfo.cpp @@ -26,4 +25,4 @@ add_llvm_target(AlphaCodeGen AlphaSelectionDAGInfo.cpp ) -target_link_libraries (LLVMAlphaCodeGen LLVMSelectionDAG) +add_subdirectory(TargetInfo) diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile index 54d53ab..9564be6 100644 --- a/lib/Target/Alpha/Makefile +++ b/lib/Target/Alpha/Makefile @@ -14,10 +14,10 @@ TARGET = Alpha # Make sure that tblgen is run, first thing. 
BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \ AlphaGenRegisterInfo.inc AlphaGenInstrNames.inc \ - AlphaGenInstrInfo.inc AlphaGenCodeEmitter.inc \ + AlphaGenInstrInfo.inc \ AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \ AlphaGenCallingConv.inc AlphaGenSubtarget.inc -DIRS = AsmPrinter TargetInfo +DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp deleted file mode 100644 index 6ba258b..0000000 --- a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp +++ /dev/null @@ -1,156 +0,0 @@ -//===-- BlackfinAsmPrinter.cpp - Blackfin LLVM assembly writer ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to GAS-format BLACKFIN assembly language. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "Blackfin.h" -#include "BlackfinInstrInfo.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - class BlackfinAsmPrinter : public AsmPrinter { - public: - BlackfinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) {} - - virtual const char *getPassName() const { - return "Blackfin Assembly Printer"; - } - - void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - void printMemoryOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - void printInstruction(const MachineInstr *MI, raw_ostream &O);// autogen'd. 
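The "// autogen'd" comment refers to TableGen's -gen-asm-writer output (the *GenAsmWriter.inc files these printers #include): the generated printInstruction interleaves literal mnemonic text derived from the .td instruction patterns with callbacks into the hand-written printOperand. A rough standalone model of that division of labor, with all names hypothetical:

#include <cstdio>

// Hand-written half: knows how to render one operand.
static void printOperand(int Reg) { std::printf("r%d", Reg); }

// Shape of the generated half: -gen-asm-writer emits a printInstruction
// that interleaves fixed mnemonic text with printOperand callbacks.
static void printInstruction(const int *Ops) {
  std::printf("\tadd\t");
  printOperand(Ops[0]); std::printf(", ");
  printOperand(Ops[1]); std::printf(", ");
  printOperand(Ops[2]);
}

int main() {
  const int Ops[3] = {0, 1, 2};
  printInstruction(Ops);           // prints "\tadd\tr0, r1, r2"
  std::printf("\n");
}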
- static const char *getRegisterName(unsigned RegNo); - - void EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); - } - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - }; -} // end of anonymous namespace - -#include "BlackfinGenAsmWriter.inc" - -extern "C" void LLVMInitializeBlackfinAsmPrinter() { - RegisterAsmPrinter<BlackfinAsmPrinter> X(TheBlackfinTarget); -} - -void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(opNum); - switch (MO.getType()) { - case MachineOperand::MO_Register: - assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && - "Virtual registers should be already mapped!"); - O << getRegisterName(MO.getReg()); - break; - - case MachineOperand::MO_Immediate: - O << MO.getImm(); - break; - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - case MachineOperand::MO_GlobalAddress: - O << *Mang->getSymbol(MO.getGlobal()); - printOffset(MO.getOffset(), O); - break; - case MachineOperand::MO_ExternalSymbol: - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - break; - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" - << MO.getIndex(); - break; - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - break; - default: - llvm_unreachable("<unknown operand type>"); - break; - } -} - -void BlackfinAsmPrinter::printMemoryOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - printOperand(MI, opNum, O); - - if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) - return; - - O << " + "; - printOperand(MI, opNum+1, O); -} - -/// PrintAsmOperand - Print out an operand for an inline asm expression. -/// -bool BlackfinAsmPrinter::PrintAsmOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) return true; // Unknown modifier. - - switch (ExtraCode[0]) { - default: return true; // Unknown modifier. - case 'r': - break; - } - } - - printOperand(MI, OpNo, O); - - return false; -} - -bool BlackfinAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier - - O << '['; - printOperand(MI, OpNo, O); - O << ']'; - - return false; -} diff --git a/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt b/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 795aebf..0000000 --- a/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMBlackfinAsmPrinter - BlackfinAsmPrinter.cpp - ) -add_dependencies(LLVMBlackfinAsmPrinter BlackfinCodeGenTable_gen) diff --git a/lib/Target/Blackfin/AsmPrinter/Makefile b/lib/Target/Blackfin/AsmPrinter/Makefile deleted file mode 100644 index a106a23..0000000 --- a/lib/Target/Blackfin/AsmPrinter/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Blackfin/AsmPrinter/Makefile -------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMBlackfinAsmPrinter - -# Hack: we need to include 'main' Blackfin target directory to grab private -# headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/Blackfin/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/BlackfinAsmPrinter.cpp new file mode 100644 index 0000000..6ba258b --- /dev/null +++ b/lib/Target/Blackfin/BlackfinAsmPrinter.cpp @@ -0,0 +1,156 @@ +//===-- BlackfinAsmPrinter.cpp - Blackfin LLVM assembly writer ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format BLACKFIN assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "Blackfin.h" +#include "BlackfinInstrInfo.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + class BlackfinAsmPrinter : public AsmPrinter { + public: + BlackfinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) {} + + virtual const char *getPassName() const { + return "Blackfin Assembly Printer"; + } + + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + void printMemoryOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + void printInstruction(const MachineInstr *MI, raw_ostream &O);// autogen'd. 
+ static const char *getRegisterName(unsigned RegNo); + + void EmitInstruction(const MachineInstr *MI) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + printInstruction(MI, OS); + OutStreamer.EmitRawText(OS.str()); + } + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + }; +} // end of anonymous namespace + +#include "BlackfinGenAsmWriter.inc" + +extern "C" void LLVMInitializeBlackfinAsmPrinter() { + RegisterAsmPrinter<BlackfinAsmPrinter> X(TheBlackfinTarget); +} + +void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + switch (MO.getType()) { + case MachineOperand::MO_Register: + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + "Virtual registers should be already mapped!"); + O << getRegisterName(MO.getReg()); + break; + + case MachineOperand::MO_Immediate: + O << MO.getImm(); + break; + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + case MachineOperand::MO_GlobalAddress: + O << *Mang->getSymbol(MO.getGlobal()); + printOffset(MO.getOffset(), O); + break; + case MachineOperand::MO_ExternalSymbol: + O << *GetExternalSymbolSymbol(MO.getSymbolName()); + break; + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" + << MO.getIndex(); + break; + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + default: + llvm_unreachable("<unknown operand type>"); + break; + } +} + +void BlackfinAsmPrinter::printMemoryOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + printOperand(MI, opNum, O); + + if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) + return; + + O << " + "; + printOperand(MI, opNum+1, O); +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool BlackfinAsmPrinter::PrintAsmOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'r': + break; + } + } + + printOperand(MI, OpNo, O); + + return false; +} + +bool BlackfinAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier + + O << '['; + printOperand(MI, OpNo, O); + O << ']'; + + return false; +} diff --git a/lib/Target/Blackfin/BlackfinFrameLowering.cpp b/lib/Target/Blackfin/BlackfinFrameLowering.cpp new file mode 100644 index 0000000..08bb952 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinFrameLowering.cpp @@ -0,0 +1,124 @@ +//====- BlackfinFrameLowering.cpp - Blackfin Frame Information --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Blackfin implementation of TargetFrameLowering class. 
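This new file is one instance of the refactor running through the whole import: hasFP, emitPrologue, emitEpilogue and processFunctionBeforeCalleeSavedScan move out of the TargetRegisterInfo subclasses into per-target TargetFrameLowering implementations. The prologue that follows rounds the frame size up to the 4-byte stack alignment, emits a single LINK when the size fits the instruction's immediate field (at most 0x3ffff), and otherwise spells the link out manually. A standalone sketch of just that decision, using stand-in types rather than the LLVM API:

#include <cstdio>

// Stand-in for the prologue decision in BlackfinFrameLowering: round the
// frame to the 4-byte stack alignment, then pick LINK or the manual form.
static void emitPrologueSketch(int FrameSize, bool HasFP) {
  if (FrameSize % 4)
    FrameSize = (FrameSize + 3) & ~3;        // keep SP 4-byte aligned
  if (!HasFP) {                              // no frame pointer: adjust SP only
    std::printf("SP += %d;\n", -FrameSize);
    return;
  }
  if (FrameSize <= 0x3ffff) {                // fits LINK's immediate field
    std::printf("LINK %d;\n", FrameSize);
    return;
  }
  // Too large for LINK: spell out what LINK does, with P1 as scratch.
  std::printf("[--SP] = RETS; [--SP] = FP; FP = SP; "
              "P1 = %d; SP = SP + P1;\n", -FrameSize);
}

int main() {
  emitPrologueSketch(40, true);              // small frame: LINK 40
  emitPrologueSketch(0x40000, true);         // large frame: manual sequence
}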
+// +//===----------------------------------------------------------------------===// + +#include "BlackfinFrameLowering.h" +#include "BlackfinInstrInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; + + +// hasFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas or +// if frame pointer elimination is disabled. +bool BlackfinFrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return DisableFramePointerElim(MF) || + MFI->adjustsStack() || MFI->hasVarSizedObjects(); +} + +// Emit a prologue that sets up a stack frame. +// On function entry, R0-R2 and P0 may hold arguments. +// R3, P1, and P2 may be used as scratch registers +void BlackfinFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const BlackfinRegisterInfo *RegInfo = + static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const BlackfinInstrInfo &TII = + *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo()); + + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + int FrameSize = MFI->getStackSize(); + if (FrameSize%4) { + FrameSize = (FrameSize+3) & ~3; + MFI->setStackSize(FrameSize); + } + + if (!hasFP(MF)) { + assert(!MFI->adjustsStack() && + "FP elimination on a non-leaf function is not supported"); + RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize); + return; + } + + // emit a LINK instruction + if (FrameSize <= 0x3ffff) { + BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize); + return; + } + + // Frame is too big, do a manual LINK: + // [--SP] = RETS; + // [--SP] = FP; + // FP = SP; + // P1 = -FrameSize; + // SP = SP + P1; + BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH)) + .addReg(BF::RETS, RegState::Kill); + BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH)) + .addReg(BF::FP, RegState::Kill); + BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP) + .addReg(BF::SP); + RegInfo->loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize); + BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP) + .addReg(BF::SP, RegState::Kill) + .addReg(BF::P1, RegState::Kill); + +} + +void BlackfinFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + const BlackfinRegisterInfo *RegInfo = + static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const BlackfinInstrInfo &TII = + *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo()); + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + DebugLoc dl = MBBI->getDebugLoc(); + + int FrameSize = MFI->getStackSize(); + assert(FrameSize%4 == 0 && "Misaligned frame size"); + + if (!hasFP(MF)) { + assert(!MFI->adjustsStack() && + "FP elimination on a non-leaf function is not supported"); + RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize); + return; + } + + // emit an UNLINK instruction + BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK)); +} + +void BlackfinFrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + 
const BlackfinRegisterInfo *RegInfo = + static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const TargetRegisterClass *RC = BF::DPRegisterClass; + + if (RegInfo->requiresRegisterScavenging(MF)) { + // Reserve a slot close to SP or frame pointer. + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + } +} diff --git a/lib/Target/Blackfin/BlackfinFrameLowering.h b/lib/Target/Blackfin/BlackfinFrameLowering.h new file mode 100644 index 0000000..3d2ee25 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinFrameLowering.h @@ -0,0 +1,46 @@ +//=- BlackfinFrameLowering.h - Define frame lowering for Blackfin -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef ALPHA_FRAMEINFO_H +#define ALPHA_FRAMEINFO_H + +#include "Blackfin.h" +#include "BlackfinSubtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + class BlackfinSubtarget; + +class BlackfinFrameLowering : public TargetFrameLowering { +protected: + const BlackfinSubtarget &STI; + +public: + explicit BlackfinFrameLowering(const BlackfinSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), STI(sti) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool hasFP(const MachineFunction &MF) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index 80ee107..9df2aee 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -51,8 +51,7 @@ namespace { private: SDNode *Select(SDNode *N); - bool SelectADDRspii(SDNode *Op, SDValue Addr, - SDValue &Base, SDValue &Offset); + bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset); // Walk the DAG after instruction selection, fixing register class issues. 
void FixRegisterClasses(SelectionDAG &DAG); @@ -94,8 +93,7 @@ SDNode *BlackfinDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } -bool BlackfinDAGToDAGISel::SelectADDRspii(SDNode *Op, - SDValue Addr, +bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset) { FrameIndexSDNode *FIN = 0; diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index 6e828e1..dd27d0a 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -15,6 +15,7 @@ #include "BlackfinISelLowering.h" #include "BlackfinTargetMachine.h" #include "llvm/Function.h" +#include "llvm/Type.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -207,7 +208,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, unsigned ObjSize = VA.getLocVT().getStoreSize(); int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo(), false, false, 0)); } } @@ -332,8 +334,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue OffsetN = DAG.getIntPtrConstant(Offset); OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN, - PseudoSourceValue::getStack(), - Offset, false, false, 0)); + MachinePointerInfo(),false, false, 0)); } } @@ -364,7 +365,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, std::vector<EVT> NodeTys; NodeTys.push_back(MVT::Other); // Returns a chain - NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. + NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. SDValue Ops[] = { Chain, Callee, InFlag }; Chain = DAG.getNode(BFISD::CALL, dl, NodeTys, Ops, InFlag.getNode() ? 3 : 2); @@ -431,7 +432,7 @@ SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const { SDValue(CarryIn, 0)); // Add operands, produce sum and carry flag - SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Flag, + SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue, Op.getOperand(0), Op.getOperand(1)); // Store intermediate carry from Sum @@ -439,11 +440,11 @@ SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const { /* flag= */ SDValue(Sum, 1)); // Add incoming carry, again producing an output flag - Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Flag, + Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue, SDValue(Sum, 0), SDValue(CarryIn, 0)); // Update AC0 with the intermediate carry, producing a flag. - SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Flag, + SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Glue, SDValue(Carry1, 0)); // Compose (i32, flag) pair @@ -549,6 +550,52 @@ BlackfinTargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. 
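getSingleConstraintMatchWeight is new API in this import: for one alternative of an inline-asm constraint string it scores how well the operand matches, so the generic lowering can choose the cheapest alternative. The Blackfin override defined next scores its register-class letters as CW_Register and its single-register constraints as CW_SpecificReg. A toy illustration of picking the best-scoring alternative, where the letters and weights are made up for the example:

#include <algorithm>
#include <cstdio>

// Made-up weights in the spirit of ConstraintWeight: prefer the most
// specific alternative a constraint string offers.
enum Weight { Default = 0, Register = 1, Memory = 3, SpecificReg = 4 };

static Weight score(char c) {
  switch (c) {
  case 'r': case 'd': case 'a': return Register;     // register classes
  case 'm':                     return Memory;       // memory operand
  case 'A':                     return SpecificReg;  // one fixed register
  default:                      return Default;
  }
}

int main() {
  const char *alts = "mrA";                  // alternatives of one constraint
  char best = *std::max_element(alts, alts + 3,
      [](char a, char b) { return score(a) < score(b); });
  std::printf("chosen alternative: %c\n", best);     // picks 'A'
}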
+TargetLowering::ConstraintWeight +BlackfinTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + + // Blackfin-specific constraints + case 'a': + case 'd': + case 'z': + case 'D': + case 'W': + case 'e': + case 'b': + case 'v': + case 'f': + case 'c': + case 't': + case 'u': + case 'k': + case 'x': + case 'y': + case 'w': + return CW_Register; + case 'A': + case 'B': + case 'C': + case 'Z': + case 'Y': + return CW_SpecificReg; + } + return weight; +} + /// getRegForInlineAsmConstraint - Return register no and class for a C_Register /// constraint. std::pair<unsigned, const TargetRegisterClass*> BlackfinTargetLowering:: diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h index 6bebcc3..15a745f 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -39,6 +39,12 @@ namespace llvm { SelectionDAG &DAG) const; ConstraintType getConstraintType(const std::string &Constraint) const; + + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. + ConstraintWeight getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const; + std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; std::vector<unsigned> diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td index 8034a7f..5b59d77 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.td +++ b/lib/Target/Blackfin/BlackfinInstrInfo.td @@ -23,17 +23,17 @@ def SDT_BfinCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; def BfinCallseqStart : SDNode<"ISD::CALLSEQ_START", SDT_BfinCallSeqStart, - [SDNPHasChain, SDNPOutFlag]>; + [SDNPHasChain, SDNPOutGlue]>; def BfinCallseqEnd : SDNode<"ISD::CALLSEQ_END", SDT_BfinCallSeqEnd, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def SDT_BfinCall : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def BfinCall : SDNode<"BFISD::CALL", SDT_BfinCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInGlue]>; def BfinWrapper: SDNode<"BFISD::Wrapper", SDTIntUnaryOp>; diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index a518312..b4a9b84 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetInstrInfo.h" @@ -50,6 +50,8 @@ BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { BitVector 
BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + using namespace BF; BitVector Reserved(getNumRegs()); Reserved.set(AZ); @@ -70,20 +72,11 @@ BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(L3); Reserved.set(SP); Reserved.set(RETS); - if (hasFP(MF)) + if (TFI->hasFP(MF)) Reserved.set(FP); return Reserved; } -// hasFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. -bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return DisableFramePointerElim(MF) || - MFI->adjustsStack() || MFI->hasVarSizedObjects(); -} - bool BlackfinRegisterInfo:: requiresRegisterScavenging(const MachineFunction &MF) const { return true; @@ -161,7 +154,9 @@ void BlackfinRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (!hasReservedCallFrame(MF)) { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (!TFI->hasReservedCallFrame(MF)) { int64_t Amount = I->getOperand(0).getImm(); if (Amount != 0) { assert(Amount%4 == 0 && "Unaligned call frame size"); @@ -196,6 +191,7 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc DL = MI.getDebugLoc(); unsigned FIPos; @@ -208,7 +204,7 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + MI.getOperand(FIPos+1).getImm(); unsigned BaseReg = BF::FP; - if (hasFP(MF)) { + if (TFI->hasFP(MF)) { assert(SPAdj==0 && "Unexpected SP adjust in function with frame pointer"); } else { BaseReg = BF::SP; @@ -329,93 +325,15 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } -void BlackfinRegisterInfo:: -processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetRegisterClass *RC = BF::DPRegisterClass; - if (requiresRegisterScavenging(MF)) { - // Reserve a slot close to SP or frame pointer. - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } -} - -// Emit a prologue that sets up a stack frame. -// On function entry, R0-R2 and P0 may hold arguments. -// R3, P1, and P2 may be used as scratch registers -void BlackfinRegisterInfo::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - DebugLoc dl = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc(); - - int FrameSize = MFI->getStackSize(); - if (FrameSize%4) { - FrameSize = (FrameSize+3) & ~3; - MFI->setStackSize(FrameSize); - } - - if (!hasFP(MF)) { - assert(!MFI->adjustsStack() && - "FP elimination on a non-leaf function is not supported"); - adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize); - return; - } - - // emit a LINK instruction - if (FrameSize <= 0x3ffff) { - BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize); - return; - } - - // Frame is too big, do a manual LINK: - // [--SP] = RETS; - // [--SP] = FP; - // FP = SP; - // P1 = -FrameSize; - // SP = SP + P1; - BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH)) - .addReg(BF::RETS, RegState::Kill); - BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH)) - .addReg(BF::FP, RegState::Kill); - BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP) - .addReg(BF::SP); - loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize); - BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP) - .addReg(BF::SP, RegState::Kill) - .addReg(BF::P1, RegState::Kill); - -} - -void BlackfinRegisterInfo::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - DebugLoc dl = MBBI->getDebugLoc(); - - int FrameSize = MFI->getStackSize(); - assert(FrameSize%4 == 0 && "Misaligned frame size"); - - if (!hasFP(MF)) { - assert(!MFI->adjustsStack() && - "FP elimination on a non-leaf function is not supported"); - adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize); - return; - } - - // emit an UNLINK instruction - BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK)); -} - unsigned BlackfinRegisterInfo::getRARegister() const { return BF::RETS; } unsigned BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - return hasFP(MF) ? BF::FP : BF::SP; + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + return TFI->hasFP(MF) ? BF::FP : BF::SP; } unsigned BlackfinRegisterInfo::getEHExceptionRegister() const { diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h index bb83c34..642b8ad 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -41,8 +41,6 @@ namespace llvm { return &BF::PRegClass; } - bool hasFP(const MachineFunction &MF) const; - // bool hasReservedCallFrame(MachineFunction &MF) const; bool requiresRegisterScavenging(const MachineFunction &MF) const; @@ -54,12 +52,6 @@ namespace llvm { void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const; - - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - unsigned getFrameRegister(const MachineFunction &MF) const; unsigned getRARegister() const; diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td index e1cfae9..f5dd439 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.td +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td @@ -252,9 +252,9 @@ def P : RegisterClass<"BF", [i32], 32, [P0, P1, P2, P3, P4, P5, FP, SP]> { PClass::iterator PClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); return allocation_order_begin(MF) - + (RI->hasFP(MF) ? 
7 : 6); + + (TFI->hasFP(MF) ? 7 : 6); } }]; } @@ -275,9 +275,9 @@ def DP : RegisterClass<"BF", [i32], 32, DPClass::iterator DPClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); return allocation_order_begin(MF) - + (RI->hasFP(MF) ? 15 : 14); + + (TFI->hasFP(MF) ? 15 : 14); } }]; } @@ -295,9 +295,9 @@ def GR : RegisterClass<"BF", [i32], 32, GRClass::iterator GRClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); return allocation_order_begin(MF) - + (RI->hasFP(MF) ? 31 : 30); + + (TFI->hasFP(MF) ? 31 : 30); } }]; } @@ -318,9 +318,9 @@ def ALL : RegisterClass<"BF", [i32], 32, ALLClass::iterator ALLClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); return allocation_order_begin(MF) - + (RI->hasFP(MF) ? 31 : 30); + + (TFI->hasFP(MF) ? 31 : 30); } }]; } @@ -334,9 +334,9 @@ def PI : RegisterClass<"BF", [i32], 32, PIClass::iterator PIClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); return allocation_order_begin(MF) - + (RI->hasFP(MF) ? 11 : 10); + + (TFI->hasFP(MF) ? 11 : 10); } }]; } diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp index 66a2f68..e11920f 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp +++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp @@ -33,7 +33,7 @@ BlackfinTargetMachine::BlackfinTargetMachine(const Target &T, TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), - FrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0) { + FrameLowering(Subtarget) { } bool BlackfinTargetMachine::addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h index a63aa54..29b2b17 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.h +++ b/lib/Target/Blackfin/BlackfinTargetMachine.h @@ -14,14 +14,15 @@ #ifndef BLACKFINTARGETMACHINE_H #define BLACKFINTARGETMACHINE_H -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "BlackfinInstrInfo.h" -#include "BlackfinSubtarget.h" +#include "BlackfinIntrinsicInfo.h" #include "BlackfinISelLowering.h" +#include "BlackfinFrameLowering.h" +#include "BlackfinSubtarget.h" #include "BlackfinSelectionDAGInfo.h" -#include "BlackfinIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -31,14 +32,16 @@ namespace llvm { BlackfinTargetLowering TLInfo; BlackfinSelectionDAGInfo TSInfo; BlackfinInstrInfo InstrInfo; - TargetFrameInfo FrameInfo; + BlackfinFrameLowering FrameLowering; BlackfinIntrinsicInfo IntrinsicInfo; public: BlackfinTargetMachine(const Target &T, const std::string &TT, const std::string &FS); virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual const TargetFrameLowering 
*getFrameLowering() const { + return &FrameLowering; + } virtual const BlackfinSubtarget *getSubtargetImpl() const { return &Subtarget; } diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt index f8847d0..a47299f 100644 --- a/lib/Target/Blackfin/CMakeLists.txt +++ b/lib/Target/Blackfin/CMakeLists.txt @@ -12,13 +12,17 @@ tablegen(BlackfinGenCallingConv.inc -gen-callingconv) tablegen(BlackfinGenIntrinsics.inc -gen-tgt-intrinsic) add_llvm_target(BlackfinCodeGen + BlackfinAsmPrinter.cpp BlackfinInstrInfo.cpp BlackfinIntrinsicInfo.cpp BlackfinISelDAGToDAG.cpp BlackfinISelLowering.cpp + BlackfinFrameLowering.cpp BlackfinMCAsmInfo.cpp BlackfinRegisterInfo.cpp BlackfinSubtarget.cpp BlackfinTargetMachine.cpp BlackfinSelectionDAGInfo.cpp ) + +add_subdirectory(TargetInfo) diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile index 339bef9..5eb8e9a 100644 --- a/lib/Target/Blackfin/Makefile +++ b/lib/Target/Blackfin/Makefile @@ -18,7 +18,7 @@ BUILT_SOURCES = BlackfinGenRegisterInfo.h.inc BlackfinGenRegisterNames.inc \ BlackfinGenDAGISel.inc BlackfinGenSubtarget.inc \ BlackfinGenCallingConv.inc BlackfinGenIntrinsics.inc -DIRS = AsmPrinter TargetInfo +DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 270fff6..6c555a3 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -47,12 +47,16 @@ #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Support/MathExtras.h" -#include "llvm/System/Host.h" +#include "llvm/Support/Host.h" #include "llvm/Config/config.h" #include <algorithm> +// Some ms header decided to define setjmp as _setjmp, undo this for this file. +#ifdef _MSC_VER +#undef setjmp +#endif using namespace llvm; -extern "C" void LLVMInitializeCBackendTarget() { +extern "C" void LLVMInitializeCBackendTarget() { // Register the target. RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget); } @@ -72,8 +76,10 @@ namespace { class CBackendNameAllUsedStructsAndMergeFunctions : public ModulePass { public: static char ID; - CBackendNameAllUsedStructsAndMergeFunctions() - : ModulePass(ID) {} + CBackendNameAllUsedStructsAndMergeFunctions() + : ModulePass(ID) { + initializeFindUsedTypesPass(*PassRegistry::getPassRegistry()); + } void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<FindUsedTypes>(); } @@ -110,9 +116,10 @@ namespace { public: static char ID; explicit CWriter(formatted_raw_ostream &o) - : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0), + : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0), TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0), NextAnonValueNumber(0) { + initializeLoopInfoPass(*PassRegistry::getPassRegistry()); FPCounter = 0; } @@ -183,7 +190,7 @@ namespace { Out << ")"; } } - + void writeOperand(Value *Operand, bool Static = false); void writeInstComputationInline(Instruction &I); void writeOperandInternal(Value *Operand, bool Static = false); @@ -224,7 +231,7 @@ namespace { return ByValParams.count(A); return isa<GlobalVariable>(V) || isDirectAlloca(V); } - + // isInlinableInst - Attempt to inline instructions into their uses to build // trees as much as possible. 
To do this, we have to consistently decide // what is acceptable to inline, so that variable declarations don't get @@ -233,7 +240,7 @@ namespace { static bool isInlinableInst(const Instruction &I) { // Always inline cmp instructions, even if they are shared by multiple // expressions. GCC generates horrible code if we don't. - if (isa<CmpInst>(I)) + if (isa<CmpInst>(I)) return true; // Must be an expression, must be used exactly once. If it is dead, we @@ -270,14 +277,14 @@ namespace { return 0; return AI; } - + // isInlineAsm - Check if the instruction is a call to an inline asm chunk static bool isInlineAsm(const Instruction& I) { if (const CallInst *CI = dyn_cast<CallInst>(&I)) return isa<InlineAsm>(CI->getCalledValue()); return false; } - + // Instruction visitation functions friend class InstVisitor<CWriter>; @@ -310,7 +317,7 @@ namespace { void visitStoreInst (StoreInst &I); void visitGetElementPtrInst(GetElementPtrInst &I); void visitVAArgInst (VAArgInst &I); - + void visitInsertElementInst(InsertElementInst &I); void visitExtractElementInst(ExtractElementInst &I); void visitShuffleVectorInst(ShuffleVectorInst &SVI); @@ -346,7 +353,7 @@ char CWriter::ID = 0; static std::string CBEMangle(const std::string &S) { std::string Result; - + for (unsigned i = 0, e = S.size(); i != e; ++i) if (isalnum(S[i]) || S[i] == '_') { Result += S[i]; @@ -375,7 +382,7 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { for (TypeSymbolTable::iterator TI = TST.begin(), TE = TST.end(); TI != TE; ) { TypeSymbolTable::iterator I = TI++; - + // If this isn't a struct or array type, remove it from our set of types // to name. This simplifies emission later. if (!I->second->isStructTy() && !I->second->isOpaqueTy() && @@ -403,8 +410,8 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { ++RenameCounter; Changed = true; } - - + + // Loop over all external functions and globals. If we have two with // identical names, merge them. 
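CBEMangle keeps alphanumerics and '_' and escapes every other byte so the result is a valid C identifier; the escape branch falls outside this hunk, but in LLVM sources of this vintage it encodes the byte as '_' followed by two nibble letters. A runnable reconstruction under that assumption:

#include <cctype>
#include <iostream>
#include <string>

// Reconstruction: keep [A-Za-z0-9_]; escape any other byte as
// '_' + low nibble + high nibble, encoded with the letters 'A'..'P'.
static std::string cbeMangle(const std::string &S) {
  std::string Result;
  for (unsigned char C : S) {
    if (std::isalnum(C) || C == '_') {
      Result += static_cast<char>(C);
    } else {
      Result += '_';
      Result += static_cast<char>('A' + (C & 15));
      Result += static_cast<char>('A' + ((C >> 4) & 15));
      Result += '_';
    }
  }
  return Result;
}

int main() {
  std::cout << cbeMangle("foo.bar") << '\n';   // '.' becomes "_OC_"
}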
// FIXME: This code should disappear when we don't allow values with the same @@ -440,7 +447,7 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { } } } - + return Changed; } @@ -479,20 +486,20 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, FunctionInnards << "void"; } FunctionInnards << ')'; - printType(Out, RetTy, + printType(Out, RetTy, /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str()); } raw_ostream & CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) && + assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) && "Invalid type for printSimpleType"); switch (Ty->getTypeID()) { case Type::VoidTyID: return Out << "void " << NameSoFar; case Type::IntegerTyID: { unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); - if (NumBits == 1) + if (NumBits == 1) return Out << "bool " << NameSoFar; else if (NumBits <= 8) return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar; @@ -502,7 +509,7 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned, return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar; else if (NumBits <= 64) return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar; - else { + else { assert(NumBits <= 128 && "Bit widths > 128 not implemented yet"); return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar; } @@ -514,14 +521,18 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned, case Type::X86_FP80TyID: case Type::PPC_FP128TyID: case Type::FP128TyID: return Out << "long double " << NameSoFar; - + + case Type::X86_MMXTyID: + return printSimpleType(Out, Type::getInt32Ty(Ty->getContext()), isSigned, + " __attribute__((vector_size(64))) " + NameSoFar); + case Type::VectorTyID: { const VectorType *VTy = cast<VectorType>(Ty); return printSimpleType(Out, VTy->getElementType(), isSigned, " __attribute__((vector_size(" + utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar); } - + default: #ifndef NDEBUG errs() << "Unknown primitive type: " << *Ty << "\n"; @@ -575,7 +586,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, FunctionInnards << "void"; } FunctionInnards << ')'; - printType(Out, FTy->getReturnType(), + printType(Out, FTy->getReturnType(), /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str()); return Out; } @@ -759,7 +770,7 @@ static bool isFPCSafeToPrint(const ConstantFP *CFP) { } /// Print out the casting for a cast operation. This does the double casting -/// necessary for conversion to the destination type, if necessary. +/// necessary for conversion to the destination type, if necessary. 
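printSimpleType above maps LLVM's arbitrary-width integers onto the smallest standard C type that can hold them, signed or unsigned as requested, with widths above 64 bits falling back to the emitted llvmInt128/llvmUInt128 typedefs. The same ladder as a standalone function:

#include <cassert>
#include <cstdio>
#include <string>

// Width ladder used by the C backend for integer types.
static std::string cTypeFor(unsigned Bits, bool Signed) {
  if (Bits == 1) return "bool";
  std::string Base;
  if      (Bits <= 8)  Base = "char";
  else if (Bits <= 16) Base = "short";
  else if (Bits <= 32) Base = "int";
  else if (Bits <= 64) Base = "long long";
  else {
    assert(Bits <= 128 && "Bit widths > 128 not implemented yet");
    return Signed ? "llvmInt128" : "llvmUInt128";
  }
  return (Signed ? "signed " : "unsigned ") + Base;
}

int main() {
  std::printf("%s\n", cTypeFor(17, true).c_str());   // signed int
  std::printf("%s\n", cTypeFor(33, false).c_str());  // unsigned long long
}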
/// @brief Print a cast void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) { // Print the destination type cast @@ -782,7 +793,7 @@ void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) { printSimpleType(Out, DstTy, false); Out << ')'; break; - case Instruction::SExt: + case Instruction::SExt: case Instruction::FPToSI: // For these, make sure we get a signed dest Out << '('; printSimpleType(Out, DstTy, true); @@ -803,7 +814,7 @@ void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) { case Instruction::SIToFP: case Instruction::SExt: Out << '('; - printSimpleType(Out, SrcTy, true); + printSimpleType(Out, SrcTy, true); Out << ')'; break; case Instruction::IntToPtr: @@ -895,7 +906,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { case Instruction::AShr: { Out << '('; - bool NeedsClosingParens = printConstExprCast(CE, Static); + bool NeedsClosingParens = printConstExprCast(CE, Static); printConstantWithCast(CE->getOperand(0), CE->getOpcode()); switch (CE->getOpcode()) { case Instruction::Add: @@ -905,10 +916,10 @@ void CWriter::printConstant(Constant *CPV, bool Static) { case Instruction::Mul: case Instruction::FMul: Out << " * "; break; case Instruction::URem: - case Instruction::SRem: + case Instruction::SRem: case Instruction::FRem: Out << " % "; break; - case Instruction::UDiv: - case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SDiv: case Instruction::FDiv: Out << " / "; break; case Instruction::And: Out << " & "; break; case Instruction::Or: Out << " | "; break; @@ -920,7 +931,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { switch (CE->getPredicate()) { case ICmpInst::ICMP_EQ: Out << " == "; break; case ICmpInst::ICMP_NE: Out << " != "; break; - case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: Out << " < "; break; case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_ULE: Out << " <= "; break; @@ -940,8 +951,8 @@ void CWriter::printConstant(Constant *CPV, bool Static) { return; } case Instruction::FCmp: { - Out << '('; - bool NeedsClosingParens = printConstExprCast(CE, Static); + Out << '('; + bool NeedsClosingParens = printConstExprCast(CE, Static); if (CE->getPredicate() == FCmpInst::FCMP_FALSE) Out << "0"; else if (CE->getPredicate() == FCmpInst::FCMP_TRUE) @@ -1006,18 +1017,18 @@ void CWriter::printConstant(Constant *CPV, bool Static) { else { Out << "(("; printSimpleType(Out, Ty, false) << ')'; - if (CI->isMinValue(true)) + if (CI->isMinValue(true)) Out << CI->getZExtValue() << 'u'; else Out << CI->getSExtValue(); Out << ')'; } return; - } + } switch (CPV->getType()->getTypeID()) { case Type::FloatTyID: - case Type::DoubleTyID: + case Type::DoubleTyID: case Type::X86_FP80TyID: case Type::PPC_FP128TyID: case Type::FP128TyID: { @@ -1027,8 +1038,8 @@ void CWriter::printConstant(Constant *CPV, bool Static) { // Because of FP precision problems we must load from a stack allocated // value that holds the value in hex. Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ? - "float" : - FPC->getType() == Type::getDoubleTy(CPV->getContext()) ? + "float" : + FPC->getType() == Type::getDoubleTy(CPV->getContext()) ? 
"double" : "long double") << "*)&FPConstant" << I->second << ')'; @@ -1047,7 +1058,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo); V = Tmp.convertToDouble(); } - + if (IsNAN(V)) { // The value is NaN @@ -1211,10 +1222,10 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { // We need to cast integer arithmetic so that it is always performed // as unsigned, to avoid undefined behavior on overflow. case Instruction::LShr: - case Instruction::URem: + case Instruction::URem: case Instruction::UDiv: NeedsExplicitCast = true; break; case Instruction::AShr: - case Instruction::SRem: + case Instruction::SRem: case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break; case Instruction::SExt: Ty = CE->getType(); @@ -1267,7 +1278,7 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) { switch (Opcode) { default: // for most instructions, it doesn't matter - break; + break; case Instruction::Add: case Instruction::Sub: case Instruction::Mul: @@ -1294,7 +1305,7 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) { Out << ")"; printConstant(CPV, false); Out << ")"; - } else + } else printConstant(CPV, false); } @@ -1312,16 +1323,16 @@ std::string CWriter::GetValueName(const Value *Operand) { Mang->getNameWithPrefix(Str, GV, false); return CBEMangle(Str.str().str()); } - + std::string Name = Operand->getName(); - + if (Name.empty()) { // Assign unique names to local temporaries. unsigned &No = AnonValueNumbers[Operand]; if (No == 0) No = ++NextAnonValueNumber; Name = "tmp__" + utostr(No); } - + std::string VarName; VarName.reserve(Name.capacity()); @@ -1348,7 +1359,7 @@ void CWriter::writeInstComputationInline(Instruction &I) { // Validate this. const Type *Ty = I.getType(); if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) && - Ty!=Type::getInt8Ty(I.getContext()) && + Ty!=Type::getInt8Ty(I.getContext()) && Ty!=Type::getInt16Ty(I.getContext()) && Ty!=Type::getInt32Ty(I.getContext()) && Ty!=Type::getInt64Ty(I.getContext()))) { @@ -1364,12 +1375,12 @@ void CWriter::writeInstComputationInline(Instruction &I) { if (I.getType() == Type::getInt1Ty(I.getContext()) && !isa<ICmpInst>(I) && !isa<FCmpInst>(I)) NeedBoolTrunc = true; - + if (NeedBoolTrunc) Out << "(("; - + visit(I); - + if (NeedBoolTrunc) Out << ")&1)"; } @@ -1404,9 +1415,9 @@ void CWriter::writeOperand(Value *Operand, bool Static) { Out << ')'; } -// Some instructions need to have their result value casted back to the -// original types because their operands were casted to the expected type. -// This function takes care of detecting that case and printing the cast +// Some instructions need to have their result value casted back to the +// original types because their operands were casted to the expected type. +// This function takes care of detecting that case and printing the cast // for the Instruction. bool CWriter::writeInstructionCast(const Instruction &I) { const Type *Ty = I.getOperand(0)->getType(); @@ -1417,15 +1428,15 @@ bool CWriter::writeInstructionCast(const Instruction &I) { // We need to cast integer arithmetic so that it is always performed // as unsigned, to avoid undefined behavior on overflow. 
case Instruction::LShr: - case Instruction::URem: - case Instruction::UDiv: + case Instruction::URem: + case Instruction::UDiv: Out << "(("; printSimpleType(Out, Ty, false); Out << ")("; return true; case Instruction::AShr: - case Instruction::SRem: - case Instruction::SDiv: + case Instruction::SRem: + case Instruction::SDiv: Out << "(("; printSimpleType(Out, Ty, true); Out << ")("; @@ -1437,7 +1448,7 @@ bool CWriter::writeInstructionCast(const Instruction &I) { // Write the operand with a cast to another type based on the Opcode being used. // This will be used in cases where an instruction has specific type -// requirements (usually signedness) for its operands. +// requirements (usually signedness) for its operands. void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) { // Extract the operand's type, we'll need it. @@ -1455,7 +1466,7 @@ void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) { switch (Opcode) { default: // for most instructions, it doesn't matter - break; + break; case Instruction::Add: case Instruction::Sub: case Instruction::Mul: @@ -1484,14 +1495,14 @@ void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) { Out << ")"; writeOperand(Operand); Out << ")"; - } else + } else writeOperand(Operand); } -// Write the operand with a cast to another type based on the icmp predicate -// being used. +// Write the operand with a cast to another type based on the icmp predicate +// being used. void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) { - // This has to do a cast to ensure the operand has the right signedness. + // This has to do a cast to ensure the operand has the right signedness. // Also, if the operand is a pointer, we make sure to cast to an integer when // doing the comparison both for signedness and so that the C compiler doesn't // optimize things like "p < NULL" to false (p may contain an integer value @@ -1504,7 +1515,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) { writeOperand(Operand); return; } - + // Should this be a signed comparison? If so, convert to signed. 
bool castIsSigned = Cmp.isSigned(); @@ -1512,7 +1523,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) { const Type* OpTy = Operand->getType(); if (OpTy->isPointerTy()) OpTy = TD->getIntPtrType(Operand->getContext()); - + Out << "(("; printSimpleType(Out, OpTy, castIsSigned); Out << ")"; @@ -1579,7 +1590,7 @@ static void generateCompilerSpecificCode(formatted_raw_ostream& Out, Out << "#if defined(__GNUC__)\n" << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n" << "#endif\n\n"; - + // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise // From the GCC documentation: // @@ -1635,7 +1646,7 @@ static void generateCompilerSpecificCode(formatted_raw_ostream& Out, << "#define __ATTRIBUTE_DTOR__\n" << "#define LLVM_ASM(X)\n" << "#endif\n\n"; - + Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n" << "#define __builtin_stack_save() 0 /* not implemented */\n" << "#define __builtin_stack_restore(X) /* noop */\n" @@ -1658,11 +1669,11 @@ static void generateCompilerSpecificCode(formatted_raw_ostream& Out, static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); if (!InitList) return; - + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){ if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. - + if (CS->getOperand(1)->isNullValue()) return; // Found a null terminator, exit printing. Constant *FP = CS->getOperand(1); @@ -1690,12 +1701,12 @@ static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) { else if (GV->getName() == "llvm.global_dtors") return GlobalDtors; } - + // Otherwise, if it is other metadata, don't print it. This catches things // like debug information. 
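FindStaticTors above walks llvm.global_ctors and llvm.global_dtors, whose initializer is an array of {priority, function} structs terminated by a null function, and getGlobalVariableClass routes those globals (plus anything placed in the llvm.metadata section) away from ordinary printing. The shape of that payload, modeled directly in plain C++:

#include <cstdio>

// Shape of the llvm.global_ctors payload the pass walks: {priority,
// function} entries, with a null function terminating the list.
struct CtorEntry { int Priority; void (*Fn)(); };

static void hello() { std::printf("ctor ran\n"); }

static const CtorEntry GlobalCtors[] = {
  { 65535, hello },
  { 0, nullptr },                  // null terminator: stop scanning here
};

int main() {
  for (const CtorEntry *E = GlobalCtors; E->Fn; ++E)
    E->Fn();                       // a C runtime would run these in order
}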
if (GV->getSection() == "llvm.metadata") return NotPrinted; - + return NotSpecial; } @@ -1726,7 +1737,7 @@ static void PrintEscapedString(const std::string &Str, raw_ostream &Out) { bool CWriter::doInitialization(Module &M) { FunctionPass::doInitialization(M); - + // Initialize TheModule = &M; @@ -1738,13 +1749,13 @@ bool CWriter::doInitialization(Module &M) { std::string Triple = TheModule->getTargetTriple(); if (Triple.empty()) Triple = llvm::sys::getHostTriple(); - + std::string E; if (const Target *Match = TargetRegistry::lookupTarget(Triple, E)) TAsm = Match->createAsmInfo(Triple); -#endif +#endif TAsm = new CBEMCAsmInfo(); - TCtx = new MCContext(*TAsm); + TCtx = new MCContext(*TAsm, NULL); Mang = new Mangler(*TCtx, *TD); // Keep track of which functions are static ctors/dtors so they can have @@ -1762,7 +1773,7 @@ bool CWriter::doInitialization(Module &M) { break; } } - + // get declaration for alloca Out << "/* Provide Declarations */\n"; Out << "#include <stdarg.h>\n"; // Varargs support @@ -1819,7 +1830,7 @@ bool CWriter::doInitialization(Module &M) { for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() || + if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() || I->hasCommonLinkage()) Out << "extern "; else if (I->hasDLLImportLinkage()) @@ -1844,7 +1855,7 @@ bool CWriter::doInitialization(Module &M) { Out << "double fmod(double, double);\n"; // Support for FP rem Out << "float fmodf(float, float);\n"; Out << "long double fmodl(long double, long double);\n"; - + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { // Don't print declarations for intrinsic functions. if (!I->isIntrinsic() && I->getName() != "setjmp" && @@ -1852,7 +1863,7 @@ bool CWriter::doInitialization(Module &M) { if (I->hasExternalWeakLinkage()) Out << "extern "; printFunctionSignature(I, true); - if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) + if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) Out << " __ATTRIBUTE_WEAK__"; if (I->hasExternalWeakLinkage()) Out << " __EXTERNAL_WEAK__"; @@ -1862,10 +1873,10 @@ bool CWriter::doInitialization(Module &M) { Out << " __ATTRIBUTE_DTOR__"; if (I->hasHiddenVisibility()) Out << " __HIDDEN__"; - + if (I->hasName() && I->getName()[0] == 1) Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")"; - + Out << ";\n"; } } @@ -1889,7 +1900,7 @@ bool CWriter::doInitialization(Module &M) { if (I->isThreadLocal()) Out << "__thread "; - printType(Out, I->getType()->getElementType(), false, + printType(Out, I->getType()->getElementType(), false, GetValueName(I)); if (I->hasLinkOnceLinkage()) @@ -1909,7 +1920,7 @@ bool CWriter::doInitialization(Module &M) { // Output the global variable definitions and contents... if (!M.global_empty()) { Out << "\n\n/* Global Variable Definitions and Initialization */\n"; - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) if (!I->isDeclaration()) { // Ignore special globals, such as debug info. 
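The declaration and definition loops in doInitialization translate LLVM linkage and visibility into C-level qualifiers: external and common globals become extern, dllimport becomes __declspec(dllimport), thread-locals gain __thread, and weak or hidden symbols pick up the __ATTRIBUTE_WEAK__ and __HIDDEN__ macros defined in the compiler-specific preamble. A compact sketch of that mapping, where the enum and helper are invented for illustration:

#include <cstdio>
#include <string>

enum class Linkage { External, DLLImport, Weak, Internal };

// Rough analogue of the qualifier selection in the global-variable loop.
static std::string prefixFor(Linkage L, bool ThreadLocal, bool Hidden) {
  std::string P;
  if (L == Linkage::External)  P += "extern ";
  if (L == Linkage::DLLImport) P += "__declspec(dllimport) ";
  if (ThreadLocal)             P += "__thread ";
  // Attributes are appended after the declarator in the real emitter:
  if (L == Linkage::Weak)      P += "/* __ATTRIBUTE_WEAK__ */ ";
  if (Hidden)                  P += "/* __HIDDEN__ */ ";
  return P;
}

int main() {
  std::printf("%sint g;\n", prefixFor(Linkage::External, true, false).c_str());
}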
@@ -1927,7 +1938,7 @@ bool CWriter::doInitialization(Module &M) { if (I->isThreadLocal()) Out << "__thread "; - printType(Out, I->getType()->getElementType(), false, + printType(Out, I->getType()->getElementType(), false, GetValueName(I)); if (I->hasLinkOnceLinkage()) Out << " __attribute__((common))"; @@ -1938,7 +1949,7 @@ bool CWriter::doInitialization(Module &M) { if (I->hasHiddenVisibility()) Out << " __HIDDEN__"; - + // If the initializer is not null, emit the initializer. If it is null, // we try to avoid emitting large amounts of zeros. The problem with // this, however, occurs when the variable has weak linkage. In this @@ -1972,7 +1983,7 @@ bool CWriter::doInitialization(Module &M) { if (!M.empty()) Out << "\n\n/* Function Bodies */\n"; - // Emit some helper functions for dealing with FCMP instruction's + // Emit some helper functions for dealing with FCMP instruction's // predicates Out << "static inline int llvm_fcmp_ord(double X, double Y) { "; Out << "return X == X && Y == Y; }\n"; @@ -2027,7 +2038,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) { printFloatingPointConstants(CE->getOperand(i)); return; } - + // Otherwise, check for a FP constant that we need to print. const ConstantFP *FPC = dyn_cast<ConstantFP>(C); if (FPC == 0 || @@ -2038,7 +2049,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) { return; FPConstantMap[FPC] = FPCounter; // Number the FP constants - + if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) { double Val = FPC->getValueAPF().convertToDouble(); uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); @@ -2057,7 +2068,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) { APInt api = FPC->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); Out << "static const ConstantFP80Ty FPConstant" << FPCounter++ - << " = { 0x" << utohexstr(p[0]) + << " = { 0x" << utohexstr(p[0]) << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}" << "}; /* Long double constant */\n"; } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) || @@ -2068,7 +2079,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) { << " = { 0x" << utohexstr(p[0]) << ", 0x" << utohexstr(p[1]) << "}; /* Long double constant */\n"; - + } else { llvm_unreachable("Unknown float type!"); } @@ -2140,12 +2151,12 @@ void CWriter::printContainedStructs(const Type *Ty, // Don't walk through pointers. if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy()) return; - + // Print all contained types first. for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); I != E; ++I) printContainedStructs(*I, StructPrinted); - + if (Ty->isStructTy() || Ty->isArrayTy()) { // Check to see if we have already printed this struct. if (StructPrinted.insert(Ty).second) { @@ -2160,10 +2171,10 @@ void CWriter::printContainedStructs(const Type *Ty, void CWriter::printFunctionSignature(const Function *F, bool Prototype) { /// isStructReturn - Should this function actually return a struct by-value? 
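The question the comment above asks is easiest to answer with an example: a function that returns a struct through a hidden sret pointer in the IR is printed as a C function that returns the struct by value. A hedged sketch (struct and function names hypothetical):

  /* IR:  define void @make_pair(%struct.Pair* sret %out, i32 %x)  */
  /* C prototype produced by printFunctionSignature():              */
  struct l_struct_Pair { unsigned f0; unsigned f1; };
  struct l_struct_Pair make_pair(unsigned x);

The hidden first parameter is dropped by the "don't print the hidden struct-return argument" logic below, and its pointee type becomes the C return type.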
bool isStructReturn = F->hasStructRetAttr(); - + if (F->hasLocalLinkage()) Out << "static "; if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) "; - if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) "; + if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) "; switch (F->getCallingConv()) { case CallingConv::X86_StdCall: Out << "__attribute__((stdcall)) "; @@ -2177,7 +2188,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { default: break; } - + // Loop over the arguments, printing them... const FunctionType *FT = cast<FunctionType>(F->getFunctionType()); const AttrListPtr &PAL = F->getAttributes(); @@ -2193,7 +2204,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { if (!F->arg_empty()) { Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); unsigned Idx = 1; - + // If this is a struct-return function, don't print the hidden // struct-return argument. if (isStructReturn) { @@ -2201,7 +2212,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { ++I; ++Idx; } - + std::string ArgName; for (; I != E; ++I) { if (PrintedArg) FunctionInnards << ", "; @@ -2225,7 +2236,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { // Loop over the arguments, printing them. FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end(); unsigned Idx = 1; - + // If this is a struct-return function, don't print the hidden // struct-return argument. if (isStructReturn) { @@ -2233,7 +2244,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { ++I; ++Idx; } - + for (; I != E; ++I) { if (PrintedArg) FunctionInnards << ", "; const Type *ArgTy = *I; @@ -2262,7 +2273,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { FunctionInnards << "void"; // ret() -> ret(void) in C. } FunctionInnards << ')'; - + // Get the return type for the function. const Type *RetTy; if (!isStructReturn) @@ -2271,9 +2282,9 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { // If this is a struct-return function, print the struct-return type. RetTy = cast<PointerType>(FT->getParamType(0))->getElementType(); } - + // Print out the return type and the signature built above. - printType(Out, RetTy, + printType(Out, RetTy, /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str()); } @@ -2293,7 +2304,7 @@ void CWriter::printFunction(Function &F) { printFunctionSignature(&F, false); Out << " {\n"; - + // If this is a struct return function, handle the result with magic.
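The "magic" is just a stack temporary: the body declares one, rebinds the dropped sret argument to its address, and returns it by value through the "return StructReturn;" path in visitReturnInst() further down. Sketch of the emitted C, reusing the hypothetical l_struct_Pair from the previous note:

  struct l_struct_Pair make_pair(unsigned x) {
    struct l_struct_Pair StructReturn;           /* struct return temporary */
    struct l_struct_Pair *out = &StructReturn;   /* stands in for the sret argument */
    out->f0 = x;
    out->f1 = x + 1u;
    return StructReturn;
  }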
if (isStructReturn) { const Type *StructTy = @@ -2303,13 +2314,13 @@ void CWriter::printFunction(Function &F) { Out << "; /* Struct return temporary */\n"; Out << " "; - printType(Out, F.arg_begin()->getType(), false, + printType(Out, F.arg_begin()->getType(), false, GetValueName(F.arg_begin())); Out << " = &StructReturn;\n"; } bool PrintedVar = false; - + // print local variable information for the function for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { if (const AllocaInst *AI = isDirectAlloca(&*I)) { @@ -2317,7 +2328,7 @@ void CWriter::printFunction(Function &F) { printType(Out, AI->getAllocatedType(), false, GetValueName(AI)); Out << "; /* Address-exposed local */\n"; PrintedVar = true; - } else if (I->getType() != Type::getVoidTy(F.getContext()) && + } else if (I->getType() != Type::getVoidTy(F.getContext()) && !isInlinableInst(*I)) { Out << " "; printType(Out, I->getType(), false, GetValueName(&*I)); @@ -2333,7 +2344,7 @@ void CWriter::printFunction(Function &F) { } // We need a temporary for the BitCast to use so it can pluck a value out // of a union to do the BitCast. This is separate from the need for a - // variable to hold the result of the BitCast. + // variable to hold the result of the BitCast. if (isFPIntBitCast(*I)) { Out << " llvmBitCastUnion " << GetValueName(&*I) << "__BITCAST_TEMPORARY;\n"; @@ -2421,7 +2432,7 @@ void CWriter::visitReturnInst(ReturnInst &I) { Out << " return StructReturn;\n"; return; } - + // Don't output a void return if this is the last basic block in the function if (I.getNumOperands() == 0 && &*--I.getParent()->getParent()->end() == I.getParent() && @@ -2578,7 +2589,7 @@ void CWriter::visitBinaryOperator(Instruction &I) { // We must cast the results of binary operations which might be promoted. bool needsCast = false; if ((I.getType() == Type::getInt8Ty(I.getContext())) || - (I.getType() == Type::getInt16Ty(I.getContext())) + (I.getType() == Type::getInt16Ty(I.getContext())) || (I.getType() == Type::getFloatTy(I.getContext()))) { needsCast = true; Out << "(("; @@ -2630,7 +2641,7 @@ void CWriter::visitBinaryOperator(Instruction &I) { case Instruction::SRem: case Instruction::FRem: Out << " % "; break; case Instruction::UDiv: - case Instruction::SDiv: + case Instruction::SDiv: case Instruction::FDiv: Out << " / "; break; case Instruction::And: Out << " & "; break; case Instruction::Or: Out << " | "; break; @@ -2638,7 +2649,7 @@ void CWriter::visitBinaryOperator(Instruction &I) { case Instruction::Shl : Out << " << "; break; case Instruction::LShr: case Instruction::AShr: Out << " >> "; break; - default: + default: #ifndef NDEBUG errs() << "Invalid operator type!" << I; #endif @@ -2681,7 +2692,7 @@ void CWriter::visitICmpInst(ICmpInst &I) { case ICmpInst::ICMP_SGT: Out << " > "; break; default: #ifndef NDEBUG - errs() << "Invalid icmp predicate!" << I; + errs() << "Invalid icmp predicate!" << I; #endif llvm_unreachable(0); } @@ -2754,7 +2765,7 @@ void CWriter::visitCastInst(CastInst &I) { if (isFPIntBitCast(I)) { Out << '('; // These int<->float and long<->double casts need to be handled specially - Out << GetValueName(&I) << "__BITCAST_TEMPORARY." + Out << GetValueName(&I) << "__BITCAST_TEMPORARY." << getFloatBitCastField(I.getOperand(0)->getType()) << " = "; writeOperand(I.getOperand(0)); Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY." 
@@ -2762,7 +2773,7 @@ void CWriter::visitCastInst(CastInst &I) { Out << ')'; return; } - + Out << '('; printCast(I.getOpcode(), SrcTy, DstTy); @@ -2770,15 +2781,15 @@ void CWriter::visitCastInst(CastInst &I) { if (SrcTy == Type::getInt1Ty(I.getContext()) && I.getOpcode() == Instruction::SExt) Out << "0-"; - + writeOperand(I.getOperand(0)); - - if (DstTy == Type::getInt1Ty(I.getContext()) && + + if (DstTy == Type::getInt1Ty(I.getContext()) && (I.getOpcode() == Instruction::Trunc || I.getOpcode() == Instruction::FPToUI || I.getOpcode() == Instruction::FPToSI || I.getOpcode() == Instruction::PtrToInt)) { - // Make sure we really get a trunc to bool by anding the operand with 1 + // Make sure we really get a trunc to bool by anding the operand with 1 Out << "&1u"; } Out << ')'; @@ -2835,7 +2846,7 @@ void CWriter::lowerIntrinsics(Function &F) { #undef GET_GCC_BUILTIN_NAME // If we handle it, don't lower it. if (BuiltinName[0]) break; - + // All other intrinsic calls we must lower. Instruction *Before = 0; if (CI != &BB->front()) @@ -2858,7 +2869,7 @@ void CWriter::lowerIntrinsics(Function &F) { break; } - // We may have collected some prototypes to emit in the loop above. + // We may have collected some prototypes to emit in the loop above. // Emit them now, before the function that uses them is emitted. But, // be careful not to emit them twice. std::vector<Function*>::iterator I = prototypesToGen.begin(); @@ -2898,9 +2909,9 @@ void CWriter::visitCallInst(CallInst &I) { writeOperandDeref(I.getArgOperand(0)); Out << " = "; } - + if (I.isTailCall()) Out << " /*tail*/ "; - + if (!WroteCallee) { // If this is an indirect call to a struct return function, we need to cast // the pointer. Ditto for indirect calls with byval arguments. @@ -2924,7 +2935,7 @@ void CWriter::visitCallInst(CallInst &I) { NeedsCast = true; Callee = RF; } - + if (NeedsCast) { // Ok, just cast the pointer type. Out << "(("; @@ -2957,14 +2968,14 @@ void CWriter::visitCallInst(CallInst &I) { ++AI; ++ArgNo; } - + for (; AI != AE; ++AI, ++ArgNo) { if (PrintedArg) Out << ", "; if (ArgNo < NumDeclaredParams && (*AI)->getType() != FTy->getParamType(ArgNo)) { Out << '('; - printType(Out, FTy->getParamType(ArgNo), + printType(Out, FTy->getParamType(ArgNo), /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt)); Out << ')'; } @@ -2993,7 +3004,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, #include "llvm/Intrinsics.gen" #undef GET_GCC_BUILTIN_NAME assert(BuiltinName[0] && "Unknown LLVM intrinsic!"); - + Out << BuiltinName; WroteCallee = true; return false; @@ -3003,7 +3014,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, return true; case Intrinsic::vastart: Out << "0; "; - + Out << "va_start(*(va_list*)"; writeOperand(I.getArgOperand(0)); Out << ", "; @@ -3081,7 +3092,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, case Intrinsic::x86_sse2_cmp_pd: Out << '('; printType(Out, I.getType()); - Out << ')'; + Out << ')'; // Multiple GCC builtins multiplex onto this intrinsic. 
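The immediate in the intrinsic's last argument selects the predicate, so a single LLVM intrinsic fans out to a family of GCC builtins. For instance, predicate 1 in the switch below selects "lt", so a call would be printed roughly as follows (the exact builtin spelling is an assumption following GCC's ia32 naming):

  /* %r = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a,
                                                <4 x float> %b, i8 1)  */
  /* emitted C:  r = __builtin_ia32_cmpltps(a, b);                     */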
switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) { default: llvm_unreachable("Invalid llvm.x86.sse.cmp!"); @@ -3102,7 +3113,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, Out << 's'; else Out << 'd'; - + Out << "("; writeOperand(I.getArgOperand(0)); Out << ", "; @@ -3112,7 +3123,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, case Intrinsic::ppc_altivec_lvsl: Out << '('; printType(Out, I.getType()); - Out << ')'; + Out << ')'; Out << "__builtin_altivec_lvsl(0, (void*)"; writeOperand(I.getArgOperand(0)); Out << ")"; @@ -3132,13 +3143,13 @@ std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) { std::string Triple = TheModule->getTargetTriple(); if (Triple.empty()) Triple = llvm::sys::getHostTriple(); - + std::string E; if (const Target *Match = TargetRegistry::lookupTarget(Triple, E)) TargetAsm = Match->createAsmInfo(Triple); else return c.Codes[0]; - + const char *const *table = TargetAsm->getAsmCBE(); // Search the translation table if it exists. @@ -3164,7 +3175,7 @@ static std::string gccifyAsm(std::string asmstr) { if (asmstr[i + 1] == '{') { std::string::size_type a = asmstr.find_first_of(':', i + 1); std::string::size_type b = asmstr.find_first_of('}', i + 1); - std::string n = "%" + + std::string n = "%" + asmstr.substr(a + 1, b - a - 1) + asmstr.substr(i + 2, a - i - 2); asmstr.replace(i, b - i + 1, n); @@ -3174,7 +3185,7 @@ static std::string gccifyAsm(std::string asmstr) { } else if (asmstr[i] == '%')//grr { asmstr.replace(i, 1, "%%"); ++i;} - + return asmstr; } @@ -3182,8 +3193,8 @@ static std::string gccifyAsm(std::string asmstr) { // handle commutativity void CWriter::visitInlineAsm(CallInst &CI) { InlineAsm* as = cast<InlineAsm>(CI.getCalledValue()); - std::vector<InlineAsm::ConstraintInfo> Constraints = as->ParseConstraints(); - + InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints(); + std::vector<std::pair<Value*, int> > ResultVals; if (CI.getType() == Type::getVoidTy(CI.getContext())) ; @@ -3193,27 +3204,27 @@ void CWriter::visitInlineAsm(CallInst &CI) { } else { ResultVals.push_back(std::make_pair(&CI, -1)); } - + // Fix up the asm string for gcc and emit it. Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n"; Out << " :"; unsigned ValueCount = 0; bool IsFirst = true; - + // Convert over all the output constraints. - for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(), + for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), E = Constraints.end(); I != E; ++I) { - + if (I->Type != InlineAsm::isOutput) { ++ValueCount; continue; // Ignore non-output constraints. } - + assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); std::string C = InterpretASMConstraint(*I); if (C.empty()) continue; - + if (!IsFirst) { Out << ", "; IsFirst = false; } @@ -3222,7 +3233,7 @@ void CWriter::visitInlineAsm(CallInst &CI) { // Unpack the dest. Value *DestVal; int DestValNo = -1; - + if (ValueCount < ResultVals.size()) { DestVal = ResultVals[ValueCount].first; DestValNo = ResultVals[ValueCount].second; @@ -3231,38 +3242,38 @@ void CWriter::visitInlineAsm(CallInst &CI) { if (I->isEarlyClobber) C = "&"+C; - + Out << "\"=" << C << "\"(" << GetValueName(DestVal); if (DestValNo != -1) Out << ".field" << DestValNo; // Multiple retvals. Out << ")"; ++ValueCount; } - - + + // Convert over all the input constraints.
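Once the three conversion loops have run (outputs above, inputs and clobbers below), the printed statement has the usual GCC extended-asm shape. A self-contained sketch of the kind of C this produces for a one-output, one-input asm with a condition-code clobber (x86 AT&T syntax assumed, names illustrative):

  static int asm_sketch(int x) {
    int r;
    __asm__ volatile ("mov %1, %0"
                      : "=r"(r)    /* output constraints  */
                      : "r"(x)     /* input constraints   */
                      : "cc");     /* clobber constraints */
    return r;
  }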
Out << "\n :"; IsFirst = true; ValueCount = 0; - for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(), + for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), E = Constraints.end(); I != E; ++I) { if (I->Type != InlineAsm::isInput) { ++ValueCount; continue; // Ignore non-input constraints. } - + assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); std::string C = InterpretASMConstraint(*I); if (C.empty()) continue; - + if (!IsFirst) { Out << ", "; IsFirst = false; } - + assert(ValueCount >= ResultVals.size() && "Input can't refer to result"); Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size()); - + Out << "\"" << C << "\"("; if (!I->isIndirect) writeOperand(SrcVal); @@ -3270,10 +3281,10 @@ void CWriter::visitInlineAsm(CallInst &CI) { writeOperandDeref(SrcVal); Out << ")"; } - + // Convert over the clobber constraints. IsFirst = true; - for (std::vector<InlineAsm::ConstraintInfo>::iterator I = Constraints.begin(), + for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), E = Constraints.end(); I != E; ++I) { if (I->Type != InlineAsm::isClobber) continue; // Ignore non-input constraints. @@ -3281,15 +3292,15 @@ void CWriter::visitInlineAsm(CallInst &CI) { assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); std::string C = InterpretASMConstraint(*I); if (C.empty()) continue; - + if (!IsFirst) { Out << ", "; IsFirst = false; } - + Out << '\"' << C << '"'; } - + Out << ")"; } @@ -3308,13 +3319,13 @@ void CWriter::visitAllocaInst(AllocaInst &I) { void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, gep_type_iterator E, bool Static) { - + // If there are no indices, just print out the pointer. if (I == E) { writeOperand(Ptr); return; } - + // Find out if the last index is into a vector. If so, we have to print this // specially. Since vectors can't have elements of indexable type, only the // last index could possibly be of a vector element. @@ -3323,9 +3334,9 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI) LastIndexIsVector = dyn_cast<VectorType>(*TmpI); } - + Out << "("; - + // If the last index is into a vector, we can't print it as &a[i][j] because // we can't index into a vector with j in GCC. Instead, emit this as // (((float*)&a[i])+j) @@ -3334,7 +3345,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType())); Out << ")("; } - + Out << '&'; // If the first index is 0 (very typical) we can do a number of @@ -3444,7 +3455,7 @@ void CWriter::visitStoreInst(StoreInst &I) { if (BitMask) { Out << ") & "; printConstant(BitMask, false); - Out << ")"; + Out << ")"; } } @@ -3477,7 +3488,7 @@ void CWriter::visitInsertElementInst(InsertElementInst &I) { void CWriter::visitExtractElementInst(ExtractElementInst &I) { // We know that our operand is not inlined. 
Out << "(("; - const Type *EltTy = + const Type *EltTy = cast<VectorType>(I.getOperand(0)->getType())->getElementType(); printType(Out, PointerType::getUnqual(EltTy)); Out << ")(&" << GetValueName(I.getOperand(0)) << "))["; diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt index be24336..a23ff85 100644 --- a/lib/Target/CBackend/CMakeLists.txt +++ b/lib/Target/CBackend/CMakeLists.txt @@ -1,3 +1,5 @@ add_llvm_target(CBackend CBackend.cpp ) + +add_subdirectory(TargetInfo) diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 43ebdac..09b48ce 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -2,14 +2,56 @@ add_llvm_library(LLVMTarget Mangler.cpp SubtargetFeature.cpp Target.cpp + TargetAsmInfo.cpp TargetAsmLexer.cpp TargetData.cpp TargetELFWriterInfo.cpp - TargetFrameInfo.cpp + TargetFrameLowering.cpp TargetInstrInfo.cpp TargetIntrinsicInfo.cpp + TargetLibraryInfo.cpp TargetLoweringObjectFile.cpp TargetMachine.cpp TargetRegisterInfo.cpp TargetSubtarget.cpp ) + +set(LLVM_ENUM_ASM_PRINTERS "") +set(LLVM_ENUM_ASM_PARSERS "") +set(LLVM_ENUM_DISASSEMBLERS "") +foreach(t ${LLVM_TARGETS_TO_BUILD}) + message(STATUS "Targeting ${t}") + add_subdirectory(${t}) + set( td ${LLVM_MAIN_SRC_DIR}/lib/Target/${t} ) + file(GLOB asmp_file "${td}/*AsmPrinter.cpp") + if( asmp_file ) + set(LLVM_ENUM_ASM_PRINTERS + "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n") + endif() + if( EXISTS ${td}/AsmParser/CMakeLists.txt ) + set(LLVM_ENUM_ASM_PARSERS + "${LLVM_ENUM_ASM_PARSERS}LLVM_ASM_PARSER(${t})\n") + endif() + if( EXISTS ${td}/Disassembler/CMakeLists.txt ) + set(LLVM_ENUM_DISASSEMBLERS + "${LLVM_ENUM_DISASSEMBLERS}LLVM_DISASSEMBLER(${t})\n") + endif() +endforeach(t) + +# Produce llvm/Config/AsmPrinters.def +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmPrinters.def.in + ${LLVM_BINARY_DIR}/include/llvm/Config/AsmPrinters.def + ) + +# Produce llvm/Config/AsmParsers.def +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmParsers.def.in + ${LLVM_BINARY_DIR}/include/llvm/Config/AsmParsers.def + ) + +# Produce llvm/Config/Disassemblers.def +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Disassemblers.def.in + ${LLVM_BINARY_DIR}/include/llvm/Config/Disassemblers.def + ) diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 8a2b59a..0000000 --- a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -include_directories( - ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. - ) - -add_llvm_library(LLVMCellSPUAsmPrinter - SPUAsmPrinter.cpp - ) -add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen) diff --git a/lib/Target/CellSPU/AsmPrinter/Makefile b/lib/Target/CellSPU/AsmPrinter/Makefile deleted file mode 100644 index 4ec9d04..0000000 --- a/lib/Target/CellSPU/AsmPrinter/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -##===- lib/Target/CellSPU/AsmPrinter/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMCellSPUAsmPrinter - -# Hack: we need to include 'main' CellSPU target directory to grab -# private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp deleted file mode 100644 index 3e95531..0000000 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ /dev/null @@ -1,364 +0,0 @@ -//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -------=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to Cell SPU assembly language. This printer -// is the output mechanism used by `llc'. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asmprinter" -#include "SPU.h" -#include "SPUTargetMachine.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - class SPUAsmPrinter : public AsmPrinter { - public: - explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : - AsmPrinter(TM, Streamer) {} - - virtual const char *getPassName() const { - return "STI CBEA SPU Assembly Printer"; - } - - SPUTargetMachine &getTM() { - return static_cast<SPUTargetMachine&>(TM); - } - - /// printInstruction - This method is automatically generated by tablegen - /// from the instruction set description. - void printInstruction(const MachineInstr *MI, raw_ostream &OS); - static const char *getRegisterName(unsigned RegNo); - - - void EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); - } - void printOp(const MachineOperand &MO, raw_ostream &OS); - - /// printRegister - Print register according to target requirements. 
- /// - void printRegister(const MachineOperand &MO, bool R0AsZero, raw_ostream &O){ - unsigned RegNo = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && - "Not physreg??"); - O << getRegisterName(RegNo); - } - - void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNo); - if (MO.isReg()) { - O << getRegisterName(MO.getReg()); - } else if (MO.isImm()) { - O << MO.getImm(); - } else { - printOp(MO, O); - } - } - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - - - void - printS7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - int value = MI->getOperand(OpNo).getImm(); - value = (value << (32 - 7)) >> (32 - 7); - - assert((value >= -(1 << 8) && value <= (1 << 7) - 1) - && "Invalid s7 argument"); - O << value; - } - - void - printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - unsigned int value = MI->getOperand(OpNo).getImm(); - assert(value < (1 << 8) && "Invalid u7 argument"); - O << value; - } - - void - printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - char value = MI->getOperand(OpNo).getImm(); - O << (int) value; - O << "("; - printOperand(MI, OpNo+1, O); - O << ")"; - } - - void - printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - O << (short) MI->getOperand(OpNo).getImm(); - } - - void - printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - O << (unsigned short)MI->getOperand(OpNo).getImm(); - } - - void - printU32ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - O << (unsigned)MI->getOperand(OpNo).getImm(); - } - - void - printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - // When used as the base register, r0 reads constant zero rather than - // the value contained in the register. For this reason, the darwin - // assembler requires that we print r0 as 0 (no r) when used as the base. 
- const MachineOperand &MO = MI->getOperand(OpNo); - O << getRegisterName(MO.getReg()) << ", "; - printOperand(MI, OpNo+1, O); - } - - void - printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - unsigned int value = MI->getOperand(OpNo).getImm(); - assert(value <= (1 << 19) - 1 && "Invalid u18 argument"); - O << value; - } - - void - printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) - >> 16); - assert((value >= -(1 << 9) && value <= (1 << 9) - 1) - && "Invalid s10 argument"); - O << value; - } - - void - printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) - >> 16); - assert((value <= (1 << 10) - 1) && "Invalid u10 argument"); - O << value; - } - - void - printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - assert(MI->getOperand(OpNo).isImm() && - "printDFormAddr first operand is not immediate"); - int64_t value = int64_t(MI->getOperand(OpNo).getImm()); - int16_t value16 = int16_t(value); - assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1) - && "Invalid dform s10 offset argument"); - O << (value16 & ~0xf) << "("; - printOperand(MI, OpNo+1, O); - O << ")"; - } - - void - printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) - { - /* Note: operand 1 is an offset or symbol name. */ - if (MI->getOperand(OpNo).isImm()) { - printS16ImmOperand(MI, OpNo, O); - } else { - printOp(MI->getOperand(OpNo), O); - if (MI->getOperand(OpNo+1).isImm()) { - int displ = int(MI->getOperand(OpNo+1).getImm()); - if (displ > 0) - O << "+" << displ; - else if (displ < 0) - O << displ; - } - } - } - - void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - printOp(MI->getOperand(OpNo), O); - } - - void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - // Used to generate a ".-<target>", but it turns out that the assembler - // really wants the target. - // - // N.B.: This operand is used for call targets. Branch hints are another - // animal entirely. - printOp(MI->getOperand(OpNo), O); - } - - void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - // HBR operands are generated in front of branches, hence, the - // program counter plus the target. 
- O << ".+"; - printOp(MI->getOperand(OpNo), O); - } - - void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) { - printS16ImmOperand(MI, OpNo, O); - } else { - printOp(MI->getOperand(OpNo), O); - O << "@h"; - } - } - - void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) { - printS16ImmOperand(MI, OpNo, O); - } else { - printOp(MI->getOperand(OpNo), O); - O << "@l"; - } - } - - /// Print local store address - void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - printOp(MI->getOperand(OpNo), O); - } - - void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) { - int value = (int) MI->getOperand(OpNo).getImm(); - assert((value >= 0 && value < 16) - && "Invalid negated immediate rotate 7-bit argument"); - O << -value; - } else { - llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm"); - } - } - - void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){ - assert(MI->getOperand(OpNo).isImm() && - "Invalid/non-immediate rotate amount in printRotateNeg7Imm"); - int value = (int) MI->getOperand(OpNo).getImm(); - assert((value >= 0 && value <= 32) - && "Invalid negated immediate rotate 7-bit argument"); - O << -value; - } - }; -} // end of anonymous namespace - -// Include the auto-generated portion of the assembly writer -#include "SPUGenAsmWriter.inc" - -void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { - switch (MO.getType()) { - case MachineOperand::MO_Immediate: - report_fatal_error("printOp() does not handle immediate values"); - return; - - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - return; - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() - << '_' << MO.getIndex(); - return; - case MachineOperand::MO_ExternalSymbol: - // Computing the address of an external symbol, not calling it. - if (TM.getRelocationModel() != Reloc::Static) { - O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName() - << "$non_lazy_ptr"; - return; - } - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - return; - case MachineOperand::MO_GlobalAddress: - // External or weakly linked global variables need non-lazily-resolved - // stubs - if (TM.getRelocationModel() != Reloc::Static) { - const GlobalValue *GV = MO.getGlobal(); - if (((GV->isDeclaration() || GV->hasWeakLinkage() || - GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) { - O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - return; - } - } - O << *Mang->getSymbol(MO.getGlobal()); - return; - default: - O << "<unknown operand type: " << MO.getType() << ">"; - return; - } -} - -/// PrintAsmOperand - Print out an operand for an inline asm expression. -/// -bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O) { - // Does this asm operand have a single letter operand modifier? - if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) return true; // Unknown modifier. - - switch (ExtraCode[0]) { - default: return true; // Unknown modifier. - case 'L': // Write second word of DImode reference. - // Verify that this operand has two consecutive registers. 
- if (!MI->getOperand(OpNo).isReg() || - OpNo+1 == MI->getNumOperands() || - !MI->getOperand(OpNo+1).isReg()) - return true; - ++OpNo; // Return the high-part. - break; - } - } - - printOperand(MI, OpNo, O); - return false; -} - -bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. - printMemRegReg(MI, OpNo, O); - return false; -} - -// Force static initialization. -extern "C" void LLVMInitializeCellSPUAsmPrinter() { - RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget); -} diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index ddfca37..a2a2ef1 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -12,16 +12,18 @@ tablegen(SPUGenSubtarget.inc -gen-subtarget) tablegen(SPUGenCallingConv.inc -gen-callingconv) add_llvm_target(CellSPUCodeGen - SPUFrameInfo.cpp + SPUAsmPrinter.cpp SPUHazardRecognizers.cpp SPUInstrInfo.cpp SPUISelDAGToDAG.cpp SPUISelLowering.cpp + SPUFrameLowering.cpp SPUMCAsmInfo.cpp SPURegisterInfo.cpp SPUSubtarget.cpp SPUTargetMachine.cpp SPUSelectionDAGInfo.cpp + SPUNopFiller.cpp ) -target_link_libraries (LLVMCellSPUCodeGen LLVMSelectionDAG) +add_subdirectory(TargetInfo) diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile index cbdbd3c..77c66be 100644 --- a/lib/Target/CellSPU/Makefile +++ b/lib/Target/CellSPU/Makefile @@ -16,6 +16,6 @@ BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \ SPUGenInstrInfo.inc SPUGenDAGISel.inc \ SPUGenSubtarget.inc SPUGenCallingConv.inc -DIRS = AsmPrinter TargetInfo +DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt index 0e7ad35..3e7e0b6 100644 --- a/lib/Target/CellSPU/README.txt +++ b/lib/Target/CellSPU/README.txt @@ -55,7 +55,7 @@ TODO: * i128 support: * zero extension, any extension: done - * sign extension: needed + * sign extension: done * arithmetic operators (add, sub, mul, div): needed * logical operations (and, or, shl, srl, sra, xor, nor, nand): needed diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h index 1f21511..72f8430 100644 --- a/lib/Target/CellSPU/SPU.h +++ b/lib/Target/CellSPU/SPU.h @@ -23,6 +23,7 @@ namespace llvm { class formatted_raw_ostream; FunctionPass *createSPUISelDag(SPUTargetMachine &TM); + FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm); extern Target TheCellSPUTarget; } diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td index 069a182..5ef5716 100644 --- a/lib/Target/CellSPU/SPU64InstrInfo.td +++ b/lib/Target/CellSPU/SPU64InstrInfo.td @@ -54,8 +54,8 @@ class I64SETCCNegCond<PatFrag cond, CodeFrag compare>: // The i64 seteq fragment that does the scalar->vector conversion and // comparison: def CEQr64compare: - CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (ORv2i64_i64 R64C:$rA), - (ORv2i64_i64 R64C:$rB))), 0xb)>; + CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>; // The i64 seteq fragment that does the vector comparison def CEQv2i64compare: @@ -67,12 +67,14 @@ def CEQv2i64compare: // v2i64 seteq (equality): the setcc result is v4i32 multiclass CompareEqual64 { // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(ORi32_v4i32 CEQr64compare.Fragment)>; - def v2i64: CodeFrag<(ORi32_v4i32 CEQv2i64compare.Fragment)>; + def r64: 
CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>; + def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>; // SELB mask from FSM: - def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQr64compare.Fragment))>; - def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CEQv2i64compare.Fragment))>; + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CEQr64compare.Fragment), R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CEQv2i64compare.Fragment), R32C))>; } defm I64EQ: CompareEqual64; @@ -89,10 +91,12 @@ def : I64SELECTNegCond<setne, I64EQr64>; //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ def CLGTr64ugt: - CodeFrag<(CLGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>; + CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))>; def CLGTr64eq: - CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>; + CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))>; def CLGTr64compare: CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment, @@ -112,12 +116,14 @@ def CLGTv2i64compare: multiclass CompareLogicalGreaterThan64 { // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(ORi32_v4i32 CLGTr64compare.Fragment)>; + def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>; def v2i64: CodeFrag<CLGTv2i64compare.Fragment>; // SELB mask from FSM: - def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTr64compare.Fragment))>; - def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGTv2i64compare.Fragment))>; + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CLGTr64compare.Fragment), R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>; } defm I64LGT: CompareLogicalGreaterThan64; @@ -144,12 +150,14 @@ def CLGEv2i64compare: multiclass CompareLogicalGreaterEqual64 { // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(ORi32_v4i32 CLGEr64compare.Fragment)>; + def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>; def v2i64: CodeFrag<CLGEv2i64compare.Fragment>; // SELB mask from FSM: - def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEr64compare.Fragment))>; - def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CLGEv2i64compare.Fragment))>; + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CLGEr64compare.Fragment), R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>; } defm I64LGE: CompareLogicalGreaterEqual64; @@ -168,10 +176,12 @@ def : I64SELECTNegCond<setult, I64LGEr64>; //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ def CGTr64sgt: - CodeFrag<(CGTv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>; + CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))>; def CGTr64eq: - CodeFrag<(CEQv4i32 (ORv2i64_i64 R64C:$rA), (ORv2i64_i64 R64C:$rB))>; + CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))>; def CGTr64compare: CodeFrag<(SELBv2i64 CGTr64sgt.Fragment, @@ -191,12 +201,14 @@ def CGTv2i64compare: multiclass CompareGreaterThan64 { // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(ORi32_v4i32 CGTr64compare.Fragment)>; + def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>; def v2i64: CodeFrag<CGTv2i64compare.Fragment>; // SELB mask from FSM: - def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 
CGTr64compare.Fragment))>; - def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGTv2i64compare.Fragment))>; + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CGTr64compare.Fragment), R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CGTv2i64compare.Fragment), R32C))>; } defm I64GT: CompareLogicalGreaterThan64; @@ -223,12 +235,12 @@ def CGEv2i64compare: multiclass CompareGreaterEqual64 { // Plain old comparison, converts back to i32 scalar - def r64: CodeFrag<(ORi32_v4i32 CGEr64compare.Fragment)>; + def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>; def v2i64: CodeFrag<CGEv2i64compare.Fragment>; // SELB mask from FSM: - def r64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEr64compare.Fragment))>; - def v2i64mask: CodeFrag<(ORi32_v4i32 (FSMv4i32 CGEv2i64compare.Fragment))>; + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>; } defm I64GE: CompareGreaterEqual64; @@ -255,9 +267,9 @@ class v2i64_add<dag lhs, dag rhs, dag cg_mask>: v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>; def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), - (ORi64_v2i64 v2i64_add<(ORv2i64_i64 R64C:$rA), - (ORv2i64_i64 R64C:$rB), - (v4i32 VECREG:$rCGmask)>.Fragment)>; + (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG), + (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), (v4i32 VECREG:$rCGmask)), @@ -275,11 +287,12 @@ class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>: CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>; def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), - (ORi64_v2i64 v2i64_sub<(ORv2i64_i64 R64C:$rA), - (ORv2i64_i64 R64C:$rB), - v2i64_sub_bg<(ORv2i64_i64 R64C:$rA), - (ORv2i64_i64 R64C:$rB)>.Fragment, - (v4i32 VECREG:$rCGmask)>.Fragment)>; + (COPY_TO_REGCLASS + v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG), + v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment, + (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), (v4i32 VECREG:$rCGmask)), @@ -374,9 +387,9 @@ class v2i64_mul<dag rA, dag rB, dag rCGmask>: rCGmask>; def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), - (ORi64_v2i64 v2i64_mul<(ORv2i64_i64 R64C:$rA), - (ORv2i64_i64 R64C:$rB), - (v4i32 VECREG:$rCGmask)>.Fragment)>; + (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG), + (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), (v4i32 VECREG:$rCGmask)), diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp new file mode 100644 index 0000000..4040461 --- /dev/null +++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp @@ -0,0 +1,327 @@ +//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to Cell SPU assembly language. 
This printer +// is the output mechanism used by `llc'. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asmprinter" +#include "SPU.h" +#include "SPUTargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + class SPUAsmPrinter : public AsmPrinter { + public: + explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : + AsmPrinter(TM, Streamer) {} + + virtual const char *getPassName() const { + return "STI CBEA SPU Assembly Printer"; + } + + /// printInstruction - This method is automatically generated by tablegen + /// from the instruction set description. + void printInstruction(const MachineInstr *MI, raw_ostream &OS); + static const char *getRegisterName(unsigned RegNo); + + + void EmitInstruction(const MachineInstr *MI) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + printInstruction(MI, OS); + OutStreamer.EmitRawText(OS.str()); + } + void printOp(const MachineOperand &MO, raw_ostream &OS); + + void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (MO.isReg()) { + O << getRegisterName(MO.getReg()); + } else if (MO.isImm()) { + O << MO.getImm(); + } else { + printOp(MO, O); + } + } + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + + + void + printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + unsigned int value = MI->getOperand(OpNo).getImm(); + assert(value < (1 << 8) && "Invalid u7 argument"); + O << value; + } + + void + printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + char value = MI->getOperand(OpNo).getImm(); + O << (int) value; + O << "("; + printOperand(MI, OpNo+1, O); + O << ")"; + } + + void + printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + O << (short) MI->getOperand(OpNo).getImm(); + } + + void + printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + O << (unsigned short)MI->getOperand(OpNo).getImm(); + } + + void + printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + // When used as the base register, r0 reads constant zero rather than + // the value contained in the register. For this reason, the darwin + // assembler requires that we print r0 as 0 (no r) when used as the base. 
+ const MachineOperand &MO = MI->getOperand(OpNo); + O << getRegisterName(MO.getReg()) << ", "; + printOperand(MI, OpNo+1, O); + } + + void + printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + unsigned int value = MI->getOperand(OpNo).getImm(); + assert(value <= (1 << 19) - 1 && "Invalid u18 argument"); + O << value; + } + + void + printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) + >> 16); + assert((value >= -(1 << 9) && value <= (1 << 9) - 1) + && "Invalid s10 argument"); + O << value; + } + + void + printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) + >> 16); + assert((value <= (1 << 10) - 1) && "Invalid u10 argument"); + O << value; + } + + void + printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + assert(MI->getOperand(OpNo).isImm() && + "printDFormAddr first operand is not immediate"); + int64_t value = int64_t(MI->getOperand(OpNo).getImm()); + int16_t value16 = int16_t(value); + assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1) + && "Invalid dform s10 offset argument"); + O << (value16 & ~0xf) << "("; + printOperand(MI, OpNo+1, O); + O << ")"; + } + + void + printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + /* Note: operand 1 is an offset or symbol name. */ + if (MI->getOperand(OpNo).isImm()) { + printS16ImmOperand(MI, OpNo, O); + } else { + printOp(MI->getOperand(OpNo), O); + if (MI->getOperand(OpNo+1).isImm()) { + int displ = int(MI->getOperand(OpNo+1).getImm()); + if (displ > 0) + O << "+" << displ; + else if (displ < 0) + O << displ; + } + } + } + + void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + printOp(MI->getOperand(OpNo), O); + } + + void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + // Used to generate a ".-<target>", but it turns out that the assembler + // really wants the target. + // + // N.B.: This operand is used for call targets. Branch hints are another + // animal entirely. 
+ printOp(MI->getOperand(OpNo), O); + } + + void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + if (MI->getOperand(OpNo).isImm()) { + printS16ImmOperand(MI, OpNo, O); + } else { + printOp(MI->getOperand(OpNo), O); + O << "@h"; + } + } + + void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + if (MI->getOperand(OpNo).isImm()) { + printS16ImmOperand(MI, OpNo, O); + } else { + printOp(MI->getOperand(OpNo), O); + O << "@l"; + } + } + + /// Print local store address + void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + printOp(MI->getOperand(OpNo), O); + } + + void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).isImm()) { + int value = (int) MI->getOperand(OpNo).getImm(); + assert((value >= 0 && value < 16) + && "Invalid negated immediate rotate 7-bit argument"); + O << -value; + } else { + llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm"); + } + } + + void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){ + assert(MI->getOperand(OpNo).isImm() && + "Invalid/non-immediate rotate amount in printRotateNeg7Imm"); + int value = (int) MI->getOperand(OpNo).getImm(); + assert((value >= 0 && value <= 32) + && "Invalid negated immediate rotate 7-bit argument"); + O << -value; + } + }; +} // end of anonymous namespace + +// Include the auto-generated portion of the assembly writer +#include "SPUGenAsmWriter.inc" + +void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + report_fatal_error("printOp() does not handle immediate values"); + return; + + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + return; + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << MO.getIndex(); + return; + case MachineOperand::MO_ExternalSymbol: + // Computing the address of an external symbol, not calling it. + if (TM.getRelocationModel() != Reloc::Static) { + O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName() + << "$non_lazy_ptr"; + return; + } + O << *GetExternalSymbolSymbol(MO.getSymbolName()); + return; + case MachineOperand::MO_GlobalAddress: + // External or weakly linked global variables need non-lazily-resolved + // stubs + if (TM.getRelocationModel() != Reloc::Static) { + const GlobalValue *GV = MO.getGlobal(); + if (((GV->isDeclaration() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) { + O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + return; + } + } + O << *Mang->getSymbol(MO.getGlobal()); + return; + default: + O << "<unknown operand type: " << MO.getType() << ">"; + return; + } +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'L': // Write second word of DImode reference. + // Verify that this operand has two consecutive registers. 
+ if (!MI->getOperand(OpNo).isReg() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isReg()) + return true; + ++OpNo; // Return the high-part. + break; + } + } + + printOperand(MI, OpNo, O); + return false; +} + +bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + printMemRegReg(MI, OpNo, O); + return false; +} + +// Force static initialization. +extern "C" void LLVMInitializeCellSPUAsmPrinter() { + RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget); +} diff --git a/lib/Target/CellSPU/SPUFrameInfo.cpp b/lib/Target/CellSPU/SPUFrameInfo.cpp deleted file mode 100644 index 60d7ba7..0000000 --- a/lib/Target/CellSPU/SPUFrameInfo.cpp +++ /dev/null @@ -1,29 +0,0 @@ -//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Top-level implementation for the Cell SPU target. -// -//===----------------------------------------------------------------------===// - -#include "SPU.h" -#include "SPUFrameInfo.h" -#include "SPURegisterNames.h" -using namespace llvm; - -//===----------------------------------------------------------------------===// -// SPUFrameInfo: -//===----------------------------------------------------------------------===// - -SPUFrameInfo::SPUFrameInfo(const TargetMachine &tm): - TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), - TM(tm) -{ - LR[0].first = SPU::R0; - LR[0].second = 16; -} diff --git a/lib/Target/CellSPU/SPUFrameInfo.h b/lib/Target/CellSPU/SPUFrameInfo.h deleted file mode 100644 index f511acd..0000000 --- a/lib/Target/CellSPU/SPUFrameInfo.h +++ /dev/null @@ -1,75 +0,0 @@ -//===-- SPUFrameInfo.h - Top-level interface for Cell SPU Target -*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains CellSPU frame information that doesn't fit anywhere else -// cleanly... -// -//===----------------------------------------------------------------------===// - -#if !defined(SPUFRAMEINFO_H) - -#include "llvm/Target/TargetFrameInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "SPURegisterInfo.h" - -namespace llvm { - class SPUFrameInfo: public TargetFrameInfo { - const TargetMachine &TM; - std::pair<unsigned, int> LR[1]; - - public: - SPUFrameInfo(const TargetMachine &tm); - - //! Return a function's saved spill slots - /*! - For CellSPU, a function's saved spill slots is just the link register. - */ - const std::pair<unsigned, int> * - getCalleeSaveSpillSlots(unsigned &NumEntries) const; - - //! Stack slot size (16 bytes) - static int stackSlotSize() { - return 16; - } - //! Maximum frame offset representable by a signed 10-bit integer - /*! - This is the maximum frame offset that can be expressed as a 10-bit - integer, used in D-form addresses. - */ - static int maxFrameOffset() { - return ((1 << 9) - 1) * stackSlotSize(); - } - //! Minimum frame offset representable by a signed 10-bit integer - static int minFrameOffset() { - return -(1 << 9) * stackSlotSize(); - } - //! 
Minimum frame size (enough to spill LR + SP) - static int minStackSize() { - return (2 * stackSlotSize()); - } - //! Convert frame index to stack offset - static int FItoStackOffset(int frame_index) { - return frame_index * stackSlotSize(); - } - //! Number of instructions required to overcome hint-for-branch latency - /*! - HBR (hint-for-branch) instructions can be inserted when, for example, - we know that a given function is going to be called, such as printf(), - in the control flow graph. HBRs are only inserted if a sufficient number - of instructions occurs between the HBR and the target. Currently, HBRs - take 6 cycles, ergo, the magic number 6. - */ - static int branchHintPenalty() { - return 6; - } - }; -} - -#define SPUFRAMEINFO_H 1 -#endif diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp new file mode 100644 index 0000000..432f4a1 --- /dev/null +++ b/lib/Target/CellSPU/SPUFrameLowering.cpp @@ -0,0 +1,276 @@ +//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Top-level implementation for the Cell SPU target. +// +//===----------------------------------------------------------------------===// + +#include "SPU.h" +#include "SPUFrameLowering.h" +#include "SPURegisterNames.h" +#include "SPUInstrBuilder.h" +#include "SPUInstrInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// SPUFrameLowering: +//===----------------------------------------------------------------------===// + +SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0), + Subtarget(sti) { + LR[0].first = SPU::R0; + LR[0].second = 16; +} + + +//-------------------------------------------------------------------------- +// hasFP - Return true if the specified function actually has a dedicated frame +// pointer register. This is true if the function needs a frame pointer and has +// a non-zero stack size. +bool SPUFrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + return MFI->getStackSize() && + (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()); +} + + +/// determineFrameLayout - Determine the size of the frame and maximum call +/// frame size. +void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Get the number of bytes to allocate from the FrameInfo + unsigned FrameSize = MFI->getStackSize(); + + // Get the alignments provided by the target, and the maximum alignment + // (if any) of the fixed frame objects. 
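A quick sanity check on the add-and-mask rounding used below: alignments here are powers of two, so rounding up is a single add and mask. With Align = 16 (AlignMask = 15), a 40-byte frame becomes (40 + 15) & ~15 = 48. As a standalone helper:

  static unsigned roundUpToAlignment(unsigned Size, unsigned Align) {
    unsigned AlignMask = Align - 1;          // Align must be a power of two
    return (Size + AlignMask) & ~AlignMask;  // e.g. (40 + 15) & ~15 == 48
  }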
+ unsigned TargetAlign = getStackAlignment(); + unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment()); + assert(isPowerOf2_32(Align) && "Alignment is not power of 2"); + unsigned AlignMask = Align - 1; + + // Get the maximum call frame size of all the calls. + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + + // If we have dynamic alloca then maxCallFrameSize needs to be aligned so + // that allocations will be aligned. + if (MFI->hasVarSizedObjects()) + maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; + + // Update maximum call frame size. + MFI->setMaxCallFrameSize(maxCallFrameSize); + + // Include call frame size in total. + FrameSize += maxCallFrameSize; + + // Make sure the frame is aligned. + FrameSize = (FrameSize + AlignMask) & ~AlignMask; + + // Update frame info. + MFI->setStackSize(FrameSize); +} + +void SPUFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const SPUInstrInfo &TII = + *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo()); + MachineModuleInfo &MMI = MF.getMMI(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Prepare for debug frame info. + bool hasDebugInfo = MMI.hasDebugInfo(); + MCSymbol *FrameLabel = 0; + + // Move MBBI back to the beginning of the function. + MBBI = MBB.begin(); + + // Work out frame sizes. + determineFrameLayout(MF); + int FrameSize = MFI->getStackSize(); + + assert((FrameSize & 0xf) == 0 + && "SPURegisterInfo::emitPrologue: FrameSize not aligned"); + + // the "empty" frame size is 16 - just the register scavenger spill slot + if (FrameSize > 16 || MFI->adjustsStack()) { + FrameSize = -(FrameSize + SPUFrameLowering::minStackSize()); + if (hasDebugInfo) { + // Mark effective beginning of when frame pointer becomes valid. + FrameLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel); + } + + // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp) + // for the ABI + BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16) + .addReg(SPU::R1); + if (isInt<10>(FrameSize)) { + // Spill $sp to adjusted $sp + BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize) + .addReg(SPU::R1); + // Adjust $sp by required amout + BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1) + .addImm(FrameSize); + } else if (isInt<16>(FrameSize)) { + // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use + // $r2 to adjust $sp: + BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2) + .addImm(-16) + .addReg(SPU::R1); + BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2) + .addImm(FrameSize); + BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1) + .addReg(SPU::R2) + .addReg(SPU::R1); + BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1) + .addReg(SPU::R1) + .addReg(SPU::R2); + BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2) + .addReg(SPU::R2) + .addImm(16); + BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2) + .addReg(SPU::R2) + .addReg(SPU::R1); + } else { + report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); + } + + if (hasDebugInfo) { + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + + // Show update of SP. 
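
The isInt<10>/isInt<16> checks above decide which immediate form can still encode the (negated) stack adjustment: AIr32 takes a 10-bit immediate, ILr32 a 16-bit one. A standalone sketch of that range test, assuming nothing beyond standard C++; fitsSigned is a hypothetical stand-in for LLVM's isInt<N>():

    #include <cstdint>

    // Does v fit in an N-bit signed immediate?
    template <unsigned N> static bool fitsSigned(int64_t v) {
      return v >= -(INT64_C(1) << (N - 1)) && v < (INT64_C(1) << (N - 1));
    }

    int main() {
      bool small = fitsSigned<10>(-496);   // true: a single AIr32 suffices
      bool large = fitsSigned<10>(-4096);  // false: spill $r2 and use ILr32
      return (small && !large) ? 0 : 1;
    }
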
+ MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize); + Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); + + // Add callee saved registers to move list. + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); + unsigned Reg = CSI[I].getReg(); + if (Reg == SPU::R0) continue; + MachineLocation CSDst(MachineLocation::VirtualFP, Offset); + MachineLocation CSSrc(Reg); + Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc)); + } + + // Mark effective beginning of when frame pointer is ready. + MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel); + + MachineLocation FPDst(SPU::R1); + MachineLocation FPSrc(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc)); + } + } else { + // This is a leaf function -- insert a branch hint iff there are + // sufficient number instructions in the basic block. Note that + // this is just a best guess based on the basic block's size. + if (MBB.size() >= (unsigned) SPUFrameLowering::branchHintPenalty()) { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + dl = MBBI->getDebugLoc(); + + // Insert terminator label + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)) + .addSym(MMI.getContext().CreateTempSymbol()); + } + } +} + +void SPUFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + const SPUInstrInfo &TII = + *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo()); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + int FrameSize = MFI->getStackSize(); + int LinkSlotOffset = SPUFrameLowering::stackSlotSize(); + DebugLoc dl = MBBI->getDebugLoc(); + + assert(MBBI->getOpcode() == SPU::RET && + "Can only insert epilog into returning blocks"); + assert((FrameSize & 0xf) == 0 && "FrameSize not aligned"); + + // the "empty" frame size is 16 - just the register scavenger spill slot + if (FrameSize > 16 || MFI->adjustsStack()) { + FrameSize = FrameSize + SPUFrameLowering::minStackSize(); + if (isInt<10>(FrameSize + LinkSlotOffset)) { + // Reload $lr, adjust $sp by required amount + // Note: We do this to slightly improve dual issue -- not by much, but it + // is an opportunity for dual issue. + BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0) + .addImm(FrameSize + LinkSlotOffset) + .addReg(SPU::R1); + BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1) + .addReg(SPU::R1) + .addImm(FrameSize); + } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) { + // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use + // $r2 to adjust $sp: + BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2) + .addImm(16) + .addReg(SPU::R1); + BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2) + .addImm(FrameSize); + BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1) + .addReg(SPU::R1) + .addReg(SPU::R2); + BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0) + .addImm(16) + .addReg(SPU::R1); + BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2). 
+ addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+ }
+ }
+}
+
+void SPUFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves)
+ const {
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(SPU::R1, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const{
+ // Mark LR and SP unused, since the prolog spills them to stack and
+ // we don't want anyone else to spill them for us.
+ //
+ // Also, unless R2 is really used someday, don't spill it automatically.
+ MF.getRegInfo().setPhysRegUnused(SPU::R0);
+ MF.getRegInfo().setPhysRegUnused(SPU::R1);
+ MF.getRegInfo().setPhysRegUnused(SPU::R2);
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterClass *RC = &SPU::R32CRegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+}
diff --git a/lib/Target/CellSPU/SPUFrameLowering.h b/lib/Target/CellSPU/SPUFrameLowering.h
new file mode 100644
index 0000000..4fee72d
--- /dev/null
+++ b/lib/Target/CellSPU/SPUFrameLowering.h
@@ -0,0 +1,94 @@
+//=====-- SPUFrameLowering.h - SPU Frame Lowering stuff -*- C++ -*----========//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains CellSPU frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_FRAMEINFO_H
+#define SPU_FRAMEINFO_H
+
+#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SPUSubtarget;
+
+ class SPUFrameLowering: public TargetFrameLowering {
+ const SPUSubtarget &Subtarget;
+ std::pair<unsigned, int> LR[1];
+
+ public:
+ SPUFrameLowering(const SPUSubtarget &sti);
+
+ //! Determine the frame's layout
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ //! Predicate: Target has dedicated frame pointer
+ bool hasFP(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ //! Perform target-specific stack frame setup.
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+ //! Return a function's saved spill slots
+ /*!
+ For CellSPU, a function's saved spill slots is just the link register.
+ */
+ const std::pair<unsigned, int> *
+ getCalleeSaveSpillSlots(unsigned &NumEntries) const;
+
+ //! Stack slot size (16 bytes)
+ static int stackSlotSize() {
+ return 16;
+ }
+ //! Maximum frame offset representable by a signed 10-bit integer
+ /*!
+ This is the maximum frame offset that can be expressed as a 10-bit
+ integer, used in D-form addresses.
+ */
+ static int maxFrameOffset() {
+ return ((1 << 9) - 1) * stackSlotSize();
+ }
+ //!
Minimum frame offset representable by a signed 10-bit integer + static int minFrameOffset() { + return -(1 << 9) * stackSlotSize(); + } + //! Minimum frame size (enough to spill LR + SP) + static int minStackSize() { + return (2 * stackSlotSize()); + } + //! Convert frame index to stack offset + static int FItoStackOffset(int frame_index) { + return frame_index * stackSlotSize(); + } + //! Number of instructions required to overcome hint-for-branch latency + /*! + HBR (hint-for-branch) instructions can be inserted when, for example, + we know that a given function is going to be called, such as printf(), + in the control flow graph. HBRs are only inserted if a sufficient number + of instructions occurs between the HBR and the target. Currently, HBRs + take 6 cycles, ergo, the magic number 6. + */ + static int branchHintPenalty() { + return 6; + } + }; +} + +#endif diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp index 9dbab1d..403d7ef 100644 --- a/lib/Target/CellSPU/SPUHazardRecognizers.cpp +++ b/lib/Target/CellSPU/SPUHazardRecognizers.cpp @@ -41,12 +41,14 @@ SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) : /// /// \return NoHazard ScheduleHazardRecognizer::HazardType -SPUHazardRecognizer::getHazardType(SUnit *SU) +SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // Initial thoughts on how to do this, but this code cannot work unless the // function's prolog and epilog code are also being scheduled so that we can // accurately determine which pipeline is being scheduled. #if 0 + assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead"); + const SDNode *Node = SU->getNode()->getFlaggedMachineNode(); ScheduleHazardRecognizer::HazardType retval = NoHazard; bool mustBeOdd = false; diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h index d0ae2d8..675632c 100644 --- a/lib/Target/CellSPU/SPUHazardRecognizers.h +++ b/lib/Target/CellSPU/SPUHazardRecognizers.h @@ -20,7 +20,7 @@ namespace llvm { class TargetInstrInfo; - + /// SPUHazardRecognizer class SPUHazardRecognizer : public ScheduleHazardRecognizer { @@ -30,7 +30,7 @@ private: public: SPUHazardRecognizer(const TargetInstrInfo &TII); - virtual HazardType getHazardType(SUnit *SU); + virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); virtual void EmitNoop(); diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 2f15984..d226156 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -15,7 +15,7 @@ #include "SPU.h" #include "SPUTargetMachine.h" #include "SPUHazardRecognizers.h" -#include "SPUFrameInfo.h" +#include "SPUFrameLowering.h" #include "SPURegisterNames.h" #include "SPUTargetMachine.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -111,55 +111,6 @@ namespace { return false; } - //===------------------------------------------------------------------===// - //! EVT to "useful stuff" mapping structure: - - struct valtype_map_s { - EVT VT; - unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined) - bool ldresult_imm; /// LDRESULT instruction requires immediate? 
- unsigned lrinst; /// LR instruction - }; - - const valtype_map_s valtype_map[] = { - { MVT::i8, SPU::ORBIr8, true, SPU::LRr8 }, - { MVT::i16, SPU::ORHIr16, true, SPU::LRr16 }, - { MVT::i32, SPU::ORIr32, true, SPU::LRr32 }, - { MVT::i64, SPU::ORr64, false, SPU::LRr64 }, - { MVT::f32, SPU::ORf32, false, SPU::LRf32 }, - { MVT::f64, SPU::ORf64, false, SPU::LRf64 }, - // vector types... (sigh!) - { MVT::v16i8, 0, false, SPU::LRv16i8 }, - { MVT::v8i16, 0, false, SPU::LRv8i16 }, - { MVT::v4i32, 0, false, SPU::LRv4i32 }, - { MVT::v2i64, 0, false, SPU::LRv2i64 }, - { MVT::v4f32, 0, false, SPU::LRv4f32 }, - { MVT::v2f64, 0, false, SPU::LRv2f64 } - }; - - const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); - - const valtype_map_s *getValueTypeMapEntry(EVT VT) - { - const valtype_map_s *retval = 0; - for (size_t i = 0; i < n_valtype_map; ++i) { - if (valtype_map[i].VT == VT) { - retval = valtype_map + i; - break; - } - } - - -#ifndef NDEBUG - if (retval == 0) { - report_fatal_error("SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns" - "NULL for " + Twine(VT.getEVTString())); - } -#endif - - return retval; - } - //! Generate the carry-generate shuffle mask. SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { SmallVector<SDValue, 16 > ShufBytes; @@ -221,16 +172,10 @@ namespace { return CurDAG->getTargetConstant(Imm, MVT::i32); } - /// getI64Imm - Return a target constant with the specified value, of type - /// i64. - inline SDValue getI64Imm(uint64_t Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i64); - } - /// getSmallIPtrImm - Return a target constant of pointer type. inline SDValue getSmallIPtrImm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); - } + } SDNode *emitBuildVector(SDNode *bvNode) { EVT vecVT = bvNode->getValueType(0); @@ -268,10 +213,10 @@ namespace { unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); SDValue CGPoolOffset = SPU::LowerConstantPool(CPIdx, *CurDAG, TM); - + HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl, CurDAG->getEntryNode(), CGPoolOffset, - PseudoSourceValue::getConstantPool(),0, + MachinePointerInfo::getConstantPool(), false, false, Alignment)); CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue()); if (SDNode *N = SelectCode(Dummy.getValue().getNode())) @@ -356,13 +301,8 @@ namespace { return "Cell SPU DAG->DAG Pattern Instruction Selection"; } - /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for - /// this target when scheduling the DAG. - virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() { - const TargetInstrInfo *II = TM.getInstrInfo(); - assert(II && "No InstrInfo?"); - return new SPUHazardRecognizer(*II); - } + private: + SDValue getRC( MVT ); // Include the pieces autogenerated from the target description. 
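
Before the generated matcher is pulled in, a note on the new private getRC(MVT) helper declared above: it replaces per-type opcode tables like the deleted valtype_map, so that selection emits a generic COPY_TO_REGCLASS with the register class matching the value type instead of a type-specific OR/LR opcode. A toy version of the mapping; the enum values are illustrative, the real register-class IDs come from TableGen:

    // Toy analogue of SPUDAGToDAGISel::getRC().
    enum RegClass { R8C, R16C, R32C, R32FP, R64C, GPRC, VECREG };
    enum SimpleVT { I8, I16, I32, F32, I64, I128,
                    V16I8, V8I16, V4I32, V4F32, V2I64, V2F64 };

    static RegClass regClassFor(SimpleVT vt) {
      switch (vt) {
      case I8:   return R8C;
      case I16:  return R16C;
      case I32:  return R32C;
      case F32:  return R32FP;
      case I64:  return R64C;
      case I128: return GPRC;   // full 128-bit scalars live in GPRC
      default:   return VECREG; // all 128-bit vector types share VECREG
      }
    }

    int main() { return regClassFor(F32) == R32FP ? 0 : 1; }
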
#include "SPUGenDAGISel.inc" @@ -450,8 +390,8 @@ bool SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { return DFormAddressPredicate(Op, N, Base, Index, - SPUFrameInfo::minFrameOffset(), - SPUFrameInfo::maxFrameOffset()); + SPUFrameLowering::minFrameOffset(), + SPUFrameLowering::maxFrameOffset()); } bool @@ -467,7 +407,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, int FI = int(FIN->getIndex()); DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = " << FI << "\n"); - if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) { + if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { Base = CurDAG->getTargetConstant(0, PtrTy); Index = CurDAG->getTargetFrameIndex(FI, PtrTy); return true; @@ -493,7 +433,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI << "\n"); - if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) { + if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { Base = CurDAG->getTargetConstant(offset, PtrTy); Index = CurDAG->getTargetFrameIndex(FI, PtrTy); return true; @@ -514,7 +454,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI << "\n"); - if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) { + if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { Base = CurDAG->getTargetConstant(offset, PtrTy); Index = CurDAG->getTargetFrameIndex(FI, PtrTy); return true; @@ -564,8 +504,8 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, Base = CurDAG->getTargetConstant(0, N.getValueType()); Index = N; return true; - } else if (Opc == ISD::Register - ||Opc == ISD::CopyFromReg + } else if (Opc == ISD::Register + ||Opc == ISD::CopyFromReg ||Opc == ISD::UNDEF ||Opc == ISD::Constant) { unsigned OpOpc = Op->getOpcode(); @@ -625,6 +565,46 @@ SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base, return false; } +/*! + Utility function to use with COPY_TO_REGCLASS instructions. Returns a SDValue + to be used as the last parameter of a +CurDAG->getMachineNode(COPY_TO_REGCLASS,..., ) function call + \arg VT the value type for which we want a register class +*/ +SDValue SPUDAGToDAGISel::getRC( MVT VT ) { + switch( VT.SimpleTy ) { + case MVT::i8: + return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32); + break; + case MVT::i16: + return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32); + break; + case MVT::i32: + return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32); + break; + case MVT::f32: + return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32); + break; + case MVT::i64: + return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32); + break; + case MVT::i128: + return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32); + break; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v4f32: + case MVT::v2i64: + case MVT::v2f64: + return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32); + break; + default: + assert( false && "add a new case here" ); + } + return SDValue(); +} + //! Convert the operand from a target-independent to a target-specific node /*! 
*/ @@ -632,7 +612,7 @@ SDNode * SPUDAGToDAGISel::Select(SDNode *N) { unsigned Opc = N->getOpcode(); int n_ops = -1; - unsigned NewOpc; + unsigned NewOpc = 0; EVT OpVT = N->getValueType(0); SDValue Ops[8]; DebugLoc dl = N->getDebugLoc(); @@ -654,7 +634,7 @@ SPUDAGToDAGISel::Select(SDNode *N) { NewOpc = SPU::Ar32; Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0)); Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl, - N->getValueType(0), TFI, Imm0), + N->getValueType(0), TFI), 0); n_ops = 2; } @@ -669,7 +649,7 @@ SPUDAGToDAGISel::Select(SDNode *N) { EVT Op0VT = Op0.getValueType(); EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(), Op0VT, (128 / Op0VT.getSizeInBits())); - EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), + EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, (128 / OpVT.getSizeInBits())); SDValue shufMask; @@ -703,19 +683,19 @@ SPUDAGToDAGISel::Select(SDNode *N) { } SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode()); - + HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, Op0VecVT, Op0)); - + SDValue PromScalar; if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode())) PromScalar = SDValue(N, 0); else PromScalar = PromoteScalar.getValue(); - + SDValue zextShuffle = CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, - PromScalar, PromScalar, + PromScalar, PromScalar, SDValue(shufMaskLoad, 0)); HandleSDNode Dummy2(zextShuffle); @@ -725,7 +705,7 @@ SPUDAGToDAGISel::Select(SDNode *N) { zextShuffle = Dummy2.getValue(); HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT, zextShuffle)); - + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); SelectCode(Dummy.getValue().getNode()); return Dummy.getValue().getNode(); @@ -736,7 +716,7 @@ SPUDAGToDAGISel::Select(SDNode *N) { HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, N->getOperand(0), N->getOperand(1), SDValue(CGLoad, 0))); - + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); if (SDNode *N = SelectCode(Dummy.getValue().getNode())) return N; @@ -748,7 +728,7 @@ SPUDAGToDAGISel::Select(SDNode *N) { HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, N->getOperand(0), N->getOperand(1), SDValue(CGLoad, 0))); - + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); if (SDNode *N = SelectCode(Dummy.getValue().getNode())) return N; @@ -779,8 +759,8 @@ SPUDAGToDAGISel::Select(SDNode *N) { if (shift_amt >= 32) { SDNode *hi32 = - CurDAG->getMachineNode(SPU::ORr32_r64, dl, OpVT, - Op0.getOperand(0)); + CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, + Op0.getOperand(0), getRC(MVT::i32)); shift_amt -= 32; if (shift_amt > 0) { @@ -862,23 +842,12 @@ SPUDAGToDAGISel::Select(SDNode *N) { SDValue Arg = N->getOperand(0); SDValue Chain = N->getOperand(1); SDNode *Result; - const valtype_map_s *vtm = getValueTypeMapEntry(VT); - - if (vtm->ldresult_ins == 0) { - report_fatal_error("LDRESULT for unsupported type: " + - Twine(VT.getEVTString())); - } - - Opc = vtm->ldresult_ins; - if (vtm->ldresult_imm) { - SDValue Zero = CurDAG->getTargetConstant(0, VT); - - Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Zero, Chain); - } else { - Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Arg, Chain); - } + Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT, + MVT::Other, Arg, + getRC( VT.getSimpleVT()), Chain); return Result; + } else if (Opc == SPUISD::IndirectAddr) { // Look at the operands: SelectCode() will catch the cases that aren't // specifically handled here. 
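
The zero-extend lowering above leans on SPUISD::SHUFB, the SPU byte permute: a 16-byte control vector picks source bytes, and control bytes with the 0x80 bit pattern produce zeros, so a zext is a single shuffle through a constant-pool mask. A simplified scalar model under those assumptions (it ignores SHUFB's 0xFF/0x80 special encodings and its second source operand); byteShuffle and the mask layout are illustrative only:

    #include <cstdint>

    // Simplified byte permute: control bytes with the high bit set
    // produce zero; otherwise the low nibble indexes a source byte.
    static void byteShuffle(const uint8_t src[16], const uint8_t ctrl[16],
                            uint8_t dst[16]) {
      for (int i = 0; i < 16; ++i)
        dst[i] = (ctrl[i] & 0x80) ? 0 : src[ctrl[i] & 0xf];
    }

    int main() {
      uint8_t src[16] = {0xAA, 0xBB};   // an i16 payload in bytes 0..1
      uint8_t dst[16];
      // Zero-extend the i16 into an i32 slot: two forced-zero bytes,
      // then the two payload bytes.
      uint8_t ctrl[16] = {0x80, 0x80, 0x00, 0x01};
      byteShuffle(src, ctrl, dst);
      return (dst[0] == 0 && dst[1] == 0 && dst[2] == 0xAA && dst[3] == 0xBB)
                 ? 0 : 1;
    }
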
@@ -904,10 +873,10 @@ SPUDAGToDAGISel::Select(SDNode *N) { NewOpc = SPU::AIr32; Ops[1] = Op1; } else { - Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl, - N->getValueType(0), + Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl, + N->getValueType(0), Op1), - 0); + 0); } } Ops[0] = Op0; @@ -939,7 +908,7 @@ SPUDAGToDAGISel::Select(SDNode *N) { SDNode * SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) { SDValue Op0 = N->getOperand(0); - EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), + EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, (128 / OpVT.getSizeInBits())); SDValue ShiftAmt = N->getOperand(1); EVT ShiftAmtVT = ShiftAmt.getValueType(); @@ -947,7 +916,8 @@ SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) { SDValue SelMaskVal; DebugLoc dl = N->getDebugLoc(); - VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0); + VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT, + Op0, getRC(MVT::v2i64) ); SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16); SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal); ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT, @@ -991,7 +961,8 @@ SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) { SDValue(Shift, 0), SDValue(Bits, 0)); } - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0)); + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + OpVT, SDValue(Shift, 0), getRC(MVT::i64)); } /*! @@ -1012,7 +983,8 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) { SDNode *VecOp0, *Shift = 0; DebugLoc dl = N->getDebugLoc(); - VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0); + VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT, + Op0, getRC(MVT::v2i64) ); if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) { unsigned bytes = unsigned(CN->getZExtValue()) >> 3; @@ -1058,7 +1030,8 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) { SDValue(Shift, 0), SDValue(Bits, 0)); } - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0)); + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + OpVT, SDValue(Shift, 0), getRC(MVT::i64)); } /*! 
@@ -1072,21 +1045,23 @@ SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) { SDNode * SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) { // Promote Op0 to vector - EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), + EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, (128 / OpVT.getSizeInBits())); SDValue ShiftAmt = N->getOperand(1); EVT ShiftAmtVT = ShiftAmt.getValueType(); DebugLoc dl = N->getDebugLoc(); SDNode *VecOp0 = - CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, N->getOperand(0)); + CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + VecVT, N->getOperand(0), getRC(MVT::v2i64)); SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT); SDNode *SignRot = CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64, SDValue(VecOp0, 0), SignRotAmt); SDNode *UpperHalfSign = - CurDAG->getMachineNode(SPU::ORi32_v4i32, dl, MVT::i32, SDValue(SignRot, 0)); + CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32)); SDNode *UpperHalfSignMask = CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0)); @@ -1133,7 +1108,8 @@ SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) { SDValue(Shift, 0), SDValue(NegShift, 0)); } - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0)); + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + OpVT, SDValue(Shift, 0), getRC(MVT::i64)); } /*! @@ -1154,20 +1130,21 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, // Here's where it gets interesting, because we have to parse out the // subtree handed back in i64vec: - if (i64vec.getOpcode() == ISD::BIT_CONVERT) { + if (i64vec.getOpcode() == ISD::BITCAST) { // The degenerate case where the upper and lower bits in the splat are // identical: SDValue Op0 = i64vec.getOperand(0); ReplaceUses(i64vec, Op0); - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(emitBuildVector(Op0.getNode()), 0)); + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, + SDValue(emitBuildVector(Op0.getNode()), 0), + getRC(MVT::i64)); } else if (i64vec.getOpcode() == SPUISD::SHUFB) { SDValue lhs = i64vec.getOperand(0); SDValue rhs = i64vec.getOperand(1); SDValue shufmask = i64vec.getOperand(2); - if (lhs.getOpcode() == ISD::BIT_CONVERT) { + if (lhs.getOpcode() == ISD::BITCAST) { ReplaceUses(lhs, lhs.getOperand(0)); lhs = lhs.getOperand(0); } @@ -1176,7 +1153,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, ? lhs.getNode() : emitBuildVector(lhs.getNode())); - if (rhs.getOpcode() == ISD::BIT_CONVERT) { + if (rhs.getOpcode() == ISD::BITCAST) { ReplaceUses(rhs, rhs.getOperand(0)); rhs = rhs.getOperand(0); } @@ -1185,7 +1162,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, ? 
rhs.getNode() : emitBuildVector(rhs.getNode())); - if (shufmask.getOpcode() == ISD::BIT_CONVERT) { + if (shufmask.getOpcode() == ISD::BITCAST) { ReplaceUses(shufmask, shufmask.getOperand(0)); shufmask = shufmask.getOperand(0); } @@ -1201,11 +1178,13 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, HandleSDNode Dummy(shufNode); SDNode *SN = SelectCode(Dummy.getValue().getNode()); if (SN == 0) SN = Dummy.getValue().getNode(); - - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(SN, 0)); + + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + OpVT, SDValue(SN, 0), getRC(MVT::i64)); } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) { - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(emitBuildVector(i64vec.getNode()), 0)); + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, + SDValue(emitBuildVector(i64vec.getNode()), 0), + getRC(MVT::i64)); } else { report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec" "condition"); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 46f3189..e6511d0 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -1,4 +1,3 @@ -// //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// // The LLVM Compiler Infrastructure // @@ -14,12 +13,13 @@ #include "SPURegisterNames.h" #include "SPUISelLowering.h" #include "SPUTargetMachine.h" -#include "SPUFrameInfo.h" +#include "SPUFrameLowering.h" #include "SPUMachineFunction.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" #include "llvm/CallingConv.h" +#include "llvm/Type.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -41,41 +41,12 @@ using namespace llvm; namespace { std::map<unsigned, const char *> node_names; - //! EVT mapping to useful data for Cell SPU - struct valtype_map_s { - EVT valtype; - int prefslot_byte; - }; - - const valtype_map_s valtype_map[] = { - { MVT::i1, 3 }, - { MVT::i8, 3 }, - { MVT::i16, 2 }, - { MVT::i32, 0 }, - { MVT::f32, 0 }, - { MVT::i64, 0 }, - { MVT::f64, 0 }, - { MVT::i128, 0 } - }; - - const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); - - const valtype_map_s *getValueTypeMapEntry(EVT VT) { - const valtype_map_s *retval = 0; - - for (size_t i = 0; i < n_valtype_map; ++i) { - if (valtype_map[i].valtype == VT) { - retval = valtype_map + i; - break; - } - } - -#ifndef NDEBUG - if (retval == 0) { - report_fatal_error("getValueTypeMapEntry returns NULL for " + - Twine(VT.getEVTString())); - } -#endif + // Byte offset of the preferred slot (counted from the MSB) + int prefslotOffset(EVT VT) { + int retval=0; + if (VT==MVT::i1) retval=3; + if (VT==MVT::i8) retval=3; + if (VT==MVT::i16) retval=2; return retval; } @@ -125,8 +96,6 @@ namespace { SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()), SPUTM(TM) { - // Fold away setcc operations if possible. - setPow2DivIsCheap(); // Use _setjmp/_longjmp instead of setjmp/longjmp. 
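
The prefslotOffset() helper introduced above, in place of the deleted valtype_map table, encodes where a scalar sits inside a 128-bit SPU register. A self-contained restatement of that table; prefSlotOffset is a hypothetical name:

    // Byte offset of the preferred slot, counted from the most
    // significant byte of the 128-bit register.
    static int prefSlotOffset(int bits) {
      switch (bits) {
      case 1:
      case 8:  return 3;   // i1/i8 live in byte 3 of the first word
      case 16: return 2;   // i16 lives in bytes 2..3
      default: return 0;   // i32/i64/f32/f64/i128 start at byte 0
      }
    }

    int main() {
      return (prefSlotOffset(16) == 2 && prefSlotOffset(64) == 0) ? 0 : 1;
    }
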
setUseUnderscoreSetJmp(true); @@ -376,10 +345,10 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal); - setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal); - setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal); - setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal); + setOperationAction(ISD::BITCAST, MVT::i32, Legal); + setOperationAction(ISD::BITCAST, MVT::f32, Legal); + setOperationAction(ISD::BITCAST, MVT::i64, Legal); + setOperationAction(ISD::BITCAST, MVT::f64, Legal); // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); @@ -439,9 +408,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::AND, VT, Legal); setOperationAction(ISD::OR, VT, Legal); setOperationAction(ISD::XOR, VT, Legal); - setOperationAction(ISD::LOAD, VT, Legal); + setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::SELECT, VT, Legal); - setOperationAction(ISD::STORE, VT, Legal); + setOperationAction(ISD::STORE, VT, Custom); // These operations need to be expanded: setOperationAction(ISD::SDIV, VT, Expand); @@ -502,8 +471,8 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; - node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS"; - node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES"; + node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS"; + node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES"; node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; @@ -531,10 +500,20 @@ unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const { //===----------------------------------------------------------------------===// MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const { - // i16 and i32 are valid SETCC result types - return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? 
- VT.getSimpleVT().SimpleTy : - MVT::i32); + // i8, i16 and i32 are valid SETCC result types + MVT::SimpleValueType retval; + + switch(VT.getSimpleVT().SimpleTy){ + case MVT::i1: + case MVT::i8: + retval = MVT::i8; break; + case MVT::i16: + retval = MVT::i16; break; + case MVT::i32: + default: + retval = MVT::i32; + } + return retval; } //===----------------------------------------------------------------------===// @@ -572,113 +551,174 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { EVT OutVT = Op.getValueType(); ISD::LoadExtType ExtType = LN->getExtensionType(); unsigned alignment = LN->getAlignment(); - const valtype_map_s *vtm = getValueTypeMapEntry(InVT); + int pso = prefslotOffset(InVT); DebugLoc dl = Op.getDebugLoc(); - - switch (LN->getAddressingMode()) { - case ISD::UNINDEXED: { - SDValue result; - SDValue basePtr = LN->getBasePtr(); - SDValue rotate; - - if (alignment == 16) { - ConstantSDNode *CN; - - // Special cases for a known aligned load to simplify the base pointer - // and the rotation amount: - if (basePtr.getOpcode() == ISD::ADD - && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) { - // Known offset into basePtr - int64_t offset = CN->getSExtValue(); - int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte); - - if (rotamt < 0) - rotamt += 16; - - rotate = DAG.getConstant(rotamt, MVT::i16); - - // Simplify the base pointer for this case: - basePtr = basePtr.getOperand(0); - if ((offset & ~0xf) > 0) { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant((offset & ~0xf), PtrVT)); - } - } else if ((basePtr.getOpcode() == SPUISD::AFormAddr) - || (basePtr.getOpcode() == SPUISD::IndirectAddr - && basePtr.getOperand(0).getOpcode() == SPUISD::Hi - && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) { - // Plain aligned a-form address: rotate into preferred slot - // Same for (SPUindirect (SPUhi ...), (SPUlo ...)) - int64_t rotamt = -vtm->prefslot_byte; - if (rotamt < 0) - rotamt += 16; - rotate = DAG.getConstant(rotamt, MVT::i16); - } else { - // Offset the rotate amount by the basePtr and the preferred slot - // byte offset - int64_t rotamt = -vtm->prefslot_byte; - if (rotamt < 0) - rotamt += 16; - rotate = DAG.getNode(ISD::ADD, dl, PtrVT, - basePtr, - DAG.getConstant(rotamt, PtrVT)); - } - } else { - // Unaligned load: must be more pessimistic about addressing modes: - if (basePtr.getOpcode() == ISD::ADD) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - SDValue Flag; - - SDValue Op0 = basePtr.getOperand(0); - SDValue Op1 = basePtr.getOperand(1); - - if (isa<ConstantSDNode>(Op1)) { - // Convert the (add <ptr>, <const>) to an indirect address contained - // in a register. Note that this is done because we need to avoid - // creating a 0(reg) d-form address due to the SPU's block loads. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); - basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); - } else { - // Convert the (add <arg1>, <arg2>) to an indirect address, which - // will likely be lowered as a reg(reg) x-form address. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - } - } else { + EVT vecVT = InVT.isVector()? 
InVT: EVT::getVectorVT(*DAG.getContext(), InVT, + (128 / InVT.getSizeInBits())); + + // two sanity checks + assert( LN->getAddressingMode() == ISD::UNINDEXED + && "we should get only UNINDEXED adresses"); + // clean aligned loads can be selected as-is + if (InVT.getSizeInBits() == 128 && (alignment%16) == 0) + return SDValue(); + + // Get pointerinfos to the memory chunk(s) that contain the data to load + uint64_t mpi_offset = LN->getPointerInfo().Offset; + mpi_offset -= mpi_offset%16; + MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset); + MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16); + + SDValue result; + SDValue basePtr = LN->getBasePtr(); + SDValue rotate; + + if ((alignment%16) == 0) { + ConstantSDNode *CN; + + // Special cases for a known aligned load to simplify the base pointer + // and the rotation amount: + if (basePtr.getOpcode() == ISD::ADD + && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) { + // Known offset into basePtr + int64_t offset = CN->getSExtValue(); + int64_t rotamt = int64_t((offset & 0xf) - pso); + + if (rotamt < 0) + rotamt += 16; + + rotate = DAG.getConstant(rotamt, MVT::i16); + + // Simplify the base pointer for this case: + basePtr = basePtr.getOperand(0); + if ((offset & ~0xf) > 0) { basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, basePtr, - DAG.getConstant(0, PtrVT)); + DAG.getConstant((offset & ~0xf), PtrVT)); } - + } else if ((basePtr.getOpcode() == SPUISD::AFormAddr) + || (basePtr.getOpcode() == SPUISD::IndirectAddr + && basePtr.getOperand(0).getOpcode() == SPUISD::Hi + && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) { + // Plain aligned a-form address: rotate into preferred slot + // Same for (SPUindirect (SPUhi ...), (SPUlo ...)) + int64_t rotamt = -pso; + if (rotamt < 0) + rotamt += 16; + rotate = DAG.getConstant(rotamt, MVT::i16); + } else { // Offset the rotate amount by the basePtr and the preferred slot // byte offset + int64_t rotamt = -pso; + if (rotamt < 0) + rotamt += 16; rotate = DAG.getNode(ISD::ADD, dl, PtrVT, basePtr, - DAG.getConstant(-vtm->prefslot_byte, PtrVT)); + DAG.getConstant(rotamt, PtrVT)); } + } else { + // Unaligned load: must be more pessimistic about addressing modes: + if (basePtr.getOpcode() == ISD::ADD) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); + SDValue Flag; + + SDValue Op0 = basePtr.getOperand(0); + SDValue Op1 = basePtr.getOperand(1); + + if (isa<ConstantSDNode>(Op1)) { + // Convert the (add <ptr>, <const>) to an indirect address contained + // in a register. Note that this is done because we need to avoid + // creating a 0(reg) d-form address due to the SPU's block loads. + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); + basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); + } else { + // Convert the (add <arg1>, <arg2>) to an indirect address, which + // will likely be lowered as a reg(reg) x-form address. 
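
For the aligned cases above, the rotate amount is the value's byte position within its 16-byte line minus the preferred-slot offset, wrapped into [0, 16). A standalone sketch of that computation under those assumptions; rotateAmount is an illustrative name:

    // Left-rotate amount (in bytes) that brings a scalar loaded from
    // byteOffset into its preferred slot, as computed in LowerLOAD.
    static int rotateAmount(long byteOffset, int prefSlot) {
      int rot = int(byteOffset & 0xf) - prefSlot;
      if (rot < 0)
        rot += 16;
      return rot;
    }

    int main() {
      // An i16 (preferred slot 2) at offset 6 in its line needs a
      // 4-byte left rotate to land in bytes 2..3.
      return rotateAmount(6, 2) == 4 ? 0 : 1;
    }
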
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(-pso, PtrVT));
+ }
- // Re-emit as a v16i8 vector load
- result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
- LN->getSrcValue(), LN->getSrcValueOffset(),
- LN->isVolatile(), LN->isNonTemporal(), 16);
+ // Do the load as an i128 to allow possible shifting
+ SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
+ lowMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), 16);
+ // When the size is not greater than the alignment, we get all the data
+ // with just one load
if (alignment >= InVT.getSizeInBits()/8) {
// Update the chain
- the_chain = result.getValue(1);
+ the_chain = low.getValue(1);
// Rotate into the preferred slot:
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
- result.getValue(0), rotate);
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
+ low.getValue(0), rotate);
// Convert the loaded v16i8 vector to the appropriate vector type
// specified by the operand:
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
InVT, (128 / InVT.getSizeInBits()));
result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
- DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
+ DAG.getNode(ISD::BITCAST, dl, vecVT, result));
+ }
+ // When alignment is less than the size, we might need (known only at
+ // run-time) two loads
+ // TODO: if the memory address is composed only from constants, we have
+ // extra knowledge, and might avoid the second load
+ else {
+ // Storage position offset from the lower 16-byte aligned memory chunk
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
+ // Get a register full of ones. (This implementation is a workaround: LLVM
+ // cannot handle 128-bit signed int constants.)
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(16, PtrVT)),
+ highMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), 16);
+
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ high.getValue(1));
+
+ // Shift the (possible) high part right to compensate for the misalignment.
+ // If there is no high part (i.e. the value is i64 and the offset is 4),
+ // this will zero out the high value.
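
The SRL_BYTES/SHL_BYTES/OR sequence that follows can be modeled with plain integer arithmetic: treat each 16-byte line as a big-endian 128-bit integer, shift the low line left by the byte offset, shift the high line right by the complement, and OR the halves. A sketch assuming the GCC/Clang unsigned __int128 extension; loadUnaligned is a hypothetical name, and the DAG's byte-shift nodes are approximated with ordinary shifts:

    #include <cstdint>

    // One 16-byte SPU register, viewed as a big-endian integer: byte 0
    // of the quadword is the most significant byte.
    typedef unsigned __int128 u128;

    // Fetch both 16-byte lines covering the value, shift away the
    // bytes that do not belong, and OR the halves back together.
    static u128 loadUnaligned(const u128 line[2], unsigned offset /*0..15*/) {
      if (offset == 0)               // shifting a u128 by 128 bits is UB
        return line[0];
      u128 low  = line[0] << (offset * 8);        // drop leading bytes
      u128 high = line[1] >> ((16 - offset) * 8); // keep spilled-over bytes
      return low | high;
    }

    int main() {
      u128 line[2];
      line[0] = ((u128)0x1122334455667788ULL << 64) | 0x99AABBCCDDEEFF00ULL;
      line[1] = ((u128)0xCAFEBABE00000000ULL << 64);
      u128 v = loadUnaligned(line, 8);
      return ((uint64_t)(v >> 64) == 0x99AABBCCDDEEFF00ULL &&
              (uint64_t)v == 0xCAFEBABE00000000ULL) ? 0 : 1;
    }
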
+ high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high, + DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant( 16, MVT::i32), + offset + )); + + // Shift the low similarily + // TODO: add SPUISD::SHL_BYTES + low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset ); + + // Merge the two parts + result = DAG.getNode(ISD::BITCAST, dl, vecVT, + DAG.getNode(ISD::OR, dl, MVT::i128, low, high)); + + if (!InVT.isVector()) { + result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result ); + } + } // Handle extending loads by extending the scalar result: if (ExtType == ISD::SEXTLOAD) { result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result); @@ -702,21 +742,6 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { result = DAG.getNode(SPUISD::LDRESULT, dl, retvts, retops, sizeof(retops) / sizeof(retops[0])); return result; - } - case ISD::PRE_INC: - case ISD::PRE_DEC: - case ISD::POST_INC: - case ISD::POST_DEC: - case ISD::LAST_INDEXED_MODE: - { - report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other " - "than UNINDEXED\n" + - Twine((unsigned)LN->getAddressingMode())); - /*NOTREACHED*/ - } - } - - return SDValue(); } /// Custom lower stores for CellSPU @@ -734,93 +759,103 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); DebugLoc dl = Op.getDebugLoc(); unsigned alignment = SN->getAlignment(); + SDValue result; + EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT, + (128 / StVT.getSizeInBits())); + // Get pointerinfos to the memory chunk(s) that contain the data to load + uint64_t mpi_offset = SN->getPointerInfo().Offset; + mpi_offset -= mpi_offset%16; + MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset); + MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16); + + + // two sanity checks + assert( SN->getAddressingMode() == ISD::UNINDEXED + && "we should get only UNINDEXED adresses"); + // clean aligned loads can be selected as-is + if (StVT.getSizeInBits() == 128 && (alignment%16) == 0) + return SDValue(); + + SDValue alignLoadVec; + SDValue basePtr = SN->getBasePtr(); + SDValue the_chain = SN->getChain(); + SDValue insertEltOffs; + + if ((alignment%16) == 0) { + ConstantSDNode *CN; + // Special cases for a known aligned load to simplify the base pointer + // and insertion byte: + if (basePtr.getOpcode() == ISD::ADD + && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) { + // Known offset into basePtr + int64_t offset = CN->getSExtValue(); + + // Simplify the base pointer for this case: + basePtr = basePtr.getOperand(0); + insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant((offset & 0xf), PtrVT)); - switch (SN->getAddressingMode()) { - case ISD::UNINDEXED: { - // The vector type we really want to load from the 16-byte chunk. 
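
As with loads, the rewritten store path above first works out the 16-byte-aligned chunk(s) a possibly misaligned access may touch, by rounding the pointer-info offset down to a multiple of 16. A trivial standalone version; chunksFor is an illustrative name:

    #include <cstdint>

    // The two 16-byte lines a (possibly misaligned) access may touch.
    struct ChunkPair { uint64_t low, high; };

    static ChunkPair chunksFor(uint64_t byteOffset) {
      const uint64_t low = byteOffset - byteOffset % 16;  // round down to 16
      return ChunkPair{low, low + 16};
    }

    int main() {
      ChunkPair c = chunksFor(42);  // offset 42 touches lines 32 and 48
      return (c.low == 32 && c.high == 48) ? 0 : 1;
    }
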
- EVT vecVT = EVT::getVectorVT(*DAG.getContext(), - VT, (128 / VT.getSizeInBits())); - - SDValue alignLoadVec; - SDValue basePtr = SN->getBasePtr(); - SDValue the_chain = SN->getChain(); - SDValue insertEltOffs; - - if (alignment == 16) { - ConstantSDNode *CN; - // Special cases for a known aligned load to simplify the base pointer - // and insertion byte: - if (basePtr.getOpcode() == ISD::ADD - && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) { - // Known offset into basePtr - int64_t offset = CN->getSExtValue(); - - // Simplify the base pointer for this case: - basePtr = basePtr.getOperand(0); - insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant((offset & 0xf), PtrVT)); - - if ((offset & ~0xf) > 0) { - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant((offset & ~0xf), PtrVT)); - } - } else { - // Otherwise, assume it's at byte 0 of basePtr - insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, - basePtr, - DAG.getConstant(0, PtrVT)); - } - } else { - // Unaligned load: must be more pessimistic about addressing modes: - if (basePtr.getOpcode() == ISD::ADD) { - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - SDValue Flag; - - SDValue Op0 = basePtr.getOperand(0); - SDValue Op1 = basePtr.getOperand(1); - - if (isa<ConstantSDNode>(Op1)) { - // Convert the (add <ptr>, <const>) to an indirect address contained - // in a register. Note that this is done because we need to avoid - // creating a 0(reg) d-form address due to the SPU's block loads. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); - basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); - } else { - // Convert the (add <arg1>, <arg2>) to an indirect address, which - // will likely be lowered as a reg(reg) x-form address. - basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); - } - } else { + if ((offset & ~0xf) > 0) { basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, basePtr, - DAG.getConstant(0, PtrVT)); + DAG.getConstant((offset & ~0xf), PtrVT)); } - - // Insertion point is solely determined by basePtr's contents - insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT, + } else { + // Otherwise, assume it's at byte 0 of basePtr + insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, basePtr, DAG.getConstant(0, PtrVT)); } + } else { + // Unaligned load: must be more pessimistic about addressing modes: + if (basePtr.getOpcode() == ISD::ADD) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); + SDValue Flag; + + SDValue Op0 = basePtr.getOperand(0); + SDValue Op1 = basePtr.getOperand(1); + + if (isa<ConstantSDNode>(Op1)) { + // Convert the (add <ptr>, <const>) to an indirect address contained + // in a register. Note that this is done because we need to avoid + // creating a 0(reg) d-form address due to the SPU's block loads. 
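
The aligned scalar-store path being rebuilt in this hunk is a read-modify-write: load the containing quadword, splice the value in at its insertion offset (SHUFFLE_MASK plus SHUFB in the DAG), and store the whole line back, since the SPU stores only entire 16-byte lines. A scalar model of the splice step; storeScalar is a hypothetical helper:

    #include <cstdint>
    #include <cstring>

    // Splice a scalar into a copy of its 16-byte line at the given
    // byte offset: what SHUFB achieves in-register.
    static void storeScalar(uint8_t line[16], unsigned offset,
                            const void *val, unsigned size) {
      std::memcpy(line + offset, val, size);
    }

    int main() {
      uint8_t line[16] = {0};
      const uint32_t v = 0xDEADBEEF;
      storeScalar(line, 4, &v, sizeof v);
      return line[4] != 0 ? 0 : 1;  // the word landed in bytes 4..7
    }
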
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); + basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); + } else { + // Convert the (add <arg1>, <arg2>) to an indirect address, which + // will likely be lowered as a reg(reg) x-form address. + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + } + } else { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } - // Load the memory to which to store. - alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr, - SN->getSrcValue(), SN->getSrcValueOffset(), - SN->isVolatile(), SN->isNonTemporal(), 16); + // Insertion point is solely determined by basePtr's contents + insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } + // Load the lower part of the memory to which to store. + SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr, + lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16); + + // if we don't need to store over the 16 byte boundary, one store suffices + if (alignment >= StVT.getSizeInBits()/8) { // Update the chain - the_chain = alignLoadVec.getValue(1); + the_chain = low.getValue(1); - LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec); + LoadSDNode *LN = cast<LoadSDNode>(low); SDValue theValue = SN->getValue(); - SDValue result; if (StVT != VT && (theValue.getOpcode() == ISD::AssertZext @@ -844,48 +879,114 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs); - SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, + SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue); result = DAG.getNode(SPUISD::SHUFB, dl, vecVT, - vectorizeOp, alignLoadVec, - DAG.getNode(ISD::BIT_CONVERT, dl, + vectorizeOp, low, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, insertEltOp)); result = DAG.getStore(the_chain, dl, result, basePtr, - LN->getSrcValue(), LN->getSrcValueOffset(), + lowMemPtr, LN->isVolatile(), LN->isNonTemporal(), - LN->getAlignment()); - -#if 0 && !defined(NDEBUG) - if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - const SDValue ¤tRoot = DAG.getRoot(); - - DAG.setRoot(result); - errs() << "------- CellSPU:LowerStore result:\n"; - DAG.dump(); - errs() << "-------\n"; - DAG.setRoot(currentRoot); - } -#endif - - return result; - /*UNREACHED*/ - } - case ISD::PRE_INC: - case ISD::PRE_DEC: - case ISD::POST_INC: - case ISD::POST_DEC: - case ISD::LAST_INDEXED_MODE: - { - report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other " - "than UNINDEXED\n" + - Twine((unsigned)SN->getAddressingMode())); - /*NOTREACHED*/ - } + 16); + + } + // do the store when it might cross the 16 byte memory access boundary. + else { + // TODO issue a warning if SN->isVolatile()== true? This is likely not + // what the user wanted. 
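
The mask construction that follows can again be modeled with 128-bit integer arithmetic: build a mask of ones over the value's bytes, then shift it right by the byte offset for the low line and left by the complement for the spill-over into the high line. A sketch assuming unsigned __int128 (GCC/Clang); storeMasks is an illustrative name, and the no-spill-over case is handled explicitly to avoid an undefined 128-bit shift:

    #include <cstdint>

    typedef unsigned __int128 u128;  // 16-byte register stand-in

    // Byte masks used by the boundary-crossing store: ones where the
    // stored value lands in the low and high lines.
    static void storeMasks(unsigned offset, unsigned size,
                           u128 &lowMask, u128 &hiMask) {
      u128 ones = ~(u128)0;
      // Ones in the top `size` bytes: the SRL_BYTES/SHL_BYTES pair by
      // `surplus` in the real code.
      u128 valMask = ones << ((16 - size) * 8);
      lowMask = valMask >> (offset * 8);          // part in the first line
      hiMask  = (offset + size > 16)              // spill-over, if any
                    ? valMask << ((16 - offset) * 8)
                    : (u128)0;
    }

    int main() {
      u128 lo, hi;
      storeMasks(12, 8, lo, hi);  // an i64 stored 12 bytes into a line:
      // 4 bytes of ones stay in the low line, 4 spill into the high one.
      return (lo != 0 && hi != 0) ? 0 : 1;
    }
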
+
+ // Address offset from the nearest lower 16-byte aligned address
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ SN->getBasePtr(),
+ DAG.getConstant(0xf, MVT::i32));
+ // 16 - offset
+ SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset);
+ // 16 - sizeof(Value)
+ SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ DAG.getConstant( VT.getSizeInBits()/8,
+ MVT::i32));
+ // Get a register full of ones
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32);
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ // Create the 128-bit masks that have ones where the data to store is
+ // located.
+ SDValue lowmask, himask;
+ // If the value to store doesn't fill up an entire 128 bits, zero
+ // out the last bits of the mask so that only the value we want to store
+ // is masked.
+ // This is e.g. the case for a store of i32, align 2
+ if (!VT.isVector()){
+ Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
+ lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ surplus);
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
+
+ }
+ else {
+ lowmask = ones;
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ }
+ // This will be zero if no data goes to the high quad
+ himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ offset_compl);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
+ offset);
+
+ // Load in the old data and zero out the parts that will be overwritten with
+ // the new data to store.
+ SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ hi.getValue(1));
+
+ low = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
+ hi = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
+
+ // Shift the Value to store into place. rlow contains the parts that go to
+ // the lower memory chunk, rhi has the parts that go to the upper one.
+ SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
+ rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
+ SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
+ offset_compl);
+
+ // Merge the old data and the new data and store the results
+ // Need to convert vectors here to integer, as 'OR'ing floats asserts
+ rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
+ rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
+
+ low = DAG.getStore(the_chain, dl, rlow, basePtr,
+ lowMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ hi = DAG.getStore(the_chain, dl, rhi,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
+ hi.getValue(0));
}
- return SDValue();
+ return result;
}
//!
Generate the address of a constant pool entry. @@ -993,7 +1094,7 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) { SDValue T = DAG.getConstant(dbits, MVT::i64); SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T); return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec)); + DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec)); } return SDValue(); @@ -1013,9 +1114,9 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, MachineRegisterInfo &RegInfo = MF.getRegInfo(); SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>(); - unsigned ArgOffset = SPUFrameInfo::minStackSize(); + unsigned ArgOffset = SPUFrameLowering::minStackSize(); unsigned ArgRegIdx = 0; - unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); + unsigned StackSlotSize = SPUFrameLowering::stackSlotSize(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -1080,7 +1181,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // or we're forced to do vararg int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), + false, false, 0); ArgOffset += StackSlotSize; } @@ -1091,8 +1193,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // vararg handling: if (isVarArg) { - // FIXME: we should be able to query the argument registers from - // tablegen generated code. + // FIXME: we should be able to query the argument registers from + // tablegen generated code. static const unsigned ArgRegs[] = { SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, @@ -1117,9 +1219,9 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setVarArgsFrameIndex( MFI->CreateFixedObject(StackSlotSize, ArgOffset, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); - unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); + unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass, dl); SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); - SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0, + SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(), false, false, 0); Chain = Store.getOperand(0); MemOps.push_back(Store); @@ -1163,14 +1265,14 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); unsigned NumOps = Outs.size(); - unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); + unsigned StackSlotSize = SPUFrameLowering::stackSlotSize(); SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, - *DAG.getContext()); + *DAG.getContext()); // FIXME: allow for other calling conventions CCInfo.AnalyzeCallOperands(Outs, CCC_SPU); - + const unsigned NumArgRegs = ArgLocs.size(); @@ -1184,7 +1286,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Figure out which arguments are going to go in registers, and which in // memory. 
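
The argument-area bookkeeping above follows the SPU ABI: stack arguments begin past the two 16-byte linkage slots ($lr and the back-chained $sp) and advance one 16-byte slot per argument. A minimal restatement; stackArgOffset is a hypothetical name:

    // SPU ABI layout used by LowerFormalArguments/LowerCall.
    static const int StackSlotSize = 16;    // SPUFrameLowering::stackSlotSize()
    static const int MinStackSize  = 2 * StackSlotSize;  // [LR] + [SP]

    static int stackArgOffset(int argIndex) {
      return MinStackSize + argIndex * StackSlotSize;
    }

    int main() { return stackArgOffset(2) == 64 ? 0 : 1; }
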
- unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR] + unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR] unsigned ArgRegIdx = 0; // Keep track of registers passing arguments @@ -1219,7 +1321,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), false, false, 0)); ArgOffset += StackSlotSize; } @@ -1230,7 +1333,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Accumulate how many bytes are to be pushed on the stack, including the // linkage area, and parameter passing area. According to the SPU ABI, // we minimally need space for [LR] and [SP]. - unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize(); + unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize(); // Insert a call sequence start Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes, @@ -1311,7 +1414,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (InFlag.getNode()) Ops.push_back(InFlag); // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag), + Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue), &Ops[0], Ops.size()); InFlag = Chain.getValue(1); @@ -1334,7 +1437,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // If the call has results, copy the values out of the ret val registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign VA = RVLocs[i]; - + SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), InFlag); Chain = Val.getValue(1); @@ -1567,7 +1670,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { && "LowerBUILD_VECTOR: Unexpected floating point vector element."); // NOTE: pretend the constant is an integer. LLVM won't load FP constants SDValue T = DAG.getConstant(Value32, MVT::i32); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, + return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T)); break; } @@ -1577,7 +1680,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); // NOTE: pretend the constant is an integer. LLVM won't load FP constants SDValue T = DAG.getConstant(f64val, MVT::i64); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, + return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T)); break; } @@ -1587,7 +1690,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { SmallVector<SDValue, 8> Ops; Ops.assign(8, DAG.getConstant(Value16, MVT::i16)); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size())); } case MVT::v8i16: { @@ -1621,7 +1724,7 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, if (upper == lower) { // Magic constant that can be matched by IL, ILA, et. al. 
SDValue Val = DAG.getTargetConstant(upper, MVT::i32); - return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, + return DAG.getNode(ISD::BITCAST, dl, OpVT, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Val, Val, Val, Val)); } else { @@ -1650,7 +1753,7 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, // Create lower vector if not a special pattern if (!lower_special) { SDValue LO32C = DAG.getConstant(lower, MVT::i32); - LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, + LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, LO32C, LO32C, LO32C, LO32C)); } @@ -1658,7 +1761,7 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, // Create upper vector if not a special pattern if (!upper_special) { SDValue HI32C = DAG.getConstant(upper, MVT::i32); - HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, + HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, HI32C, HI32C, HI32C, HI32C)); } @@ -1735,14 +1838,14 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { unsigned CurrElt = 0; unsigned MaxElts = VecVT.getVectorNumElements(); unsigned PrevElt = 0; - unsigned V0Elt = 0; bool monotonic = true; bool rotate = true; + int rotamt=0; EVT maskVT; // which of the c?d instructions to use if (EltVT == MVT::i8) { V2EltIdx0 = 16; - maskVT = MVT::v16i8; + maskVT = MVT::v16i8; } else if (EltVT == MVT::i16) { V2EltIdx0 = 8; maskVT = MVT::v8i16; @@ -1758,7 +1861,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { for (unsigned i = 0; i != MaxElts; ++i) { if (SVN->getMaskElt(i) < 0) continue; - + unsigned SrcElt = SVN->getMaskElt(i); if (monotonic) { @@ -1782,13 +1885,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if ((PrevElt == SrcElt - 1) || (PrevElt == MaxElts - 1 && SrcElt == 0)) { PrevElt = SrcElt; - if (SrcElt == 0) - V0Elt = i; } else { rotate = false; } - } else if (i == 0) { - // First time through, need to keep track of previous element + } else if (i == 0 || (PrevElt==0 && SrcElt==1)) { + // First time or after a "wrap around" + rotamt = SrcElt-i; PrevElt = SrcElt; } else { // This isn't a rotation, takes elements from vector 2 @@ -1806,15 +1908,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, DAG.getRegister(SPU::R1, PtrVT), DAG.getConstant(V2EltOffset, MVT::i32)); - SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, + SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); // Use shuffle mask in SHUFB synthetic instruction: return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, ShufMaskOp); } else if (rotate) { - int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8; - + if (rotamt < 0) + rotamt +=MaxElts; + rotamt *= EltVT.getSizeInBits()/8; return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(), V1, DAG.getConstant(rotamt, MVT::i16)); } else { @@ -1999,7 +2102,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(scaleShift, MVT::i32)); } - vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt); + vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt); // Replicate the bytes starting at byte 0 across the entire vector (for // consistency with the notion of a unified register set) @@ -2069,7 +2172,7 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { DAG.getRegister(SPU::R1, PtrVT), DAG.getConstant(Offset, PtrVT)); // 
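The LowerVECTOR_SHUFFLE change above makes the rotate amount explicit: rotamt starts as SrcElt - i at the first defined mask element, is re-derived after a wrap-around, and negative values are fixed up by adding MaxElts before being scaled to bytes. A standalone sketch of that check, as a hypothetical helper over a plain mask array; the real code folds this into the same loop that tracks the monotonic SHUFB case:

#include <vector>

// Returns the left-rotate amount in elements if `mask` merely rotates a
// single `numElts`-wide vector, or -1 if it is not such a rotation.
int shuffleAsRotate(const std::vector<int> &mask, int numElts) {
  int rotamt = -1;
  for (int i = 0; i != numElts; ++i) {
    if (mask[i] < 0)
      continue;                 // undef element, imposes no constraint
    if (mask[i] >= numElts)
      return -1;                // takes elements from the second vector
    int r = mask[i] - i;        // SrcElt - i
    if (r < 0)
      r += numElts;             // wrap-around case
    if (rotamt < 0)
      rotamt = r;               // first defined element
    else if (r != rotamt)
      return -1;                // elements taken out of rotation order
  }
  return rotamt;                // caller scales by the element size in bytes
}

  //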
widen the mask when dealing with half vectors - EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), + EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), 128/ VT.getVectorElementType().getSizeInBits()); SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); @@ -2077,7 +2180,7 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { DAG.getNode(SPUISD::SHUFB, dl, VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp), VecOp, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask)); + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask)); return result; } @@ -2197,12 +2300,12 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) { ConstVec = Op.getOperand(0); Arg = Op.getOperand(1); if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) { - if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) { + if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { ConstVec = ConstVec.getOperand(0); } else { ConstVec = Op.getOperand(1); Arg = Op.getOperand(0); - if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) { + if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { ConstVec = ConstVec.getOperand(0); } } @@ -2243,7 +2346,7 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) { */ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - EVT vecVT = EVT::getVectorVT(*DAG.getContext(), + EVT vecVT = EVT::getVectorVT(*DAG.getContext(), VT, (128 / VT.getSizeInBits())); DebugLoc dl = Op.getDebugLoc(); @@ -2419,7 +2522,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently // selected to a NOP: - SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs); + SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs); SDValue lhsHi32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, DAG.getNode(ISD::SRL, dl, IntVT, @@ -2453,7 +2556,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, ISD::SETGT)); } - SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs); + SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs); SDValue rhsHi32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, DAG.getNode(ISD::SRL, dl, IntVT, @@ -2567,7 +2670,7 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) // Type to truncate to EVT VT = Op.getValueType(); MVT simpleVT = VT.getSimpleVT(); - EVT VecVT = EVT::getVectorVT(*DAG.getContext(), + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), VT, (128 / VT.getSizeInBits())); DebugLoc dl = Op.getDebugLoc(); @@ -2575,7 +2678,7 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) SDValue Op0 = Op.getOperand(0); EVT Op0VT = Op0.getValueType(); - if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) { + if (Op0VT == MVT::i128 && simpleVT == MVT::i64) { // Create shuffle mask, least significant doubleword of quadword unsigned maskHigh = 0x08090a0b; unsigned maskLow = 0x0c0d0e0f; @@ -2616,6 +2719,12 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) SDValue Op0 = Op.getOperand(0); MVT Op0VT = Op0.getValueType().getSimpleVT(); + // extend i8 & i16 via i32 + if (Op0VT == MVT::i8 || Op0VT == MVT::i16) { + Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0); + Op0VT = MVT::i32; + } + // The type to extend to needs to be a i128 and // the type to extend from needs to be i64 or i32. 
  assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
@@ -2640,12 +2749,17 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
                     DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
                     DAG.getConstant(31, MVT::i32));
 
+  // reinterpret as an i128 (SHUFB requires it). This gets lowered away.
+  SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+                                        dl, Op0VT, Op0,
+                                        DAG.getTargetConstant(
+                                                      SPU::GPRCRegClass.getID(),
+                                                      MVT::i32)), 0);
   // Shuffle bytes - Copy the sign bits into the upper 64 bits
   // and the input value into the lower 64 bits.
   SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
-        DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
-
-  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
+                                   extended, sraVal, shufMask);
+  return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
 }
 
 //! Custom (target-specific) lowering entry point
@@ -2903,8 +3017,8 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
       }
       break;
     }
-    case SPUISD::SHLQUAD_L_BITS:
-    case SPUISD::SHLQUAD_L_BYTES:
+    case SPUISD::SHL_BITS:
+    case SPUISD::SHL_BYTES:
     case SPUISD::ROTBYTES_LEFT: {
       SDValue Op1 = N->getOperand(1);
 
@@ -2982,6 +3096,38 @@ SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const
   return TargetLowering::getConstraintType(ConstraintLetter);
 }
 
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+SPUTargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+  // If we don't have a value, we can't do a match,
+  // but allow it at the lowest weight.
+  if (CallOperandVal == NULL)
+    return CW_Default;
+  // Look at the constraint type.
+  switch (*constraint) {
+  default:
+    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+    break;
+    // FIXME: Seems like the supported constraint letters were just copied
+    // from PPC, as the following doesn't correspond to the GCC docs.
+    // I'm leaving it as is until someone adds the corresponding lowering
+    // support.
+  case 'b':
+  case 'r':
+  case 'f':
+  case 'd':
+  case 'v':
+  case 'y':
+    weight = CW_Register;
+    break;
+  }
+  return weight;
+}
+
 std::pair<unsigned, const TargetRegisterClass*>
 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                 EVT VT) const
@@ -3086,3 +3232,28 @@ SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   // The SPU target isn't yet aware of offsets.
   return false;
 }
+
+// Can we compare to Imm without writing it into a register?
+bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+  // ceqi, cgti, etc. all take an s10 operand.
+  return isInt<10>(Imm);
+}
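These two hooks (isLegalICmpImmediate above, isLegalAddressingMode just below) tell the generic optimizers which compare immediates and address forms are free on SPU. Here is a self-contained model of both checks; the AddrMode struct is a simplified stand-in for llvm::TargetLowering::AddrMode, and isIntN is a stand-in for llvm::isInt<N>, so names and layout are illustrative only:

#include <cstdint>

// Stand-in for llvm::isInt<N>: does x fit in an N-bit signed field?
template <int N> bool isIntN(int64_t x) {
  return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
}

// ceqi, cgti and friends take a signed 10-bit immediate: [-512, 511].
bool legalICmpImmediate(int64_t imm) { return isIntN<10>(imm); }

// Simplified mirror of the AddrMode fields the hook inspects.
struct AddrMode {
  bool HasBaseGV;    // global symbol used as an absolute base
  bool HasBaseReg;
  int64_t BaseOffs;
  int64_t Scale;
};

bool legalSPUAddressingMode(const AddrMode &AM) {
  // A-form: absolute 18-bit local-store address, nothing else.
  if (AM.HasBaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
    return true;
  // D-form: base register plus a signed 14-bit offset.
  if (!AM.HasBaseGV && AM.HasBaseReg && AM.Scale == 0 &&
      isIntN<14>(AM.BaseOffs))
    return true;
  // X-form: base register plus index register, no offset.
  if (!AM.HasBaseGV && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
    return true;
  return false;
}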
+
+bool
+SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                         const Type * ) const{
+
+  // A-form: 18bit absolute address.
+  if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
+    return true;
+
+  // D-form: reg + 14bit offset
+  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 &&
+      isInt<14>(AM.BaseOffs))
+    return true;
+
+  // X-form: reg+reg
+  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
+    return true;
+
+  return false;
+}
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 6d3c90b..95d44af 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -41,8 +41,9 @@ namespace llvm {
     CNTB,               ///< Count leading ones in bytes
     PREFSLOT2VEC,       ///< Promote scalar->vector
     VEC2PREFSLOT,       ///< Extract element 0
-    SHLQUAD_L_BITS,     ///< Rotate quad left, by bits
-    SHLQUAD_L_BYTES,    ///< Rotate quad left, by bytes
+    SHL_BITS,           ///< Shift quad left, by bits
+    SHL_BYTES,          ///< Shift quad left, by bytes
+    SRL_BYTES,          ///< Shift quad right, by bytes. Insert zeros.
     VEC_ROTL,           ///< Vector rotate left
     VEC_ROTR,           ///< Vector rotate right
     ROTBYTES_LEFT,      ///< Rotate bytes (loads -> ROTQBYI)
@@ -129,6 +130,11 @@ namespace llvm {
     ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
 
+    /// Examine constraint string and operand type and determine a weight value.
+    /// The operand object must already have been set up with the operand type.
+    ConstraintWeight getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
+
     std::pair<unsigned, const TargetRegisterClass*>
       getRegForInlineAsmConstraint(const std::string &Constraint,
                                    EVT VT) const;
@@ -170,6 +176,19 @@ namespace llvm {
                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                   const SmallVectorImpl<SDValue> &OutVals,
                   DebugLoc dl, SelectionDAG &DAG) const;
+
+    virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+    virtual bool isLegalAddressingMode(const AddrMode &AM,
+                                       const Type *Ty) const;
+
+    /// After allocating this many registers, the allocator should feel
+    /// register pressure. The value is a somewhat random guess, based on the
+    /// number of non-callee-saved registers in the C calling convention.
+    virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC,
+                                          MachineFunction &MF) const{
+      return 50;
+    }
   };
 }
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 26d6b4f..f9e6c72 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -16,6 +16,7 @@
 #include "SPUInstrBuilder.h"
 #include "SPUTargetMachine.h"
 #include "SPUGenInstrInfo.inc"
+#include "SPUHazardRecognizers.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -54,6 +55,16 @@ SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
     RI(*TM.getSubtargetImpl(), *this)
 { /* NOP */ }
 
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer( + const TargetMachine *TM, + const ScheduleDAG *DAG) const { + const TargetInstrInfo *TII = TM->getInstrInfo(); + assert(TII && "No InstrInfo?"); + return new SPUHazardRecognizer(*TII); +} + unsigned SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { @@ -129,7 +140,7 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) const { unsigned opc; - bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset()); + bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset()); if (RC == SPU::GPRCRegisterClass) { opc = (isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128); } else if (RC == SPU::R64CRegisterClass) { @@ -164,7 +175,7 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) const { unsigned opc; - bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset()); + bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset()); if (RC == SPU::GPRCRegisterClass) { opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128); } else if (RC == SPU::R64CRegisterClass) { diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index 191e55d..e5e9148 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -32,6 +32,10 @@ namespace llvm { /// virtual const SPURegisterInfo &getRegisterInfo() const { return RI; } + ScheduleHazardRecognizer * + CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index ca0fe00..25f6fd0 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -416,7 +416,7 @@ multiclass ImmLoadAddress def lo: ILARegInst<R32C, symbolLo, imm18>; def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val), - [/* no pattern */]>; + [(set R32C:$rT, imm18:$val)]>; } defm ILA : ImmLoadAddress; @@ -1167,10 +1167,10 @@ class XSHWRegInst<RegisterClass rclass>: [(set rclass:$rDest, (sext R16C:$rSrc))]>; multiclass ExtendHalfwordWord { - def v4i32: XSHWVecInst<v4i32, v8i16>; - + def v4i32: XSHWVecInst<v8i16, v4i32>; + def r16: XSHWRegInst<R32C>; - + def r32: XSHWInRegInst<R32C, [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>; def r64: XSHWInRegInst<R64C, [/* no pattern */]>; @@ -1385,59 +1385,6 @@ class ORRegInst<RegisterClass rclass>: ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>; -// ORCvtForm: OR conversion form -// -// This is used to "convert" the preferred slot to its vector equivalent, as -// well as convert a vector back to its preferred slot. -// -// These are effectively no-ops, but need to exist for proper type conversion -// and type coercion. 
- -class ORCvtForm<dag OOL, dag IOL, list<dag> pattern = [/* no pattern */]> - : SPUInstr<OOL, IOL, "or\t$rT, $rA, $rA", IntegerOp> { - bits<7> RA; - bits<7> RT; - - let Pattern = pattern; - - let Inst{0-10} = 0b10000010000; - let Inst{11-17} = RA; - let Inst{18-24} = RA; - let Inst{25-31} = RT; -} - -class ORPromoteScalar<RegisterClass rclass>: - ORCvtForm<(outs VECREG:$rT), (ins rclass:$rA)>; - -class ORExtractElt<RegisterClass rclass>: - ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>; - -/* class ORCvtRegGPRC<RegisterClass rclass>: - ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; */ - -/* class ORCvtGPRCReg<RegisterClass rclass>: - ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; */ - -class ORCvtFormR32Reg<RegisterClass rclass, list<dag> pattern = [ ]>: - ORCvtForm<(outs rclass:$rT), (ins R32C:$rA), pattern>; - -class ORCvtFormRegR32<RegisterClass rclass, list<dag> pattern = [ ]>: - ORCvtForm<(outs R32C:$rT), (ins rclass:$rA), pattern>; - -class ORCvtFormR64Reg<RegisterClass rclass, list<dag> pattern = [ ]>: - ORCvtForm<(outs rclass:$rT), (ins R64C:$rA), pattern>; - -class ORCvtFormRegR64<RegisterClass rclass, list<dag> pattern = [ ]>: - ORCvtForm<(outs R64C:$rT), (ins rclass:$rA), pattern>; - -class ORCvtGPRCVec: - ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>; - -class ORCvtVecGPRC: - ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>; - -class ORCvtVecVec: - ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>; multiclass BitwiseOr { @@ -1468,119 +1415,48 @@ multiclass BitwiseOr def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), [/* no pattern */]>; - - // scalar->vector promotion, prefslot2vec: - def v16i8_i8: ORPromoteScalar<R8C>; - def v8i16_i16: ORPromoteScalar<R16C>; - def v4i32_i32: ORPromoteScalar<R32C>; - def v2i64_i64: ORPromoteScalar<R64C>; - def v4f32_f32: ORPromoteScalar<R32FP>; - def v2f64_f64: ORPromoteScalar<R64FP>; - - // vector->scalar demotion, vec2prefslot: - def i8_v16i8: ORExtractElt<R8C>; - def i16_v8i16: ORExtractElt<R16C>; - def i32_v4i32: ORExtractElt<R32C>; - def i64_v2i64: ORExtractElt<R64C>; - def f32_v4f32: ORExtractElt<R32FP>; - def f64_v2f64: ORExtractElt<R64FP>; - - // Conversion from vector to GPRC - def i128_vec: ORCvtVecGPRC; - - // Conversion from GPRC to vector - def vec_i128: ORCvtGPRCVec; - -/* - // Conversion from register to GPRC - def i128_r64: ORCvtRegGPRC<R64C>; - def i128_f64: ORCvtRegGPRC<R64FP>; - def i128_r32: ORCvtRegGPRC<R32C>; - def i128_f32: ORCvtRegGPRC<R32FP>; - def i128_r16: ORCvtRegGPRC<R16C>; - def i128_r8: ORCvtRegGPRC<R8C>; - - // Conversion from GPRC to register - def r64_i128: ORCvtGPRCReg<R64C>; - def f64_i128: ORCvtGPRCReg<R64FP>; - def r32_i128: ORCvtGPRCReg<R32C>; - def f32_i128: ORCvtGPRCReg<R32FP>; - def r16_i128: ORCvtGPRCReg<R16C>; - def r8_i128: ORCvtGPRCReg<R8C>; -*/ -/* - // Conversion from register to R32C: - def r32_r16: ORCvtFormRegR32<R16C>; - def r32_r8: ORCvtFormRegR32<R8C>; - - // Conversion from R32C to register - def r32_r16: ORCvtFormR32Reg<R16C>; - def r32_r8: ORCvtFormR32Reg<R8C>; -*/ - - // Conversion from R64C to register: - def r32_r64: ORCvtFormR64Reg<R32C>; - // def r16_r64: ORCvtFormR64Reg<R16C>; - // def r8_r64: ORCvtFormR64Reg<R8C>; - - // Conversion to R64C from register: - def r64_r32: ORCvtFormRegR64<R32C>; - // def r64_r16: ORCvtFormRegR64<R16C>; - // def r64_r8: ORCvtFormRegR64<R8C>; - - // bitconvert patterns: - def r32_f32: ORCvtFormR32Reg<R32FP, - [(set R32FP:$rT, (bitconvert R32C:$rA))]>; - def f32_r32: ORCvtFormRegR32<R32FP, - [(set R32C:$rT, (bitconvert R32FP:$rA))]>; - - def r64_f64: 
ORCvtFormR64Reg<R64FP, - [(set R64FP:$rT, (bitconvert R64C:$rA))]>; - def f64_r64: ORCvtFormRegR64<R64FP, - [(set R64C:$rT, (bitconvert R64FP:$rA))]>; } defm OR : BitwiseOr; -// scalar->vector promotion patterns (preferred slot to vector): +//===----------------------------------------------------------------------===// +// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers +//===----------------------------------------------------------------------===// def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)), - (ORv16i8_i8 R8C:$rA)>; + (COPY_TO_REGCLASS R8C:$rA, VECREG)>; def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)), - (ORv8i16_i16 R16C:$rA)>; + (COPY_TO_REGCLASS R16C:$rA, VECREG)>; def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)), - (ORv4i32_i32 R32C:$rA)>; + (COPY_TO_REGCLASS R32C:$rA, VECREG)>; def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)), - (ORv2i64_i64 R64C:$rA)>; + (COPY_TO_REGCLASS R64C:$rA, VECREG)>; def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)), - (ORv4f32_f32 R32FP:$rA)>; + (COPY_TO_REGCLASS R32FP:$rA, VECREG)>; def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)), - (ORv2f64_f64 R64FP:$rA)>; - -// ORi*_v*: Used to extract vector element 0 (the preferred slot), otherwise -// known as converting the vector back to its preferred slot - -def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)), - (ORi8_v16i8 VECREG:$rA)>; + (COPY_TO_REGCLASS R64FP:$rA, VECREG)>; + +def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))), + (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>; -def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)), - (ORi16_v8i16 VECREG:$rA)>; +def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))), + (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>; -def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)), - (ORi32_v4i32 VECREG:$rA)>; +def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))), + (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>; -def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)), - (ORi64_v2i64 VECREG:$rA)>; +def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))), + (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>; -def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)), - (ORf32_v4f32 VECREG:$rA)>; +def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))), + (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>; -def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)), - (ORf64_v2f64 VECREG:$rA)>; +def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))), + (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>; // Load Register: This is an assembler alias for a bitwise OR of a register // against itself. 
It's here because it brings some clarity to assembly @@ -2093,7 +1969,7 @@ defm EQV: BitEquivalence; class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>: RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC", - IntegerOp, pattern>; + ShuffleOp, pattern>; class SHUFBVecInst<ValueType resultvec, ValueType maskvec>: SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), @@ -2134,7 +2010,7 @@ defm SHUFB : ShuffleBytes; class SHLHInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftVec, pattern>; class SHLHVecInst<ValueType vectype>: SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB), @@ -2156,7 +2032,7 @@ defm SHLH : ShiftLeftHalfword; class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>: RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val", - RotateShift, pattern>; + RotShiftVec, pattern>; class SHLHIVecInst<ValueType vectype>: SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), @@ -2182,7 +2058,7 @@ def : Pat<(shl R16C:$rA, (i32 uimm7:$val)), class SHLInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftVec, pattern>; multiclass ShiftLeftWord { @@ -2201,7 +2077,7 @@ defm SHL: ShiftLeftWord; class SHLIInst<dag OOL, dag IOL, list<dag> pattern>: RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val", - RotateShift, pattern>; + RotShiftVec, pattern>; multiclass ShiftLeftWordImm { @@ -2230,7 +2106,7 @@ defm SHLI : ShiftLeftWordImm; class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftQuad, pattern>; class SHLQBIVecInst<ValueType vectype>: SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), @@ -2259,7 +2135,7 @@ defm SHLQBI : ShiftLeftQuadByBits; // enforcement, whereas with SHLQBI, we have to "take it on faith." 
class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>: RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val", - RotateShift, pattern>; + RotShiftQuad, pattern>; class SHLQBIIVecInst<ValueType vectype>: SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), @@ -2283,7 +2159,7 @@ defm SHLQBII : ShiftLeftQuadByBitsImm; class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>: RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftQuad, pattern>; class SHLQBYVecInst<ValueType vectype>: SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), @@ -2306,7 +2182,7 @@ defm SHLQBY: ShiftLeftQuadBytes; class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>: RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val", - RotateShift, pattern>; + RotShiftQuad, pattern>; class SHLQBYIVecInst<ValueType vectype>: SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), @@ -2330,7 +2206,7 @@ defm SHLQBYI : ShiftLeftQuadBytesImm; class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftQuad, pattern>; class SHLQBYBIVecInst<ValueType vectype>: SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), @@ -2359,7 +2235,7 @@ defm SHLQBYBI : ShiftLeftQuadBytesBitCount; //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ class ROTHInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftVec, pattern>; class ROTHVecInst<ValueType vectype>: ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), @@ -2386,7 +2262,7 @@ def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB), //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>: RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val", - RotateShift, pattern>; + RotShiftVec, pattern>; class ROTHIVecInst<ValueType vectype>: ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), @@ -2413,7 +2289,7 @@ def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)), class ROTInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftVec, pattern>; class ROTVecInst<ValueType vectype>: ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), @@ -2461,7 +2337,7 @@ def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))), class ROTIInst<dag OOL, dag IOL, list<dag> pattern>: RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val", - RotateShift, pattern>; + RotShiftVec, pattern>; class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>: ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val), @@ -2491,12 +2367,15 @@ defm ROTI : RotateLeftWordImm; class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftQuad, pattern>; -class ROTQBYVecInst<ValueType vectype>: - ROTQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - [(set (vectype VECREG:$rT), - (SPUrotbytes_left (vectype VECREG:$rA), R32C:$rB))]>; +class ROTQBYGenInst<ValueType type, RegisterClass rc>: + ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB), + [(set (type rc:$rT), + (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>; + +class ROTQBYVecInst<ValueType type>: + ROTQBYGenInst<type, VECREG>; multiclass RotateQuadLeftByBytes { @@ -2506,6 +2385,7 @@ multiclass RotateQuadLeftByBytes def v4f32: 
ROTQBYVecInst<v4f32>; def v2i64: ROTQBYVecInst<v2i64>; def v2f64: ROTQBYVecInst<v2f64>; + def i128: ROTQBYGenInst<i128, GPRC>; } defm ROTQBY: RotateQuadLeftByBytes; @@ -2516,12 +2396,15 @@ defm ROTQBY: RotateQuadLeftByBytes; class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>: RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val", - RotateShift, pattern>; + RotShiftQuad, pattern>; + +class ROTQBYIGenInst<ValueType type, RegisterClass rclass>: + ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val), + [(set (type rclass:$rT), + (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>; class ROTQBYIVecInst<ValueType vectype>: - ROTQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), - [(set (vectype VECREG:$rT), - (SPUrotbytes_left (vectype VECREG:$rA), (i16 uimm7:$val)))]>; + ROTQBYIGenInst<vectype, VECREG>; multiclass RotateQuadByBytesImm { @@ -2531,6 +2414,7 @@ multiclass RotateQuadByBytesImm def v4f32: ROTQBYIVecInst<v4f32>; def v2i64: ROTQBYIVecInst<v2i64>; def vfi64: ROTQBYIVecInst<v2f64>; + def i128: ROTQBYIGenInst<i128, GPRC>; } defm ROTQBYI: RotateQuadByBytesImm; @@ -2539,7 +2423,7 @@ defm ROTQBYI: RotateQuadByBytesImm; class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>: RI7Form<0b00110011100, OOL, IOL, "rotqbybi\t$rT, $rA, $shift", - RotateShift, pattern>; + RotShiftQuad, pattern>; class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>: ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift), @@ -2564,7 +2448,7 @@ defm ROTQBYBI : RotateQuadByBytesByBitshift; class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftQuad, pattern>; class ROTQBIVecInst<ValueType vectype>: ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), @@ -2589,7 +2473,7 @@ defm ROTQBI: RotateQuadByBitCount; class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>: RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val", - RotateShift, pattern>; + RotShiftQuad, pattern>; class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>: @@ -2624,7 +2508,7 @@ defm ROTQBII : RotateQuadByBitCountImm; class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftVec, pattern>; def ROTHMv8i16: ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), @@ -2666,7 +2550,7 @@ def : Pat<(srl R16C:$rA, R8C:$rB), class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>: RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val", - RotateShift, pattern>; + RotShiftVec, pattern>; def ROTHMIv8i16: ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val), @@ -2697,7 +2581,7 @@ def: Pat<(srl R16C:$rA, (i8 uimm7:$val)), // ROTM v4i32 form: See the ROTHM v8i16 comments. class ROTMInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftVec, pattern>; def ROTMv4i32: ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), @@ -2732,7 +2616,7 @@ def : Pat<(srl R32C:$rA, R8C:$rB), // ROTMI v4i32 form: See the comment for ROTHM v8i16. def ROTMIv4i32: RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), - "rotmi\t$rT, $rA, $val", RotateShift, + "rotmi\t$rT, $rA, $val", RotShiftVec, [(set (v4i32 VECREG:$rT), (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>; @@ -2745,7 +2629,7 @@ def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)), // ROTMI r32 form: know how to complement the immediate value. 
def ROTMIr32: RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val), - "rotmi\t$rT, $rA, $val", RotateShift, + "rotmi\t$rT, $rA, $val", RotShiftVec, [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>; def : Pat<(srl R32C:$rA, (i16 imm:$val)), @@ -2762,7 +2646,7 @@ def : Pat<(srl R32C:$rA, (i8 imm:$val)), class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftQuad, pattern>; class ROTQMBYVecInst<ValueType vectype>: ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), @@ -2785,9 +2669,13 @@ multiclass RotateQuadBytes defm ROTQMBY : RotateQuadBytes; +def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB), + (ROTQMBYr128 GPRC:$rA, + (SFIr32 R32C:$rB, 0))>; + class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>: RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val", - RotateShift, pattern>; + RotShiftQuad, pattern>; class ROTQMBYIVecInst<ValueType vectype>: ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), @@ -2827,7 +2715,7 @@ defm ROTQMBYI : RotateQuadBytesImm; class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftQuad, pattern>; class ROTQMBYBIVecInst<ValueType vectype>: ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), @@ -2839,6 +2727,8 @@ multiclass RotateMaskQuadByBitCount def v8i16: ROTQMBYBIVecInst<v8i16>; def v4i32: ROTQMBYBIVecInst<v4i32>; def v2i64: ROTQMBYBIVecInst<v2i64>; + def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB), + [/*no pattern*/]>; } defm ROTQMBYBI: RotateMaskQuadByBitCount; @@ -2850,7 +2740,7 @@ defm ROTQMBYBI: RotateMaskQuadByBitCount; class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB", - RotateShift, pattern>; + RotShiftQuad, pattern>; class ROTQMBIVecInst<ValueType vectype>: ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), @@ -2873,13 +2763,19 @@ multiclass RotateMaskQuadByBits defm ROTQMBI: RotateMaskQuadByBits; +def : Pat<(srl GPRC:$rA, R32C:$rB), + (ROTQMBYBIr128 (ROTQMBIr128 GPRC:$rA, + (SFIr32 R32C:$rB, 0)), + (SFIr32 R32C:$rB, 0))>; + + //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ // Rotate quad and mask by bits, immediate //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>: RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val", - RotateShift, pattern>; + RotShiftQuad, pattern>; class ROTQMBIIVecInst<ValueType vectype>: ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), @@ -2907,7 +2803,7 @@ defm ROTQMBII: RotateMaskQuadByBitsImm; def ROTMAHv8i16: RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - "rotmah\t$rT, $rA, $rB", RotateShift, + "rotmah\t$rT, $rA, $rB", RotShiftVec, [/* see patterns below - $rB must be negated */]>; def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R32C:$rB), @@ -2923,7 +2819,7 @@ def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R8C:$rB), def ROTMAHr16: RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB), - "rotmah\t$rT, $rA, $rB", RotateShift, + "rotmah\t$rT, $rA, $rB", RotShiftVec, [/* see patterns below - $rB must be negated */]>; def : Pat<(sra R16C:$rA, R32C:$rB), @@ -2939,7 +2835,7 @@ def : Pat<(sra R16C:$rA, R8C:$rB), def ROTMAHIv8i16: RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val), - "rotmahi\t$rT, $rA, $val", RotateShift, + 
"rotmahi\t$rT, $rA, $val", RotShiftVec, [(set (v8i16 VECREG:$rT), (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>; @@ -2951,7 +2847,7 @@ def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)), def ROTMAHIr16: RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val), - "rotmahi\t$rT, $rA, $val", RotateShift, + "rotmahi\t$rT, $rA, $val", RotShiftVec, [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>; def : Pat<(sra R16C:$rA, (i32 imm:$val)), @@ -2962,7 +2858,7 @@ def : Pat<(sra R16C:$rA, (i8 imm:$val)), def ROTMAv4i32: RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), - "rotma\t$rT, $rA, $rB", RotateShift, + "rotma\t$rT, $rA, $rB", RotShiftVec, [/* see patterns below - $rB must be negated */]>; def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R32C:$rB), @@ -2978,7 +2874,7 @@ def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R8C:$rB), def ROTMAr32: RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), - "rotma\t$rT, $rA, $rB", RotateShift, + "rotma\t$rT, $rA, $rB", RotShiftVec, [/* see patterns below - $rB must be negated */]>; def : Pat<(sra R32C:$rA, R32C:$rB), @@ -2995,7 +2891,7 @@ def : Pat<(sra R32C:$rA, R8C:$rB), class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>: RRForm<0b01011110000, OOL, IOL, "rotmai\t$rT, $rA, $val", - RotateShift, pattern>; + RotShiftVec, pattern>; class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>: ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val), @@ -4010,7 +3906,7 @@ def FCGTf32 : "fcgt\t$rT, $rA, $rB", SPrecFP, [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>; -def : Pat<(setugt R32FP:$rA, R32FP:$rB), +def : Pat<(setogt R32FP:$rA, R32FP:$rB), (FCGTf32 R32FP:$rA, R32FP:$rB)>; def FCMGTf32 : @@ -4018,7 +3914,7 @@ def FCMGTf32 : "fcmgt\t$rT, $rA, $rB", SPrecFP, [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>; -def : Pat<(setugt (fabs R32FP:$rA), (fabs R32FP:$rB)), +def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)), (FCMGTf32 R32FP:$rA, R32FP:$rB)>; //-------------------------------------------------------------------------- @@ -4320,7 +4216,7 @@ def : Pat<(fabs (v4f32 VECREG:$rA)), // in the odd pipeline) //===----------------------------------------------------------------------===// -def ENOP : SPUInstr<(outs), (ins), "enop", ExecNOP> { +def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> { let Pattern = []; let Inst{0-10} = 0b10000000010; @@ -4379,30 +4275,43 @@ def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>; def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))), - (ORi128_vec VECREG:$src)>; + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))), - (ORi128_vec VECREG:$src)>; + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))), - (ORi128_vec VECREG:$src)>; + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))), - (ORi128_vec VECREG:$src)>; + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))), - (ORi128_vec VECREG:$src)>; + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))), - (ORi128_vec VECREG:$src)>; + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))), - (v16i8 (ORvec_i128 GPRC:$src))>; + (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))), - (v8i16 (ORvec_i128 GPRC:$src))>; + (v8i16 (COPY_TO_REGCLASS 
GPRC:$src, VECREG))>; def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))), - (v4i32 (ORvec_i128 GPRC:$src))>; + (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))), - (v2i64 (ORvec_i128 GPRC:$src))>; + (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))), - (v4f32 (ORvec_i128 GPRC:$src))>; + (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))), - (v2f64 (ORvec_i128 GPRC:$src))>; + (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; + +def : Pat<(i32 (bitconvert R32FP:$rA)), + (COPY_TO_REGCLASS R32FP:$rA, R32C)>; + +def : Pat<(f32 (bitconvert R32C:$rA)), + (COPY_TO_REGCLASS R32C:$rA, R32FP)>; + +def : Pat<(i64 (bitconvert R64FP:$rA)), + (COPY_TO_REGCLASS R64FP:$rA, R64C)>; + +def : Pat<(f64 (bitconvert R64C:$rA)), + (COPY_TO_REGCLASS R64C:$rA, R64FP)>; + //===----------------------------------------------------------------------===// // Instruction patterns: @@ -4453,11 +4362,12 @@ def : Pat<(i32 (zext R8C:$rSrc)), // zext 8->64: Zero extend bytes to double words def : Pat<(i64 (zext R8C:$rSrc)), - (ORi64_v2i64 (SELBv4i32 (ROTQMBYv4i32 - (ORv4i32_i32 (ANDIi8i32 R8C:$rSrc, 0xff)), + (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32 + (COPY_TO_REGCLASS + (ANDIi8i32 R8C:$rSrc,0xff), VECREG), 0x4), (ILv4i32 0x0), - (FSMBIv4i32 0x0f0f)))>; + (FSMBIv4i32 0x0f0f)), R64C)>; // anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits def : Pat<(i16 (anyext R8C:$rSrc)), @@ -4465,7 +4375,7 @@ def : Pat<(i16 (anyext R8C:$rSrc)), // anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits def : Pat<(i32 (anyext R8C:$rSrc)), - (ORIi8i32 R8C:$rSrc, 0)>; + (COPY_TO_REGCLASS R8C:$rSrc, R32C)>; // sext 16->64: Sign extend halfword to double word def : Pat<(sext_inreg R64C:$rSrc, i16), @@ -4489,7 +4399,7 @@ def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))), // anyext 16->32: Extend 16->32 bits, irrespective of sign def : Pat<(i32 (anyext R16C:$rSrc)), - (ORIi16i32 R16C:$rSrc, 0)>; + (COPY_TO_REGCLASS R16C:$rSrc, R32C)>; //===----------------------------------------------------------------------===// // Truncates: @@ -4498,61 +4408,61 @@ def : Pat<(i32 (anyext R16C:$rSrc)), //===----------------------------------------------------------------------===// def : Pat<(i8 (trunc GPRC:$src)), - (ORi8_v16i8 + (COPY_TO_REGCLASS (SHUFBgprc GPRC:$src, GPRC:$src, - (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)))>; + (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>; def : Pat<(i8 (trunc R64C:$src)), - (ORi8_v16i8 + (COPY_TO_REGCLASS (SHUFBv2i64_m32 - (ORv2i64_i64 R64C:$src), - (ORv2i64_i64 R64C:$src), - (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)))>; + (COPY_TO_REGCLASS R64C:$src, VECREG), + (COPY_TO_REGCLASS R64C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>; def : Pat<(i8 (trunc R32C:$src)), - (ORi8_v16i8 + (COPY_TO_REGCLASS (SHUFBv4i32_m32 - (ORv4i32_i32 R32C:$src), - (ORv4i32_i32 R32C:$src), - (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>; + (COPY_TO_REGCLASS R32C:$src, VECREG), + (COPY_TO_REGCLASS R32C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>; def : Pat<(i8 (trunc R16C:$src)), - (ORi8_v16i8 + (COPY_TO_REGCLASS (SHUFBv4i32_m32 - (ORv8i16_i16 R16C:$src), - (ORv8i16_i16 R16C:$src), - (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)))>; + (COPY_TO_REGCLASS R16C:$src, VECREG), + (COPY_TO_REGCLASS R16C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>; def : Pat<(i16 (trunc GPRC:$src)), - (ORi16_v8i16 + (COPY_TO_REGCLASS (SHUFBgprc GPRC:$src, GPRC:$src, - 
(IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)))>; + (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>; def : Pat<(i16 (trunc R64C:$src)), - (ORi16_v8i16 + (COPY_TO_REGCLASS (SHUFBv2i64_m32 - (ORv2i64_i64 R64C:$src), - (ORv2i64_i64 R64C:$src), - (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)))>; + (COPY_TO_REGCLASS R64C:$src, VECREG), + (COPY_TO_REGCLASS R64C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>; def : Pat<(i16 (trunc R32C:$src)), - (ORi16_v8i16 + (COPY_TO_REGCLASS (SHUFBv4i32_m32 - (ORv4i32_i32 R32C:$src), - (ORv4i32_i32 R32C:$src), - (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)))>; + (COPY_TO_REGCLASS R32C:$src, VECREG), + (COPY_TO_REGCLASS R32C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>; def : Pat<(i32 (trunc GPRC:$src)), - (ORi32_v4i32 + (COPY_TO_REGCLASS (SHUFBgprc GPRC:$src, GPRC:$src, - (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)))>; + (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>; def : Pat<(i32 (trunc R64C:$src)), - (ORi32_v4i32 + (COPY_TO_REGCLASS (SHUFBv2i64_m32 - (ORv2i64_i64 R64C:$src), - (ORv2i64_i64 R64C:$src), - (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)))>; + (COPY_TO_REGCLASS R64C:$src, VECREG), + (COPY_TO_REGCLASS R64C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>; //===----------------------------------------------------------------------===// // Address generation: SPU, like PPC, has to split addresses into high and diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp index 25ba88a..99aaeb0 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -24,9 +24,8 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) { GlobalPrefix = ""; PrivateGlobalPrefix = ".L"; - // Has leb128, .loc and .file + // Has leb128 HasLEB128 = true; - HasDotLocAndDotFile = true; SupportsDebugInformation = true; diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 647da30..a6e621f 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -19,16 +19,16 @@ def SPU_GenControl : SDTypeProfile<1, 1, []>; def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>; def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq, - [SDNPHasChain, SDNPOutFlag]>; + [SDNPHasChain, SDNPOutGlue]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; //===----------------------------------------------------------------------===// // Operand constraints: //===----------------------------------------------------------------------===// def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; // Operand type constraints for vector shuffle/permute operations @@ -83,10 +83,6 @@ def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>; // SPUISelLowering.h): def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>; -// Shift left quadword by bits and bytes -def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>; -def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>; - // Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only): def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>; def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>; @@ -105,6 +101,12 @@ def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT", def 
SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
                               SPUvecshift_type>;
 
+// Shift the entire quad left by bytes/bits. Zeros are shifted in on the right.
+// SHL_BITS is the same as SHL for i128, but ISD::SHL is not implemented for
+// i128.
+def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>;
+def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>;
+def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>;
+
 // SPU form select mask for bytes, immediate
 def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
 
@@ -154,4 +156,4 @@ class NoEncode<string E> {
 //===----------------------------------------------------------------------===//
 
 def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone,
-                     [SDNPHasChain, SDNPOptInFlag]>;
+                     [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/lib/Target/CellSPU/SPUNopFiller.cpp b/lib/Target/CellSPU/SPUNopFiller.cpp
new file mode 100644
index 0000000..e2bd2d7
--- /dev/null
+++ b/lib/Target/CellSPU/SPUNopFiller.cpp
@@ -0,0 +1,153 @@
+//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The final pass just before assembly printing. This pass is the last
+// checkpoint where nops and lnops are added to the instruction stream to
+// satisfy the dual-issue requirements. The actual dual-issue scheduling is
+// done (TODO: currently nowhere).
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+  struct SPUNopFiller : public MachineFunctionPass {
+
+    TargetMachine &TM;
+    const TargetInstrInfo *TII;
+    const InstrItineraryData *IID;
+    bool isEvenPlace;  // the instruction slot (mem address) at hand is even/odd
+
+    static char ID;
+    SPUNopFiller(TargetMachine &tm)
+      : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()),
+        IID(tm.getInstrItineraryData())
+    {
+      DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; );
+    }
+
+    virtual const char *getPassName() const {
+      return "SPU nop/lnop Filler";
+    }
+
+    void runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+    bool runOnMachineFunction(MachineFunction &F) {
+      isEvenPlace = true; // all functions get an .align 3 directive at start
+      for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+           FI != FE; ++FI)
+        runOnMachineBasicBlock(*FI);
+      return true; // never do any more modifications, just print it!
+    }
+
+    typedef enum { none   = 0, // no more instructions in this function / BB
                   pseudo = 1, // this does not get executed
                   even   = 2,
                   odd    = 3 } SPUOpPlace;
+    SPUOpPlace getOpPlacement( MachineInstr &instr );
+
+  };
+  char SPUNopFiller::ID = 0;
+
+}
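For context on the implementation that follows: the SPU fetches instruction pairs, issuing the instruction at an even address to the even pipeline and the one at an odd address to the odd pipeline, so the pass inserts a NOP (even slot) or LNOP (odd slot) wherever an instruction would land in a wrong-parity slot. Here is a greedy, self-contained model of that padding rule; it is a simplification that deliberately ignores the pseudo-instruction lookahead and the block-end padding the real pass performs:

#include <vector>

enum Pipe { Even, Odd };  // the pipeline an instruction must issue in

// Pad the stream so every Even instruction lands in an even slot and every
// Odd one in an odd slot; ENOP fills even slots, LNOP fills odd slots.
std::vector<Pipe> padToParity(const std::vector<Pipe> &in) {
  std::vector<Pipe> out;
  for (Pipe p : in) {
    bool evenSlot = (out.size() % 2) == 0;  // functions start even-aligned
    if (evenSlot && p == Odd)
      out.push_back(Even);                  // insert an ENOP
    else if (!evenSlot && p == Even)
      out.push_back(Odd);                   // insert an LNOP
    out.push_back(p);
  }
  return out;
}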
+
+// Fill a BasicBlock to alignment.
+// In the assembly we align the functions to 'even' addresses, but
+// basic blocks have an implicit alignment. We hereby define
+// basic blocks to have the same, even, alignment.
+void SPUNopFiller::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+  assert( isEvenPlace && "basic block starts at an odd address");
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+  {
+    SPUOpPlace this_optype, next_optype;
+    MachineBasicBlock::iterator J = I;
+    J++;
+
+    this_optype = getOpPlacement( *I );
+    next_optype = none;
+    while (J!=MBB.end()){
+      next_optype = getOpPlacement( *J );
+      ++J;
+      if (next_optype != pseudo )
+        break;
+    }
+
+    // pad: odd(wrong), even(wrong), ...
+    // to:  nop(corr), odd(corr), even(corr)...
+    if( isEvenPlace && this_optype == odd && next_optype == even ) {
+      DEBUG( dbgs() <<"Adding NOP before: "; );
+      DEBUG( I->dump(); );
+      BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP));
+      isEvenPlace=false;
+    }
+
+    // pad: even(wrong), odd(wrong), ...
+    // to:  lnop(corr), even(corr), odd(corr)...
+    else if ( !isEvenPlace && this_optype == even && next_optype == odd){
+      DEBUG( dbgs() <<"Adding LNOP before: "; );
+      DEBUG( I->dump(); );
+      BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP));
+      isEvenPlace=true;
+    }
+
+    // now go to the next mem slot
+    if( this_optype != pseudo )
+      isEvenPlace = !isEvenPlace;
+
+  }
+
+  // pad the basic block end
+  if( !isEvenPlace ){
+    MachineBasicBlock::iterator J = MBB.end();
+    J--;
+    if (getOpPlacement( *J ) == odd) {
+      DEBUG( dbgs() <<"Padding basic block with NOP\n"; );
+      BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP));
+    }
+    else {
+      J++;
+      DEBUG( dbgs() <<"Padding basic block with LNOP\n"; );
+      BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP));
+    }
+    isEvenPlace=true;
+  }
+}
+
+FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) {
+  return new SPUNopFiller(tm);
+}
+
+// Figure out if 'instr' is executed in the even or odd pipeline
+SPUNopFiller::SPUOpPlace
+SPUNopFiller::getOpPlacement( MachineInstr &instr ) {
+  int sc = instr.getDesc().getSchedClass();
+  const InstrStage *stage = IID->beginStage(sc);
+  unsigned FUs = stage->getUnits();
+  SPUOpPlace retval;
+
+  switch( FUs ) {
+    case 0: retval = pseudo; break;
+    case 1: retval = odd;    break;
+    case 2: retval = even;   break;
+    default: retval = pseudo;
+             assert( false && "got unknown FuncUnit\n");
+             break;
+  };
+  return retval;
+}
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
index e1a0358..96cde51 100644
--- a/lib/Target/CellSPU/SPUOperands.td
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -143,7 +143,7 @@ def immU16 : PatLeaf<(imm), [{
 def imm18  : PatLeaf<(imm), [{
   // imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
   int Value = (int) N->getZExtValue();
-  return ((Value & ((1 << 19) - 1)) == Value);
+  return isUInt<18>(Value);
 }]>;
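The imm18 change just above also fixes an off-by-one: the old expression masked with (1 << 19) - 1 and therefore accepted 19-bit values. A tiny demonstration of the boundary, using a stand-in for llvm::isUInt<N> (named isUIntN here purely to keep the snippet self-contained):

#include <cassert>
#include <cstdint>

// Stand-in for llvm::isUInt<N>: true iff x fits in an N-bit unsigned field.
template <unsigned N> bool isUIntN(uint64_t x) {
  return x < (UINT64_C(1) << N);
}

int main() {
  assert(isUIntN<18>(0x3FFFF));   // 2^18 - 1 fits
  assert(!isUIntN<18>(0x40000));  // 2^18 does not; the old 19-bit mask
                                  // wrongly accepted values up to 2^19 - 1
  return 0;
}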
 
 def lo16 : PatLeaf<(imm), [{
@@ -203,7 +203,7 @@ def FPimm_sext16 : SDNodeXForm<fpimm, [{
 
 def FPimm_u18 : SDNodeXForm<fpimm, [{
   float fval = N->getValueAPF().convertToFloat();
-  return getI32Imm(FloatToBits(fval) & ((1 << 19) - 1));
+  return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1));
 }]>;
 
 def fpimmSExt16 : PatLeaf<(fpimm), [{
@@ -225,7 +225,7 @@ def hi16_f32 : PatLeaf<(fpimm), [{
 def fpimm18  : PatLeaf<(fpimm), [{
   if (N->getValueType(0) == MVT::f32) {
     uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
-    return ((Value & ((1 << 19) - 1)) == Value);
+    return isUInt<18>(Value);
   }
 
   return false;
@@ -654,7 +654,11 @@ def memrr : Operand<iPTR> {
 // A-form   : abs    (256K LSA offset)
 // D-form(2): [r+I7] (7-bit signed offset + reg)
 
-def dform_addr   : ComplexPattern<iPTR, 2, "SelectDFormAddr",  [], []>;
-def xform_addr   : ComplexPattern<iPTR, 2, "SelectXFormAddr",  [], []>;
-def aform_addr   : ComplexPattern<iPTR, 2, "SelectAFormAddr",  [], []>;
-def dform2_addr  : ComplexPattern<iPTR, 2, "SelectDForm2Addr", [], []>;
+def dform_addr   : ComplexPattern<iPTR, 2, "SelectDFormAddr",
+                                  [], [SDNPWantRoot]>;
+def xform_addr   : ComplexPattern<iPTR, 2, "SelectXFormAddr",
+                                  [], [SDNPWantRoot]>;
+def aform_addr   : ComplexPattern<iPTR, 2, "SelectAFormAddr",
+                                  [], [SDNPWantRoot]>;
+def dform2_addr  : ComplexPattern<iPTR, 2, "SelectDForm2Addr",
+                                  [], [SDNPWantRoot]>;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index cf71891..0bdd50a 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -18,7 +18,7 @@
 #include "SPUInstrBuilder.h"
 #include "SPUSubtarget.h"
 #include "SPUMachineFunction.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
 #include "llvm/Constants.h"
 #include "llvm/Type.h"
 #include "llvm/CodeGen/ValueTypes.h"
@@ -30,7 +30,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -240,25 +240,6 @@ BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 // Stack Frame Processing methods
 //===----------------------------------------------------------------------===//
 
-// needsFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-//
-static bool needsFP(const MachineFunction &MF) {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
-}
-
-//--------------------------------------------------------------------------
-// hasFP - Return true if the specified function actually has a dedicated frame
-// pointer register. This is true if the function needs a frame pointer and has
-// a non-zero stack size.
-bool -SPURegisterInfo::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->getStackSize() && needsFP(MF); -} - //-------------------------------------------------------------------------- void SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF, @@ -302,7 +283,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, MachineOperand &MO = MI.getOperand(OpNo); // Offset is biased by $lr's slot at the bottom. - Offset += MO.getImm() + MFI->getStackSize() + SPUFrameInfo::minStackSize(); + Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize(); assert((Offset & 0xf) == 0 && "16-byte alignment violated in eliminateFrameIndex"); @@ -329,225 +310,6 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } } -/// determineFrameLayout - Determine the size of the frame and maximum call -/// frame size. -void -SPURegisterInfo::determineFrameLayout(MachineFunction &MF) const -{ - MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Get the number of bytes to allocate from the FrameInfo - unsigned FrameSize = MFI->getStackSize(); - - // Get the alignments provided by the target, and the maximum alignment - // (if any) of the fixed frame objects. - unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); - unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment()); - assert(isPowerOf2_32(Align) && "Alignment is not power of 2"); - unsigned AlignMask = Align - 1; - - // Get the maximum call frame size of all the calls. - unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); - - // If we have dynamic alloca then maxCallFrameSize needs to be aligned so - // that allocations will be aligned. - if (MFI->hasVarSizedObjects()) - maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; - - // Update maximum call frame size. - MFI->setMaxCallFrameSize(maxCallFrameSize); - - // Include call frame size in total. - FrameSize += maxCallFrameSize; - - // Make sure the frame is aligned. - FrameSize = (FrameSize + AlignMask) & ~AlignMask; - - // Update frame info. - MFI->setStackSize(FrameSize); -} - -void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) - const { - // Mark LR and SP unused, since the prolog spills them to stack and - // we don't want anyone else to spill them for us. - // - // Also, unless R2 is really used someday, don't spill it automatically. - MF.getRegInfo().setPhysRegUnused(SPU::R0); - MF.getRegInfo().setPhysRegUnused(SPU::R1); - MF.getRegInfo().setPhysRegUnused(SPU::R2); - - MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetRegisterClass *RC = &SPU::R32CRegClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - - -} - -void SPURegisterInfo::emitPrologue(MachineFunction &MF) const -{ - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo &MMI = MF.getMMI(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - - // Prepare for debug frame info. - bool hasDebugInfo = MMI.hasDebugInfo(); - MCSymbol *FrameLabel = 0; - - // Move MBBI back to the beginning of the function. - MBBI = MBB.begin(); - - // Work out frame sizes. 
- determineFrameLayout(MF); - int FrameSize = MFI->getStackSize(); - - assert((FrameSize & 0xf) == 0 - && "SPURegisterInfo::emitPrologue: FrameSize not aligned"); - - // the "empty" frame size is 16 - just the register scavenger spill slot - if (FrameSize > 16 || MFI->adjustsStack()) { - FrameSize = -(FrameSize + SPUFrameInfo::minStackSize()); - if (hasDebugInfo) { - // Mark effective beginning of when frame pointer becomes valid. - FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel); - } - - // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp) - // for the ABI - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16) - .addReg(SPU::R1); - if (isInt<10>(FrameSize)) { - // Spill $sp to adjusted $sp - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize) - .addReg(SPU::R1); - // Adjust $sp by required amout - BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1) - .addImm(FrameSize); - } else if (isInt<16>(FrameSize)) { - // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use - // $r2 to adjust $sp: - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2) - .addImm(-16) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2) - .addImm(FrameSize); - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1) - .addReg(SPU::R2) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1) - .addReg(SPU::R1) - .addReg(SPU::R2); - BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2) - .addReg(SPU::R2) - .addImm(16); - BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2) - .addReg(SPU::R2) - .addReg(SPU::R1); - } else { - report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); - } - - if (hasDebugInfo) { - std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - - // Show update of SP. - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize); - Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); - - // Add callee saved registers to move list. - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); - unsigned Reg = CSI[I].getReg(); - if (Reg == SPU::R0) continue; - MachineLocation CSDst(MachineLocation::VirtualFP, Offset); - MachineLocation CSSrc(Reg); - Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc)); - } - - // Mark effective beginning of when frame pointer is ready. - MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel); - - MachineLocation FPDst(SPU::R1); - MachineLocation FPSrc(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc)); - } - } else { - // This is a leaf function -- insert a branch hint iff there are - // sufficient number instructions in the basic block. Note that - // this is just a best guess based on the basic block's size. 
- if (MBB.size() >= (unsigned) SPUFrameInfo::branchHintPenalty()) { - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - dl = MBBI->getDebugLoc(); - - // Insert terminator label - BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)) - .addSym(MMI.getContext().CreateTempSymbol()); - } - } -} - -void -SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const -{ - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - int FrameSize = MFI->getStackSize(); - int LinkSlotOffset = SPUFrameInfo::stackSlotSize(); - DebugLoc dl = MBBI->getDebugLoc(); - - assert(MBBI->getOpcode() == SPU::RET && - "Can only insert epilog into returning blocks"); - assert((FrameSize & 0xf) == 0 - && "SPURegisterInfo::emitEpilogue: FrameSize not aligned"); - - // the "empty" frame size is 16 - just the register scavenger spill slot - if (FrameSize > 16 || MFI->adjustsStack()) { - FrameSize = FrameSize + SPUFrameInfo::minStackSize(); - if (isInt<10>(FrameSize + LinkSlotOffset)) { - // Reload $lr, adjust $sp by required amount - // Note: We do this to slightly improve dual issue -- not by much, but it - // is an opportunity for dual issue. - BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0) - .addImm(FrameSize + LinkSlotOffset) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1) - .addReg(SPU::R1) - .addImm(FrameSize); - } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) { - // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use - // $r2 to adjust $sp: - BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2) - .addImm(16) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2) - .addImm(FrameSize); - BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1) - .addReg(SPU::R1) - .addReg(SPU::R2); - BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0) - .addImm(16) - .addReg(SPU::R1); - BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2). - addReg(SPU::R2) - .addImm(16); - BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2) - .addReg(SPU::R2) - .addReg(SPU::R1); - } else { - report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); - } - } -} - unsigned SPURegisterInfo::getRARegister() const { @@ -560,26 +322,16 @@ SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const return SPU::R1; } -void -SPURegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const -{ - // Initial state of the frame pointer is R1. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(SPU::R1, 0); - Moves.push_back(MachineMove(0, Dst, Src)); -} - - int SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { // FIXME: Most probably dwarf numbers differs for Linux and Darwin return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } -int +int SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const { - switch(dFormOpcode) + switch(dFormOpcode) { case SPU::AIr32: return SPU::Ar32; case SPU::LQDr32: return SPU::LQXr32; @@ -602,10 +354,10 @@ SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const // TODO this is already copied from PPC. Could this convenience function // be moved to the RegScavenger class? 
-unsigned -SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II, +unsigned +SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS, - const TargetRegisterClass *RC, + const TargetRegisterClass *RC, int SPAdj) const { assert(RS && "Register scavenging must be on"); diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index aedb769..641da04 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -33,7 +33,7 @@ namespace llvm { public: SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii); - + //! Translate a register's enum value to a register number /*! This method translates a register's enum value to it's regiser number, @@ -56,8 +56,6 @@ namespace llvm { //! Return the reserved registers BitVector getReservedRegs(const MachineFunction &MF) const; - //! Prediate: Target has dedicated frame pointer - bool hasFP(const MachineFunction &MF) const; //! Eliminate the call frame setup pseudo-instructions void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, @@ -65,21 +63,11 @@ namespace llvm { //! Convert frame indicies into machine operands void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - //! Determine the frame's layour - void determineFrameLayout(MachineFunction &MF) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const; - //! Emit the function prologue - void emitPrologue(MachineFunction &MF) const; - //! Emit the function epilogue - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + //! Get return address register (LR, aka R0) unsigned getRARegister() const; //! Get the stack frame register (SP, aka R1) unsigned getFrameRegister(const MachineFunction &MF) const; - //! Perform target-specific stack frame setup. - void getInitialFrameState(std::vector<MachineMove> &Moves) const; //------------------------------------------------------------------------ // New methods added: diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td index a0b581f..9cd3c23 100644 --- a/lib/Target/CellSPU/SPUSchedule.td +++ b/lib/Target/CellSPU/SPUSchedule.td @@ -32,11 +32,12 @@ def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer) def ByteOp : InstrItinClass; // EVEN_UNIT def IntegerOp : InstrItinClass; // EVEN_UNIT def IntegerMulDiv: InstrItinClass; // EVEN_UNIT -def RotateShift : InstrItinClass; // EVEN_UNIT +def RotShiftVec : InstrItinClass; // EVEN_UNIT Inter vector +def RotShiftQuad : InstrItinClass; // ODD_UNIT Entire quad def ImmLoad : InstrItinClass; // EVEN_UNIT /* Note: The itinerary for the Cell SPU is somewhat contrived... 
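   [Editor's note on the hunks below: ProcessorItineraries takes a second
   list argument in this revision, a list of operand bypasses, left empty
   here, which is why the definition gains "[], [".  The old RotateShift
   class is also split in two because the two shift/rotate families issue
   to different pipelines: element-wise vector shifts and rotates
   (RotShiftVec) execute on the even pipeline, while shifts and rotates of
   the entire quadword (RotShiftQuad) execute on the odd pipeline.]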
 */
-def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [
+def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [
   InstrItinData<LoadStore   , [InstrStage<6,  [ODD_UNIT]>]>,
   InstrItinData<BranchHints , [InstrStage<6,  [ODD_UNIT]>]>,
   InstrItinData<BranchResolv, [InstrStage<4,  [ODD_UNIT]>]>,
@@ -51,7 +52,8 @@ def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [
   InstrItinData<FPInt       , [InstrStage<2,  [EVEN_UNIT]>]>,
   InstrItinData<ByteOp      , [InstrStage<4,  [EVEN_UNIT]>]>,
   InstrItinData<IntegerOp   , [InstrStage<2,  [EVEN_UNIT]>]>,
-  InstrItinData<RotateShift , [InstrStage<4,  [EVEN_UNIT]>]>,
+  InstrItinData<RotShiftVec , [InstrStage<4,  [EVEN_UNIT]>]>,
+  InstrItinData<RotShiftQuad, [InstrStage<4,  [ODD_UNIT]>]>,
   InstrItinData<IntegerMulDiv,[InstrStage<7,  [EVEN_UNIT]>]>,
   InstrItinData<ImmLoad     , [InstrStage<2,  [EVEN_UNIT]>]>
   ]>;
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
index 0f18b7f..07c8352 100644
--- a/lib/Target/CellSPU/SPUSubtarget.cpp
+++ b/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -14,6 +14,8 @@
 #include "SPUSubtarget.h"
 #include "SPU.h"
 #include "SPUGenSubtarget.inc"
+#include "llvm/ADT/SmallVector.h"
+#include "SPURegisterInfo.h"

 using namespace llvm;

@@ -34,3 +36,22 @@ SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &FS) :
 /// producing code for the JIT.
 void SPUSubtarget::SetJITMode() {
 }
+
+/// Enable PostRA scheduling for optimization levels -O2 and -O3.
+bool SPUSubtarget::enablePostRAScheduler(
+                       CodeGenOpt::Level OptLevel,
+                       TargetSubtarget::AntiDepBreakMode& Mode,
+                       RegClassVector& CriticalPathRCs) const {
+  Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+  // CriticalPathRCs is the set of register classes for which
+  // anti-dependency breaking is performed; do it for all of them.
+  CriticalPathRCs.clear();
+  CriticalPathRCs.push_back(&SPU::R8CRegClass);
+  CriticalPathRCs.push_back(&SPU::R16CRegClass);
+  CriticalPathRCs.push_back(&SPU::R32CRegClass);
+  CriticalPathRCs.push_back(&SPU::R32FPRegClass);
+  CriticalPathRCs.push_back(&SPU::R64CRegClass);
+  CriticalPathRCs.push_back(&SPU::VECREGRegClass);
+  return OptLevel >= CodeGenOpt::Default;
+}
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
index 88201c6..d7929302 100644
--- a/lib/Target/CellSPU/SPUSubtarget.h
+++ b/lib/Target/CellSPU/SPUSubtarget.h
@@ -81,9 +81,13 @@ namespace llvm {
     /// properties of this subtarget.
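     /// [Editor's note: in the layout string below only the v64 entry
     /// changes, from v64:128:128 to v64:64:128; 64-bit vectors now get
     /// 64-bit ABI alignment with a 128-bit preferred alignment.]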
     const char *getTargetDataString() const {
       return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
-             "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:128:128-v128:128:128"
+             "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128"
              "-s:128:128-n32:64";
     }
+
+    bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+                               TargetSubtarget::AntiDepBreakMode& Mode,
+                               RegClassVector& CriticalPathRCs) const;
   };
 } // End llvm namespace
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 480ec3f..3ed7361 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -29,7 +29,7 @@ extern "C" void LLVMInitializeCellSPUTarget() {
 }

 const std::pair<unsigned, int> *
-SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
+SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
   NumEntries = 1;
   return &LR[0];
 }
@@ -40,7 +40,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
     Subtarget(TT, FS),
     DataLayout(Subtarget.getTargetDataString()),
     InstrInfo(*this),
-    FrameInfo(*this),
+    FrameLowering(Subtarget),
     TLInfo(*this),
     TSInfo(*this),
     InstrItins(Subtarget.getInstrItineraryData()) {
@@ -59,3 +59,12 @@ bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
   PM.add(createSPUISelDag(*this));
   return false;
 }
+
+// Passes to run just before printing the assembly.
+bool SPUTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+{
+  // Align instructions with nops/lnops for dual issue.
+  PM.add(createSPUNopFillerPass(*this));
+  return true;
+}
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 7e02701..75abd5e 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -18,14 +18,14 @@
 #include "SPUInstrInfo.h"
 #include "SPUISelLowering.h"
 #include "SPUSelectionDAGInfo.h"
-#include "SPUFrameInfo.h"
+#include "SPUFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetData.h"

 namespace llvm {
 class PassManager;
 class GlobalValue;
-class TargetFrameInfo;
+class TargetFrameLowering;

 /// SPUTargetMachine
 ///
@@ -33,7 +33,7 @@ class SPUTargetMachine : public LLVMTargetMachine {
   SPUSubtarget        Subtarget;
   const TargetData    DataLayout;
   SPUInstrInfo        InstrInfo;
-  SPUFrameInfo        FrameInfo;
+  SPUFrameLowering    FrameLowering;
   SPUTargetLowering   TLInfo;
   SPUSelectionDAGInfo TSInfo;
   InstrItineraryData  InstrItins;
@@ -48,8 +48,8 @@ public:
   virtual const SPUInstrInfo *getInstrInfo() const {
     return &InstrInfo;
   }
-  virtual const SPUFrameInfo *getFrameInfo() const {
-    return &FrameInfo;
+  virtual const SPUFrameLowering *getFrameLowering() const {
+    return &FrameLowering;
   }
   /*! \note Cell SPU does not support JIT today.
It could support JIT at some @@ -75,13 +75,14 @@ public: return &DataLayout; } - virtual const InstrItineraryData getInstrItineraryData() const { - return InstrItins; + virtual const InstrItineraryData *getInstrItineraryData() const { + return &InstrItins; } // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level); }; } // end namespace llvm diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt index f8182b8..e937559 100644 --- a/lib/Target/CppBackend/CMakeLists.txt +++ b/lib/Target/CppBackend/CMakeLists.txt @@ -1,3 +1,5 @@ add_llvm_target(CppBackend CPPBackend.cpp ) + +add_subdirectory(TargetInfo) diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index f08559f..71d6049 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -358,6 +358,7 @@ std::string CppWriter::getCppName(const Type* Ty) { case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())"; case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())"; case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())"; + case Type::X86_MMXTyID: return "Type::getX86_MMXTy(mod->getContext())"; default: error("Invalid primitive type"); break; @@ -1563,11 +1564,25 @@ void CppWriter::printFunctionUses(const Function* F) { // If the operand references a GVal or Constant, make a note of it if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) { gvs.insert(GV); - if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) - if (GVar->hasInitializer()) - consts.insert(GVar->getInitializer()); - } else if (Constant* C = dyn_cast<Constant>(operand)) + if (GenerationType != GenFunction) + if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + if (GVar->hasInitializer()) + consts.insert(GVar->getInitializer()); + } else if (Constant* C = dyn_cast<Constant>(operand)) { consts.insert(C); + for (unsigned j = 0; j < C->getNumOperands(); ++j) { + // If the operand references a GVal or Constant, make a note of it + Value* operand = C->getOperand(j); + printType(operand->getType()); + if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) { + gvs.insert(GV); + if (GenerationType != GenFunction) + if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + if (GVar->hasInitializer()) + consts.insert(GVar->getInitializer()); + } + } + } } } } @@ -1590,7 +1605,7 @@ void CppWriter::printFunctionUses(const Function* F) { printVariableHead(F); } -// Print the constants found + // Print the constants found nl(Out) << "// Constant Definitions"; nl(Out); for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(), E = consts.end(); I != E; ++I) { @@ -1600,11 +1615,13 @@ void CppWriter::printFunctionUses(const Function* F) { // Process the global variables definitions now that all the constants have // been emitted. These definitions just couple the gvars with their constant // initializers. 
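  // [Editor's note: the guard added below skips these definitions when only
  // a single function is being generated (GenerationType == GenFunction),
  // mirroring the GenFunction checks introduced in the hunk above.]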
- nl(Out) << "// Global Variable Definitions"; nl(Out); - for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end(); - I != E; ++I) { - if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I)) - printVariableBody(GV); + if (GenerationType != GenFunction) { + nl(Out) << "// Global Variable Definitions"; nl(Out); + for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end(); + I != E; ++I) { + if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I)) + printVariableBody(GV); + } } } diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt new file mode 100644 index 0000000..87e7cb5 --- /dev/null +++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt @@ -0,0 +1,8 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMMBlazeAsmParser + MBlazeAsmLexer.cpp + MBlazeAsmParser.cpp + ) + diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp new file mode 100644 index 0000000..1903796 --- /dev/null +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp @@ -0,0 +1,127 @@ +//===-- MBlazeAsmLexer.cpp - Tokenize MBlaze assembly to AsmTokens --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MBlaze.h" +#include "MBlazeTargetMachine.h" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" + +#include "llvm/Target/TargetAsmLexer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegistry.h" + +#include <string> +#include <map> + +using namespace llvm; + +namespace { + + class MBlazeBaseAsmLexer : public TargetAsmLexer { + const MCAsmInfo &AsmInfo; + + const AsmToken &lexDefinite() { + return getLexer()->Lex(); + } + + AsmToken LexTokenUAL(); + protected: + typedef std::map <std::string, unsigned> rmap_ty; + + rmap_ty RegisterMap; + + void InitRegisterMap(const TargetRegisterInfo *info) { + unsigned numRegs = info->getNumRegs(); + + for (unsigned i = 0; i < numRegs; ++i) { + const char *regName = info->getName(i); + if (regName) + RegisterMap[regName] = i; + } + } + + unsigned MatchRegisterName(StringRef Name) { + rmap_ty::iterator iter = RegisterMap.find(Name.str()); + if (iter != RegisterMap.end()) + return iter->second; + else + return 0; + } + + AsmToken LexToken() { + if (!Lexer) { + SetError(SMLoc(), "No MCAsmLexer installed"); + return AsmToken(AsmToken::Error, "", 0); + } + + switch (AsmInfo.getAssemblerDialect()) { + default: + SetError(SMLoc(), "Unhandled dialect"); + return AsmToken(AsmToken::Error, "", 0); + case 0: + return LexTokenUAL(); + } + } + public: + MBlazeBaseAsmLexer(const Target &T, const MCAsmInfo &MAI) + : TargetAsmLexer(T), AsmInfo(MAI) { + } + }; + + class MBlazeAsmLexer : public MBlazeBaseAsmLexer { + public: + MBlazeAsmLexer(const Target &T, const MCAsmInfo &MAI) + : MBlazeBaseAsmLexer(T, MAI) { + std::string tripleString("mblaze-unknown-unknown"); + std::string featureString; + OwningPtr<const TargetMachine> + targetMachine(T.createTargetMachine(tripleString, featureString)); + InitRegisterMap(targetMachine->getRegisterInfo()); + } + }; +} + +AsmToken MBlazeBaseAsmLexer::LexTokenUAL() { + 
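  // [Editor's note: lex one token with the underlying MCAsmLexer, then
  // reclassify identifiers that name registers as register tokens, using
  // the lower-cased spelling and the name-to-number map built by
  // InitRegisterMap() above.]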
const AsmToken &lexedToken = lexDefinite(); + + switch (lexedToken.getKind()) { + default: + return AsmToken(lexedToken); + case AsmToken::Error: + SetError(Lexer->getErrLoc(), Lexer->getErr()); + return AsmToken(lexedToken); + case AsmToken::Identifier: + { + std::string upperCase = lexedToken.getString().str(); + std::string lowerCase = LowercaseString(upperCase); + StringRef lowerRef(lowerCase); + + unsigned regID = MatchRegisterName(lowerRef); + + if (regID) { + return AsmToken(AsmToken::Register, + lexedToken.getString(), + static_cast<int64_t>(regID)); + } else { + return AsmToken(lexedToken); + } + } + } +} + +extern "C" void LLVMInitializeMBlazeAsmLexer() { + RegisterAsmLexer<MBlazeAsmLexer> X(TheMBlazeTarget); +} + diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp new file mode 100644 index 0000000..524f33d --- /dev/null +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -0,0 +1,568 @@ +//===-- MBlazeAsmParser.cpp - Parse MBlaze asm to MCInst instructions -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MBlaze.h" +#include "MBlazeSubtarget.h" +#include "MBlazeRegisterInfo.h" +#include "MBlazeISelLowering.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmParser.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +namespace { +struct MBlazeOperand; + +class MBlazeAsmParser : public TargetAsmParser { + MCAsmParser &Parser; + TargetMachine &TM; + + MCAsmParser &getParser() const { return Parser; } + MCAsmLexer &getLexer() const { return Parser.getLexer(); } + + void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } + bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + + MBlazeOperand *ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + MBlazeOperand *ParseRegister(unsigned &RegNo); + MBlazeOperand *ParseImmediate(); + MBlazeOperand *ParseFsl(); + MBlazeOperand* ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + + bool ParseDirectiveWord(unsigned Size, SMLoc L); + + bool MatchAndEmitInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out); + + /// @name Auto-generated Match Functions + /// { + +#define GET_ASSEMBLER_HEADER +#include "MBlazeGenAsmMatcher.inc" + + /// } + + +public: + MBlazeAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) + : TargetAsmParser(T), Parser(_Parser), TM(_TM) {} + + virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands); + + virtual bool ParseDirective(AsmToken DirectiveID); +}; + +/// MBlazeOperand - Instances of this class represent a parsed MBlaze machine +/// instruction. 
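/// [Editor's note: this is a tagged union; Kind selects which member of
/// the anonymous union below is live, and each accessor asserts on it.]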
+struct MBlazeOperand : public MCParsedAsmOperand { + enum KindTy { + Token, + Immediate, + Register, + Memory, + Fsl + } Kind; + + SMLoc StartLoc, EndLoc; + + union { + struct { + const char *Data; + unsigned Length; + } Tok; + + struct { + unsigned RegNum; + } Reg; + + struct { + const MCExpr *Val; + } Imm; + + struct { + unsigned Base; + unsigned OffReg; + const MCExpr *Off; + } Mem; + + struct { + const MCExpr *Val; + } FslImm; + }; + + MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} +public: + MBlazeOperand(const MBlazeOperand &o) : MCParsedAsmOperand() { + Kind = o.Kind; + StartLoc = o.StartLoc; + EndLoc = o.EndLoc; + switch (Kind) { + case Register: + Reg = o.Reg; + break; + case Immediate: + Imm = o.Imm; + break; + case Token: + Tok = o.Tok; + break; + case Memory: + Mem = o.Mem; + break; + case Fsl: + FslImm = o.FslImm; + break; + } + } + + /// getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const { return StartLoc; } + + /// getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const { return EndLoc; } + + unsigned getReg() const { + assert(Kind == Register && "Invalid access!"); + return Reg.RegNum; + } + + const MCExpr *getImm() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.Val; + } + + const MCExpr *getFslImm() const { + assert(Kind == Fsl && "Invalid access!"); + return FslImm.Val; + } + + unsigned getMemBase() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.Base; + } + + const MCExpr* getMemOff() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.Off; + } + + unsigned getMemOffReg() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.OffReg; + } + + bool isToken() const { return Kind == Token; } + bool isImm() const { return Kind == Immediate; } + bool isMem() const { return Kind == Memory; } + bool isFsl() const { return Kind == Fsl; } + bool isReg() const { return Kind == Register; } + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. Null MCExpr = 0. 
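    // [Editor's note: defensive handling; the register-offset CreateMem
    // factory below leaves Mem.Off null, so a null expression is folded
    // to an immediate 0 here rather than dereferenced.]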
+ if (Expr == 0) + Inst.addOperand(MCOperand::CreateImm(0)); + else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + + void addFslOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getFslImm()); + } + + void addMemOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateReg(getMemBase())); + + unsigned RegOff = getMemOffReg(); + if (RegOff) + Inst.addOperand(MCOperand::CreateReg(RegOff)); + else + addExpr(Inst, getMemOff()); + } + + StringRef getToken() const { + assert(Kind == Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + virtual void dump(raw_ostream &OS) const; + + static MBlazeOperand *CreateToken(StringRef Str, SMLoc S) { + MBlazeOperand *Op = new MBlazeOperand(Token); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static MBlazeOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { + MBlazeOperand *Op = new MBlazeOperand(Register); + Op->Reg.RegNum = RegNum; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static MBlazeOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { + MBlazeOperand *Op = new MBlazeOperand(Immediate); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static MBlazeOperand *CreateFslImm(const MCExpr *Val, SMLoc S, SMLoc E) { + MBlazeOperand *Op = new MBlazeOperand(Fsl); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static MBlazeOperand *CreateMem(unsigned Base, const MCExpr *Off, SMLoc S, + SMLoc E) { + MBlazeOperand *Op = new MBlazeOperand(Memory); + Op->Mem.Base = Base; + Op->Mem.Off = Off; + Op->Mem.OffReg = 0; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static MBlazeOperand *CreateMem(unsigned Base, unsigned Off, SMLoc S, + SMLoc E) { + MBlazeOperand *Op = new MBlazeOperand(Memory); + Op->Mem.Base = Base; + Op->Mem.OffReg = Off; + Op->Mem.Off = 0; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } +}; + +} // end anonymous namespace. 
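(Editorial sketch, not part of the patch: the two CreateMem factories above
give ParseMemory, defined further below, a uniform way to fold the trailing
base and offset operands of a load/store into a single memory operand.  With
hypothetical, already-parsed operand lists, register numbers chosen only for
illustration, the folding works like this:)

  // "lw r5, r6, r7"  parses to {Token "lw",  Reg r5, Reg r6, Reg r7},
  //                  folded by CreateMem(R6, /*OffReg=*/R7, S, E)   (reg+reg)
  // "lwi r5, r6, 4"  parses to {Token "lwi", Reg r5, Reg r6, Imm 4},
  //                  folded by CreateMem(R6, /*Off=*/4, S, E)       (reg+imm)
  // In both cases the last two operands are popped and replaced, so the
  // matcher sees {mnemonic, destination, memory}.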
+ +void MBlazeOperand::dump(raw_ostream &OS) const { + switch (Kind) { + case Immediate: + getImm()->print(OS); + break; + case Register: + OS << "<register R"; + OS << MBlazeRegisterInfo::getRegisterNumbering(getReg()) << ">"; + break; + case Token: + OS << "'" << getToken() << "'"; + break; + case Memory: { + OS << "<memory R"; + OS << MBlazeRegisterInfo::getRegisterNumbering(getMemBase()); + OS << ", "; + + unsigned RegOff = getMemOffReg(); + if (RegOff) + OS << "R" << MBlazeRegisterInfo::getRegisterNumbering(RegOff); + else + OS << getMemOff(); + OS << ">"; + } + break; + case Fsl: + getFslImm()->print(OS); + break; + } +} + +/// @name Auto-generated Match Functions +/// { + +static unsigned MatchRegisterName(StringRef Name); + +/// } +// +bool MBlazeAsmParser:: +MatchAndEmitInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out) { + MCInst Inst; + SMLoc ErrorLoc; + unsigned ErrorInfo; + + switch (MatchInstructionImpl(Operands, Inst, ErrorInfo)) { + case Match_Success: + Out.EmitInstruction(Inst); + return false; + case Match_MissingFeature: + return Error(IDLoc, "instruction use requires an option to be enabled"); + case Match_MnemonicFail: + return Error(IDLoc, "unrecognized instruction mnemonic"); + case Match_ConversionFail: + return Error(IDLoc, "unable to convert operands to instruction"); + case Match_InvalidOperand: + ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((MBlazeOperand*)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + + llvm_unreachable("Implement any new match types added!"); + return true; +} + +MBlazeOperand *MBlazeAsmParser:: +ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + if (Operands.size() != 4) + return 0; + + MBlazeOperand &Base = *(MBlazeOperand*)Operands[2]; + MBlazeOperand &Offset = *(MBlazeOperand*)Operands[3]; + + SMLoc S = Base.getStartLoc(); + SMLoc O = Offset.getStartLoc(); + SMLoc E = Offset.getEndLoc(); + + if (!Base.isReg()) { + Error(S, "base address must be a register"); + return 0; + } + + if (!Offset.isReg() && !Offset.isImm()) { + Error(O, "offset must be a register or immediate"); + return 0; + } + + MBlazeOperand *Op; + if (Offset.isReg()) + Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getReg(), S, E); + else + Op = MBlazeOperand::CreateMem(Base.getReg(), Offset.getImm(), S, E); + + delete Operands.pop_back_val(); + delete Operands.pop_back_val(); + Operands.push_back(Op); + + return Op; +} + +bool MBlazeAsmParser::ParseRegister(unsigned &RegNo, + SMLoc &StartLoc, SMLoc &EndLoc) { + return (ParseRegister(RegNo) == 0); +} + +MBlazeOperand *MBlazeAsmParser::ParseRegister(unsigned &RegNo) { + SMLoc S = Parser.getTok().getLoc(); + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + + switch (getLexer().getKind()) { + default: return 0; + case AsmToken::Identifier: + RegNo = MatchRegisterName(getLexer().getTok().getIdentifier()); + if (RegNo == 0) + return 0; + + getLexer().Lex(); + return MBlazeOperand::CreateReg(RegNo, S, E); + } +} + +static unsigned MatchFslRegister(StringRef String) { + if (!String.startswith("rfsl")) + return -1; + + unsigned regNum; + if (String.substr(4).getAsInteger(10,regNum)) + return -1; + + return regNum; +} + +MBlazeOperand *MBlazeAsmParser::ParseFsl() { + SMLoc S = Parser.getTok().getLoc(); + SMLoc E = 
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+  switch (getLexer().getKind()) {
+  default: return 0;
+  case AsmToken::Identifier:
+    unsigned reg = MatchFslRegister(getLexer().getTok().getIdentifier());
+    if (reg >= 16)
+      return 0;
+
+    getLexer().Lex();
+    const MCExpr *EVal = MCConstantExpr::Create(reg,getContext());
+    return MBlazeOperand::CreateFslImm(EVal,S,E);
+  }
+}
+
+MBlazeOperand *MBlazeAsmParser::ParseImmediate() {
+  SMLoc S = Parser.getTok().getLoc();
+  SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+  const MCExpr *EVal;
+  switch (getLexer().getKind()) {
+  default: return 0;
+  case AsmToken::LParen:
+  case AsmToken::Plus:
+  case AsmToken::Minus:
+  case AsmToken::Integer:
+  case AsmToken::Identifier:
+    if (getParser().ParseExpression(EVal))
+      return 0;
+
+    return MBlazeOperand::CreateImm(EVal, S, E);
+  }
+}
+
+MBlazeOperand *MBlazeAsmParser::
+ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  MBlazeOperand *Op;
+
+  // Attempt to parse the next token as a register name
+  unsigned RegNo;
+  Op = ParseRegister(RegNo);
+
+  // Attempt to parse the next token as an FSL immediate
+  if (!Op)
+    Op = ParseFsl();
+
+  // Attempt to parse the next token as an immediate
+  if (!Op)
+    Op = ParseImmediate();
+
+  // If the token could not be parsed then fail
+  if (!Op) {
+    Error(Parser.getTok().getLoc(), "unknown operand");
+    return 0;
+  }
+
+  // Push the parsed operand into the list of operands
+  Operands.push_back(Op);
+  return Op;
+}
+
+/// Parse an MBlaze instruction mnemonic followed by its operands.
+bool MBlazeAsmParser::
+ParseInstruction(StringRef Name, SMLoc NameLoc,
+                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // The first operand is the token for the instruction name
+  size_t dotLoc = Name.find('.');
+  Operands.push_back(MBlazeOperand::CreateToken(Name.substr(0,dotLoc),NameLoc));
+  if (dotLoc < Name.size())
+    Operands.push_back(MBlazeOperand::CreateToken(Name.substr(dotLoc),NameLoc));
+
+  // If there are no more operands then finish
+  if (getLexer().is(AsmToken::EndOfStatement))
+    return false;
+
+  // Parse the first operand
+  if (!ParseOperand(Operands))
+    return true;
+
+  while (getLexer().isNot(AsmToken::EndOfStatement) &&
+         getLexer().is(AsmToken::Comma)) {
+    // Consume the comma token
+    getLexer().Lex();
+
+    // Parse the next operand
+    if (!ParseOperand(Operands))
+      return true;
+  }
+
+  // If the instruction requires a memory operand then we need to
+  // replace the last two operands (base+offset) with a single
+  // memory operand.
+  if (Name.startswith("lw") || Name.startswith("sw") ||
+      Name.startswith("lh") || Name.startswith("sh") ||
+      Name.startswith("lb") || Name.startswith("sb"))
+    return (ParseMemory(Operands) == NULL);
+
+  return false;
+}
+
+/// ParseDirective parses the MBlaze-specific directives
+bool MBlazeAsmParser::ParseDirective(AsmToken DirectiveID) {
+  StringRef IDVal = DirectiveID.getIdentifier();
+  if (IDVal == ".word")
+    return ParseDirectiveWord(2, DirectiveID.getLoc());
+  return true;
+}
+
+/// ParseDirectiveWord
+///  ::= .word [ expression (, expression)* ]
+bool MBlazeAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    for (;;) {
+      const MCExpr *Value;
+      if (getParser().ParseExpression(Value))
+        return true;
+
+      getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/);
+
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+
+      // FIXME: Improve diagnostic.
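      // [Editor's note: reaching this point means the token after an
      // expression is neither a comma nor end-of-statement, e.g.
      // ".word 1 2".  The error below points at L, the directive's own
      // location, rather than at the offending token, presumably what
      // the FIXME refers to.]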
+      if (getLexer().isNot(AsmToken::Comma))
+        return Error(L, "unexpected token in directive");
+      Parser.Lex();
+    }
+  }
+
+  Parser.Lex();
+  return false;
+}
+
+extern "C" void LLVMInitializeMBlazeAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmParser() {
+  RegisterAsmParser<MBlazeAsmParser> X(TheMBlazeTarget);
+  LLVMInitializeMBlazeAsmLexer();
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "MBlazeGenAsmMatcher.inc"
diff --git a/lib/Target/MBlaze/AsmParser/Makefile b/lib/Target/MBlaze/AsmParser/Makefile
new file mode 100644
index 0000000..611a0f4
--- /dev/null
+++ b/lib/Target/MBlaze/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MBlaze/AsmParser/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeAsmParser
+
+# Hack: we need to include 'main' MBlaze target directory for private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index fac2c19..0000000
--- a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-include_directories(
-  ${CMAKE_CURRENT_BINARY_DIR}/..
-  ${CMAKE_CURRENT_SOURCE_DIR}/..
-  )
-
-add_llvm_library(LLVMMBlazeAsmPrinter
-  MBlazeAsmPrinter.cpp
-  )
-add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
deleted file mode 100644
index f4b30ad..0000000
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ /dev/null
@@ -1,295 +0,0 @@
-//===-- MBlazeAsmPrinter.cpp - MBlaze LLVM assembly writer ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format MBlaze assembly language.
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "mblaze-asm-printer" - -#include "MBlaze.h" -#include "MBlazeSubtarget.h" -#include "MBlazeInstrInfo.h" -#include "MBlazeTargetMachine.h" -#include "MBlazeMachineFunction.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include <cctype> - -using namespace llvm; - -namespace { - class MBlazeAsmPrinter : public AsmPrinter { - const MBlazeSubtarget *Subtarget; - public: - explicit MBlazeAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) { - Subtarget = &TM.getSubtarget<MBlazeSubtarget>(); - } - - virtual const char *getPassName() const { - return "MBlaze Assembly Printer"; - } - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O); - void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O); - void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier = 0); - void printSavedRegsBitmask(raw_ostream &OS); - - void emitFrameDirective(); - - void printInstruction(const MachineInstr *MI, raw_ostream &O); - void EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); - } - virtual void EmitFunctionBodyStart(); - virtual void EmitFunctionBodyEnd(); - static const char *getRegisterName(unsigned RegNo); - - virtual void EmitFunctionEntryLabel(); - }; -} // end of anonymous namespace - -#include "MBlazeGenAsmWriter.inc" - -//===----------------------------------------------------------------------===// -// -// MBlaze Asm Directives -// -// -- Frame directive "frame Stackpointer, Stacksize, RARegister" -// Describe the stack frame. -// -// -- Mask directives "mask bitmask, offset" -// Tells the assembler which registers are saved and where. -// bitmask - contain a little endian bitset indicating which registers are -// saved on function prologue (e.g. with a 0x80000000 mask, the -// assembler knows the register 31 (RA) is saved at prologue. -// offset - the position before stack pointer subtraction indicating where -// the first saved register on prologue is located. (e.g. with a -// -// Consider the following function prologue: -// -// .frame R19,48,R15 -// .mask 0xc0000000,-8 -// addiu R1, R1, -48 -// sw R15, 40(R1) -// sw R19, 36(R1) -// -// With a 0xc0000000 mask, the assembler knows the register 15 (R15) and -// 19 (R19) are saved at prologue. As the save order on prologue is from -// left to right, R15 is saved first. 
A -8 offset means that after the -// stack pointer subtration, the first register in the mask (R15) will be -// saved at address 48-8=40. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Mask directives -//===----------------------------------------------------------------------===// - -// Print a 32 bit hex number with all numbers. -static void printHex32(unsigned int Value, raw_ostream &O) { - O << "0x"; - for (int i = 7; i >= 0; i--) - O << utohexstr((Value & (0xF << (i*4))) >> (i*4)); -} - - -// Create a bitmask with all callee saved registers for CPU or Floating Point -// registers. For CPU registers consider RA, GP and FP for saving if necessary. -void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { - const TargetRegisterInfo &RI = *TM.getRegisterInfo(); - const MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>(); - - // CPU Saved Registers Bitmasks - unsigned int CPUBitmask = 0; - - // Set the CPU Bitmasks - const MachineFrameInfo *MFI = MF->getFrameInfo(); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg); - if (MBlaze::CPURegsRegisterClass->contains(Reg)) - CPUBitmask |= (1 << RegNum); - } - - // Return Address and Frame registers must also be set in CPUBitmask. - if (RI.hasFP(*MF)) - CPUBitmask |= (1 << MBlazeRegisterInfo:: - getRegisterNumbering(RI.getFrameRegister(*MF))); - - if (MFI->adjustsStack()) - CPUBitmask |= (1 << MBlazeRegisterInfo:: - getRegisterNumbering(RI.getRARegister())); - - // Print CPUBitmask - O << "\t.mask \t"; printHex32(CPUBitmask, O); - O << ',' << MBlazeFI->getCPUTopSavedRegOff() << '\n'; -} - -//===----------------------------------------------------------------------===// -// Frame and Set directives -//===----------------------------------------------------------------------===// - -/// Frame Directive -void MBlazeAsmPrinter::emitFrameDirective() { - const TargetRegisterInfo &RI = *TM.getRegisterInfo(); - - unsigned stackReg = RI.getFrameRegister(*MF); - unsigned returnReg = RI.getRARegister(); - unsigned stackSize = MF->getFrameInfo()->getStackSize(); - - - OutStreamer.EmitRawText("\t.frame\t" + Twine(getRegisterName(stackReg)) + - "," + Twine(stackSize) + "," + - Twine(getRegisterName(returnReg))); -} - -void MBlazeAsmPrinter::EmitFunctionEntryLabel() { - OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName())); - OutStreamer.EmitLabel(CurrentFnSym); -} - -/// EmitFunctionBodyStart - Targets can override this to emit stuff before -/// the first basic block in the function. -void MBlazeAsmPrinter::EmitFunctionBodyStart() { - emitFrameDirective(); - - SmallString<128> Str; - raw_svector_ostream OS(Str); - printSavedRegsBitmask(OS); - OutStreamer.EmitRawText(OS.str()); -} - -/// EmitFunctionBodyEnd - Targets can override this to emit stuff after -/// the last basic block in the function. -void MBlazeAsmPrinter::EmitFunctionBodyEnd() { - OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); -} - -// Print out an operand for an inline asm expression. -bool MBlazeAsmPrinter:: -PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) { - // Does this asm operand have a single letter operand modifier? 
- if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. - - printOperand(MI, OpNo, O); - return false; -} - -void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(opNum); - - switch (MO.getType()) { - case MachineOperand::MO_Register: - O << getRegisterName(MO.getReg()); - break; - - case MachineOperand::MO_Immediate: - O << (int)MO.getImm(); - break; - - case MachineOperand::MO_FPImmediate: { - const ConstantFP *fp = MO.getFPImm(); - printHex32(fp->getValueAPF().bitcastToAPInt().getZExtValue(), O); - O << ";\t# immediate = " << *fp; - break; - } - - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - - case MachineOperand::MO_GlobalAddress: - O << *Mang->getSymbol(MO.getGlobal()); - break; - - case MachineOperand::MO_ExternalSymbol: - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - break; - - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - break; - - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" - << getFunctionNumber() << "_" << MO.getIndex(); - if (MO.getOffset()) - O << "+" << MO.getOffset(); - break; - - default: - llvm_unreachable("<unknown operand type>"); - } -} - -void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(opNum); - if (MO.isImm()) - O << (unsigned int)MO.getImm(); - else - printOperand(MI, opNum, O); -} - -void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(opNum); - if (MO.isImm()) - O << "rfsl" << (unsigned int)MO.getImm(); - else - printOperand(MI, opNum, O); -} - -void MBlazeAsmPrinter:: -printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier) { - printOperand(MI, opNum+1, O); - O << ", "; - printOperand(MI, opNum, O); -} - -// Force static initialization. -extern "C" void LLVMInitializeMBlazeAsmPrinter() { - RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget); -} diff --git a/lib/Target/MBlaze/AsmPrinter/Makefile b/lib/Target/MBlaze/AsmPrinter/Makefile deleted file mode 100644 index c44651c..0000000 --- a/lib/Target/MBlaze/AsmPrinter/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -##===- lib/Target/MBlaze/AsmPrinter/Makefile ---------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMBlazeAsmPrinter - -# Hack: we need to include 'main' MBlaze target directory to grab -# private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index 7f85bf8..004057a 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -5,17 +5,21 @@ tablegen(MBlazeGenRegisterNames.inc -gen-register-enums) tablegen(MBlazeGenRegisterInfo.inc -gen-register-desc) tablegen(MBlazeGenInstrNames.inc -gen-instr-enums) tablegen(MBlazeGenInstrInfo.inc -gen-instr-desc) +tablegen(MBlazeGenCodeEmitter.inc -gen-emitter) tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer) +tablegen(MBlazeGenAsmMatcher.inc -gen-asm-matcher) tablegen(MBlazeGenDAGISel.inc -gen-dag-isel) tablegen(MBlazeGenCallingConv.inc -gen-callingconv) tablegen(MBlazeGenSubtarget.inc -gen-subtarget) tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic) +tablegen(MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info) add_llvm_target(MBlazeCodeGen MBlazeDelaySlotFiller.cpp MBlazeInstrInfo.cpp MBlazeISelDAGToDAG.cpp MBlazeISelLowering.cpp + MBlazeFrameLowering.cpp MBlazeMCAsmInfo.cpp MBlazeRegisterInfo.cpp MBlazeSubtarget.cpp @@ -23,6 +27,14 @@ add_llvm_target(MBlazeCodeGen MBlazeTargetObjectFile.cpp MBlazeIntrinsicInfo.cpp MBlazeSelectionDAGInfo.cpp + MBlazeAsmPrinter.cpp + MBlazeAsmBackend.cpp + MBlazeMCInstLower.cpp + MBlazeELFWriterInfo.cpp + MBlazeMCCodeEmitter.cpp ) -target_link_libraries (LLVMMBlazeCodeGen LLVMSelectionDAG) +add_subdirectory(AsmParser) +add_subdirectory(Disassembler) +add_subdirectory(InstPrinter) +add_subdirectory(TargetInfo) diff --git a/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/lib/Target/MBlaze/Disassembler/CMakeLists.txt new file mode 100644 index 0000000..9376e68 --- /dev/null +++ b/lib/Target/MBlaze/Disassembler/CMakeLists.txt @@ -0,0 +1,16 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMMBlazeDisassembler + MBlazeDisassembler.cpp + ) + +# workaround for hanging compilation on MSVC9 and 10 +if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) +set_property( + SOURCE MBlazeDisassembler.cpp + PROPERTY COMPILE_FLAGS "/Od" + ) +endif() + +add_dependencies(LLVMMBlazeDisassembler MBlazeCodeGenTable_gen) diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp new file mode 100644 index 0000000..3379ac2 --- /dev/null +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -0,0 +1,647 @@ +//===- MBlazeDisassembler.cpp - Disassembler for MicroBlaze ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the MBlaze Disassembler. It contains code to translate +// the data produced by the decoder into MCInsts. 
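// [Editor's note on the decoder below: the 64-entry mblazeBinary2Opcode
// table maps each major-opcode slot to a representative opcode; where one
// slot covers several instructions (MUL, BEQ, BR, GET, and friends), a
// decode* helper inspects the minor-opcode and flag bits to disambiguate.]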
+// +//===----------------------------------------------------------------------===// + +#include "MBlaze.h" +#include "MBlazeInstrInfo.h" +#include "MBlazeDisassembler.h" + +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/raw_ostream.h" + +// #include "MBlazeGenDecoderTables.inc" +// #include "MBlazeGenRegisterNames.inc" +#include "MBlazeGenInstrInfo.inc" +#include "MBlazeGenEDInfo.inc" + +using namespace llvm; + +const unsigned UNSUPPORTED = -1; + +static unsigned mblazeBinary2Opcode[] = { + MBlaze::ADD, MBlaze::RSUB, MBlaze::ADDC, MBlaze::RSUBC, //00,01,02,03 + MBlaze::ADDK, MBlaze::RSUBK, MBlaze::ADDKC, MBlaze::RSUBKC, //04,05,06,07 + MBlaze::ADDI, MBlaze::RSUBI, MBlaze::ADDIC, MBlaze::RSUBIC, //08,09,0A,0B + MBlaze::ADDIK, MBlaze::RSUBIK, MBlaze::ADDIKC, MBlaze::RSUBIKC, //0C,0D,0E,0F + + MBlaze::MUL, MBlaze::BSRL, MBlaze::IDIV, MBlaze::GETD, //10,11,12,13 + UNSUPPORTED, UNSUPPORTED, MBlaze::FADD, UNSUPPORTED, //14,15,16,17 + MBlaze::MULI, MBlaze::BSRLI, UNSUPPORTED, MBlaze::GET, //18,19,1A,1B + UNSUPPORTED, UNSUPPORTED, UNSUPPORTED, UNSUPPORTED, //1C,1D,1E,1F + + MBlaze::OR, MBlaze::AND, MBlaze::XOR, MBlaze::ANDN, //20,21,22,23 + MBlaze::SEXT8, MBlaze::MFS, MBlaze::BR, MBlaze::BEQ, //24,25,26,27 + MBlaze::ORI, MBlaze::ANDI, MBlaze::XORI, MBlaze::ANDNI, //28,29,2A,2B + MBlaze::IMM, MBlaze::RTSD, MBlaze::BRI, MBlaze::BEQI, //2C,2D,2E,2F + + MBlaze::LBU, MBlaze::LHU, MBlaze::LW, UNSUPPORTED, //30,31,32,33 + MBlaze::SB, MBlaze::SH, MBlaze::SW, UNSUPPORTED, //34,35,36,37 + MBlaze::LBUI, MBlaze::LHUI, MBlaze::LWI, UNSUPPORTED, //38,39,3A,3B + MBlaze::SBI, MBlaze::SHI, MBlaze::SWI, UNSUPPORTED, //3C,3D,3E,3F +}; + +static unsigned getRD(uint32_t insn) { + return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>21)&0x1F); +} + +static unsigned getRA(uint32_t insn) { + return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F); +} + +static unsigned getRB(uint32_t insn) { + return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F); +} + +static int64_t getRS(uint32_t insn) { + return MBlazeRegisterInfo::getSpecialRegisterFromNumbering(insn&0x3FFF); +} + +static int64_t getIMM(uint32_t insn) { + int16_t val = (insn & 0xFFFF); + return val; +} + +static int64_t getSHT(uint32_t insn) { + int16_t val = (insn & 0x1F); + return val; +} + +static unsigned getFLAGS(int32_t insn) { + return (insn & 0x7FF); +} + +static int64_t getFSL(uint32_t insn) { + int16_t val = (insn & 0xF); + return val; +} + +static unsigned decodeMUL(uint32_t insn) { + switch (getFLAGS(insn)) { + default: return UNSUPPORTED; + case 0: return MBlaze::MUL; + case 1: return MBlaze::MULH; + case 2: return MBlaze::MULHSU; + case 3: return MBlaze::MULHU; + } +} + +static unsigned decodeSEXT(uint32_t insn) { + switch (insn&0x7FF) { + default: return UNSUPPORTED; + case 0x60: return MBlaze::SEXT8; + case 0x68: return MBlaze::WIC; + case 0x64: return MBlaze::WDC; + case 0x66: return MBlaze::WDCC; + case 0x74: return MBlaze::WDCF; + case 0x61: return MBlaze::SEXT16; + case 0x41: return MBlaze::SRL; + case 0x21: return MBlaze::SRC; + case 0x01: return MBlaze::SRA; + } +} + +static unsigned decodeBEQ(uint32_t insn) { + switch ((insn>>21)&0x1F) { + default: return UNSUPPORTED; + case 0x00: return MBlaze::BEQ; + case 0x10: return MBlaze::BEQD; + case 0x05: return MBlaze::BGE; + case 0x15: return 
MBlaze::BGED; + case 0x04: return MBlaze::BGT; + case 0x14: return MBlaze::BGTD; + case 0x03: return MBlaze::BLE; + case 0x13: return MBlaze::BLED; + case 0x02: return MBlaze::BLT; + case 0x12: return MBlaze::BLTD; + case 0x01: return MBlaze::BNE; + case 0x11: return MBlaze::BNED; + } +} + +static unsigned decodeBEQI(uint32_t insn) { + switch ((insn>>21)&0x1F) { + default: return UNSUPPORTED; + case 0x00: return MBlaze::BEQI; + case 0x10: return MBlaze::BEQID; + case 0x05: return MBlaze::BGEI; + case 0x15: return MBlaze::BGEID; + case 0x04: return MBlaze::BGTI; + case 0x14: return MBlaze::BGTID; + case 0x03: return MBlaze::BLEI; + case 0x13: return MBlaze::BLEID; + case 0x02: return MBlaze::BLTI; + case 0x12: return MBlaze::BLTID; + case 0x01: return MBlaze::BNEI; + case 0x11: return MBlaze::BNEID; + } +} + +static unsigned decodeBR(uint32_t insn) { + switch ((insn>>16)&0x1F) { + default: return UNSUPPORTED; + case 0x00: return MBlaze::BR; + case 0x08: return MBlaze::BRA; + case 0x0C: return MBlaze::BRK; + case 0x10: return MBlaze::BRD; + case 0x14: return MBlaze::BRLD; + case 0x18: return MBlaze::BRAD; + case 0x1C: return MBlaze::BRALD; + } +} + +static unsigned decodeBRI(uint32_t insn) { + switch ((insn>>16)&0x1F) { + default: return UNSUPPORTED; + case 0x00: return MBlaze::BRI; + case 0x08: return MBlaze::BRAI; + case 0x0C: return MBlaze::BRKI; + case 0x10: return MBlaze::BRID; + case 0x14: return MBlaze::BRLID; + case 0x18: return MBlaze::BRAID; + case 0x1C: return MBlaze::BRALID; + } +} + +static unsigned decodeBSRL(uint32_t insn) { + switch ((insn>>9)&0x3) { + default: return UNSUPPORTED; + case 0x2: return MBlaze::BSLL; + case 0x1: return MBlaze::BSRA; + case 0x0: return MBlaze::BSRL; + } +} + +static unsigned decodeBSRLI(uint32_t insn) { + switch ((insn>>9)&0x3) { + default: return UNSUPPORTED; + case 0x2: return MBlaze::BSLLI; + case 0x1: return MBlaze::BSRAI; + case 0x0: return MBlaze::BSRLI; + } +} + +static unsigned decodeRSUBK(uint32_t insn) { + switch (getFLAGS(insn)) { + default: return UNSUPPORTED; + case 0x0: return MBlaze::RSUBK; + case 0x1: return MBlaze::CMP; + case 0x3: return MBlaze::CMPU; + } +} + +static unsigned decodeFADD(uint32_t insn) { + switch (getFLAGS(insn)) { + default: return UNSUPPORTED; + case 0x000: return MBlaze::FADD; + case 0x080: return MBlaze::FRSUB; + case 0x100: return MBlaze::FMUL; + case 0x180: return MBlaze::FDIV; + case 0x200: return MBlaze::FCMP_UN; + case 0x210: return MBlaze::FCMP_LT; + case 0x220: return MBlaze::FCMP_EQ; + case 0x230: return MBlaze::FCMP_LE; + case 0x240: return MBlaze::FCMP_GT; + case 0x250: return MBlaze::FCMP_NE; + case 0x260: return MBlaze::FCMP_GE; + case 0x280: return MBlaze::FLT; + case 0x300: return MBlaze::FINT; + case 0x380: return MBlaze::FSQRT; + } +} + +static unsigned decodeGET(uint32_t insn) { + switch ((insn>>10)&0x3F) { + default: return UNSUPPORTED; + case 0x00: return MBlaze::GET; + case 0x01: return MBlaze::EGET; + case 0x02: return MBlaze::AGET; + case 0x03: return MBlaze::EAGET; + case 0x04: return MBlaze::TGET; + case 0x05: return MBlaze::TEGET; + case 0x06: return MBlaze::TAGET; + case 0x07: return MBlaze::TEAGET; + case 0x08: return MBlaze::CGET; + case 0x09: return MBlaze::ECGET; + case 0x0A: return MBlaze::CAGET; + case 0x0B: return MBlaze::ECAGET; + case 0x0C: return MBlaze::TCGET; + case 0x0D: return MBlaze::TECGET; + case 0x0E: return MBlaze::TCAGET; + case 0x0F: return MBlaze::TECAGET; + case 0x10: return MBlaze::NGET; + case 0x11: return MBlaze::NEGET; + case 0x12: return MBlaze::NAGET; + 
case 0x13: return MBlaze::NEAGET; + case 0x14: return MBlaze::TNGET; + case 0x15: return MBlaze::TNEGET; + case 0x16: return MBlaze::TNAGET; + case 0x17: return MBlaze::TNEAGET; + case 0x18: return MBlaze::NCGET; + case 0x19: return MBlaze::NECGET; + case 0x1A: return MBlaze::NCAGET; + case 0x1B: return MBlaze::NECAGET; + case 0x1C: return MBlaze::TNCGET; + case 0x1D: return MBlaze::TNECGET; + case 0x1E: return MBlaze::TNCAGET; + case 0x1F: return MBlaze::TNECAGET; + case 0x20: return MBlaze::PUT; + case 0x22: return MBlaze::APUT; + case 0x24: return MBlaze::TPUT; + case 0x26: return MBlaze::TAPUT; + case 0x28: return MBlaze::CPUT; + case 0x2A: return MBlaze::CAPUT; + case 0x2C: return MBlaze::TCPUT; + case 0x2E: return MBlaze::TCAPUT; + case 0x30: return MBlaze::NPUT; + case 0x32: return MBlaze::NAPUT; + case 0x34: return MBlaze::TNPUT; + case 0x36: return MBlaze::TNAPUT; + case 0x38: return MBlaze::NCPUT; + case 0x3A: return MBlaze::NCAPUT; + case 0x3C: return MBlaze::TNCPUT; + case 0x3E: return MBlaze::TNCAPUT; + } +} + +static unsigned decodeGETD(uint32_t insn) { + switch ((insn>>5)&0x3F) { + default: return UNSUPPORTED; + case 0x00: return MBlaze::GETD; + case 0x01: return MBlaze::EGETD; + case 0x02: return MBlaze::AGETD; + case 0x03: return MBlaze::EAGETD; + case 0x04: return MBlaze::TGETD; + case 0x05: return MBlaze::TEGETD; + case 0x06: return MBlaze::TAGETD; + case 0x07: return MBlaze::TEAGETD; + case 0x08: return MBlaze::CGETD; + case 0x09: return MBlaze::ECGETD; + case 0x0A: return MBlaze::CAGETD; + case 0x0B: return MBlaze::ECAGETD; + case 0x0C: return MBlaze::TCGETD; + case 0x0D: return MBlaze::TECGETD; + case 0x0E: return MBlaze::TCAGETD; + case 0x0F: return MBlaze::TECAGETD; + case 0x10: return MBlaze::NGETD; + case 0x11: return MBlaze::NEGETD; + case 0x12: return MBlaze::NAGETD; + case 0x13: return MBlaze::NEAGETD; + case 0x14: return MBlaze::TNGETD; + case 0x15: return MBlaze::TNEGETD; + case 0x16: return MBlaze::TNAGETD; + case 0x17: return MBlaze::TNEAGETD; + case 0x18: return MBlaze::NCGETD; + case 0x19: return MBlaze::NECGETD; + case 0x1A: return MBlaze::NCAGETD; + case 0x1B: return MBlaze::NECAGETD; + case 0x1C: return MBlaze::TNCGETD; + case 0x1D: return MBlaze::TNECGETD; + case 0x1E: return MBlaze::TNCAGETD; + case 0x1F: return MBlaze::TNECAGETD; + case 0x20: return MBlaze::PUTD; + case 0x22: return MBlaze::APUTD; + case 0x24: return MBlaze::TPUTD; + case 0x26: return MBlaze::TAPUTD; + case 0x28: return MBlaze::CPUTD; + case 0x2A: return MBlaze::CAPUTD; + case 0x2C: return MBlaze::TCPUTD; + case 0x2E: return MBlaze::TCAPUTD; + case 0x30: return MBlaze::NPUTD; + case 0x32: return MBlaze::NAPUTD; + case 0x34: return MBlaze::TNPUTD; + case 0x36: return MBlaze::TNAPUTD; + case 0x38: return MBlaze::NCPUTD; + case 0x3A: return MBlaze::NCAPUTD; + case 0x3C: return MBlaze::TNCPUTD; + case 0x3E: return MBlaze::TNCAPUTD; + } +} + +static unsigned decodeIDIV(uint32_t insn) { + switch (insn&0x3) { + default: return UNSUPPORTED; + case 0x0: return MBlaze::IDIV; + case 0x2: return MBlaze::IDIVU; + } +} + +static unsigned decodeLBU(uint32_t insn) { + switch ((insn>>9)&0x1) { + default: return UNSUPPORTED; + case 0x0: return MBlaze::LBU; + case 0x1: return MBlaze::LBUR; + } +} + +static unsigned decodeLHU(uint32_t insn) { + switch ((insn>>9)&0x1) { + default: return UNSUPPORTED; + case 0x0: return MBlaze::LHU; + case 0x1: return MBlaze::LHUR; + } +} + +static unsigned decodeLW(uint32_t insn) { + switch ((insn>>9)&0x3) { + default: return UNSUPPORTED; + case 0x0: return MBlaze::LW; 
+ case 0x1: return MBlaze::LWR; + case 0x2: return MBlaze::LWX; + } +} + +static unsigned decodeSB(uint32_t insn) { + switch ((insn>>9)&0x1) { + default: return UNSUPPORTED; + case 0x0: return MBlaze::SB; + case 0x1: return MBlaze::SBR; + } +} + +static unsigned decodeSH(uint32_t insn) { + switch ((insn>>9)&0x1) { + default: return UNSUPPORTED; + case 0x0: return MBlaze::SH; + case 0x1: return MBlaze::SHR; + } +} + +static unsigned decodeSW(uint32_t insn) { + switch ((insn>>9)&0x3) { + default: return UNSUPPORTED; + case 0x0: return MBlaze::SW; + case 0x1: return MBlaze::SWR; + case 0x2: return MBlaze::SWX; + } +} + +static unsigned decodeMFS(uint32_t insn) { + switch ((insn>>15)&0x1) { + default: return UNSUPPORTED; + case 0x0: + switch ((insn>>16)&0x1) { + default: return UNSUPPORTED; + case 0x0: return MBlaze::MSRSET; + case 0x1: return MBlaze::MSRCLR; + } + case 0x1: + switch ((insn>>14)&0x1) { + default: return UNSUPPORTED; + case 0x0: return MBlaze::MFS; + case 0x1: return MBlaze::MTS; + } + } +} + +static unsigned decodeOR(uint32_t insn) { + switch (getFLAGS(insn)) { + default: return UNSUPPORTED; + case 0x000: return MBlaze::OR; + case 0x400: return MBlaze::PCMPBF; + } +} + +static unsigned decodeXOR(uint32_t insn) { + switch (getFLAGS(insn)) { + default: return UNSUPPORTED; + case 0x000: return MBlaze::XOR; + case 0x400: return MBlaze::PCMPEQ; + } +} + +static unsigned decodeANDN(uint32_t insn) { + switch (getFLAGS(insn)) { + default: return UNSUPPORTED; + case 0x000: return MBlaze::ANDN; + case 0x400: return MBlaze::PCMPNE; + } +} + +static unsigned decodeRTSD(uint32_t insn) { + switch ((insn>>21)&0x1F) { + default: return UNSUPPORTED; + case 0x10: return MBlaze::RTSD; + case 0x11: return MBlaze::RTID; + case 0x12: return MBlaze::RTBD; + case 0x14: return MBlaze::RTED; + } +} + +static unsigned getOPCODE(uint32_t insn) { + unsigned opcode = mblazeBinary2Opcode[ (insn>>26)&0x3F ]; + switch (opcode) { + case MBlaze::MUL: return decodeMUL(insn); + case MBlaze::SEXT8: return decodeSEXT(insn); + case MBlaze::BEQ: return decodeBEQ(insn); + case MBlaze::BEQI: return decodeBEQI(insn); + case MBlaze::BR: return decodeBR(insn); + case MBlaze::BRI: return decodeBRI(insn); + case MBlaze::BSRL: return decodeBSRL(insn); + case MBlaze::BSRLI: return decodeBSRLI(insn); + case MBlaze::RSUBK: return decodeRSUBK(insn); + case MBlaze::FADD: return decodeFADD(insn); + case MBlaze::GET: return decodeGET(insn); + case MBlaze::GETD: return decodeGETD(insn); + case MBlaze::IDIV: return decodeIDIV(insn); + case MBlaze::LBU: return decodeLBU(insn); + case MBlaze::LHU: return decodeLHU(insn); + case MBlaze::LW: return decodeLW(insn); + case MBlaze::SB: return decodeSB(insn); + case MBlaze::SH: return decodeSH(insn); + case MBlaze::SW: return decodeSW(insn); + case MBlaze::MFS: return decodeMFS(insn); + case MBlaze::OR: return decodeOR(insn); + case MBlaze::XOR: return decodeXOR(insn); + case MBlaze::ANDN: return decodeANDN(insn); + case MBlaze::RTSD: return decodeRTSD(insn); + default: return opcode; + } +} + +EDInstInfo *MBlazeDisassembler::getEDInfo() const { + return instInfoMBlaze; +} + +// +// Public interface for the disassembler +// + +bool MBlazeDisassembler::getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream) const { + // The machine instruction. + uint32_t insn; + uint8_t bytes[4]; + + // We always consume 4 bytes of data + size = 4; + + // We want to read exactly 4 bytes of data. 
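+  // readBytes returns -1 when the full 4 bytes cannot be read (for example,
+  // at the end of the region), in which case we report a failed decode.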
+ if (region.readBytes(address, 4, (uint8_t*)bytes, NULL) == -1) + return false; + + // Encoded as a big-endian 32-bit word in the stream. + insn = (bytes[0]<<24) | (bytes[1]<<16) | (bytes[2]<< 8) | (bytes[3]<<0); + + // Get the MCInst opcode from the binary instruction and make sure + // that it is a valid instruction. + unsigned opcode = getOPCODE(insn); + if (opcode == UNSUPPORTED) + return false; + + instr.setOpcode(opcode); + + uint64_t tsFlags = MBlazeInsts[opcode].TSFlags; + switch ((tsFlags & MBlazeII::FormMask)) { + default: llvm_unreachable("unknown instruction encoding"); + + case MBlazeII::FRRRR: + instr.addOperand(MCOperand::CreateReg(getRD(insn))); + instr.addOperand(MCOperand::CreateReg(getRB(insn))); + instr.addOperand(MCOperand::CreateReg(getRA(insn))); + break; + + case MBlazeII::FRRR: + instr.addOperand(MCOperand::CreateReg(getRD(insn))); + instr.addOperand(MCOperand::CreateReg(getRA(insn))); + instr.addOperand(MCOperand::CreateReg(getRB(insn))); + break; + + case MBlazeII::FRI: + switch (opcode) { + default: llvm_unreachable("unknown instruction encoding"); + case MBlaze::MFS: + instr.addOperand(MCOperand::CreateReg(getRD(insn))); + instr.addOperand(MCOperand::CreateImm(insn&0x3FFF)); + break; + case MBlaze::MTS: + instr.addOperand(MCOperand::CreateImm(insn&0x3FFF)); + instr.addOperand(MCOperand::CreateReg(getRA(insn))); + break; + case MBlaze::MSRSET: + case MBlaze::MSRCLR: + instr.addOperand(MCOperand::CreateReg(getRD(insn))); + instr.addOperand(MCOperand::CreateImm(insn&0x7FFF)); + break; + } + break; + + case MBlazeII::FRRI: + instr.addOperand(MCOperand::CreateReg(getRD(insn))); + instr.addOperand(MCOperand::CreateReg(getRA(insn))); + switch (opcode) { + default: + instr.addOperand(MCOperand::CreateImm(getIMM(insn))); + break; + case MBlaze::BSRLI: + case MBlaze::BSRAI: + case MBlaze::BSLLI: + instr.addOperand(MCOperand::CreateImm(insn&0x1F)); + break; + } + break; + + case MBlazeII::FCRR: + instr.addOperand(MCOperand::CreateReg(getRA(insn))); + instr.addOperand(MCOperand::CreateReg(getRB(insn))); + break; + + case MBlazeII::FCRI: + instr.addOperand(MCOperand::CreateReg(getRA(insn))); + instr.addOperand(MCOperand::CreateImm(getIMM(insn))); + break; + + case MBlazeII::FRCR: + instr.addOperand(MCOperand::CreateReg(getRD(insn))); + instr.addOperand(MCOperand::CreateReg(getRB(insn))); + break; + + case MBlazeII::FRCI: + instr.addOperand(MCOperand::CreateReg(getRD(insn))); + instr.addOperand(MCOperand::CreateImm(getIMM(insn))); + break; + + case MBlazeII::FCCR: + instr.addOperand(MCOperand::CreateReg(getRB(insn))); + break; + + case MBlazeII::FCCI: + instr.addOperand(MCOperand::CreateImm(getIMM(insn))); + break; + + case MBlazeII::FRRCI: + instr.addOperand(MCOperand::CreateReg(getRD(insn))); + instr.addOperand(MCOperand::CreateReg(getRA(insn))); + instr.addOperand(MCOperand::CreateImm(getSHT(insn))); + break; + + case MBlazeII::FRRC: + instr.addOperand(MCOperand::CreateReg(getRD(insn))); + instr.addOperand(MCOperand::CreateReg(getRA(insn))); + break; + + case MBlazeII::FRCX: + instr.addOperand(MCOperand::CreateReg(getRD(insn))); + instr.addOperand(MCOperand::CreateImm(getFSL(insn))); + break; + + case MBlazeII::FRCS: + instr.addOperand(MCOperand::CreateReg(getRD(insn))); + instr.addOperand(MCOperand::CreateReg(getRS(insn))); + break; + + case MBlazeII::FCRCS: + instr.addOperand(MCOperand::CreateReg(getRS(insn))); + instr.addOperand(MCOperand::CreateReg(getRA(insn))); + break; + + case MBlazeII::FCRCX: + instr.addOperand(MCOperand::CreateReg(getRA(insn))); + 
instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+    break;
+
+  case MBlazeII::FCX:
+    instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
+    break;
+
+  case MBlazeII::FCR:
+    instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+    break;
+
+  case MBlazeII::FRIR:
+    instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+    instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
+    instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+    break;
+  }
+
+  return true;
+}
+
+static MCDisassembler *createMBlazeDisassembler(const Target &T) {
+  return new MBlazeDisassembler;
+}
+
+extern "C" void LLVMInitializeMBlazeDisassembler() {
+  // Register the disassembler.
+  TargetRegistry::RegisterMCDisassembler(TheMBlazeTarget,
+                                         createMBlazeDisassembler);
+}
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
new file mode 100644
index 0000000..d05eced
--- /dev/null
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -0,0 +1,55 @@
+//===- MBlazeDisassembler.h - Disassembler for MicroBlaze ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the MBlaze Disassembler. It is the header for
+// MBlazeDisassembler, a subclass of MCDisassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEDISASSEMBLER_H
+#define MBLAZEDISASSEMBLER_H
+
+#include "llvm/MC/MCDisassembler.h"
+
+struct InternalInstruction;
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+struct EDInstInfo;
+
+/// MBlazeDisassembler - Disassembler for all MBlaze platforms.
+class MBlazeDisassembler : public MCDisassembler {
+public:
+  /// Constructor - Initializes the disassembler.
+  ///
+  MBlazeDisassembler() :
+    MCDisassembler() {
+  }
+
+  ~MBlazeDisassembler() {
+  }
+
+  /// getInstruction - See MCDisassembler.
+  bool getInstruction(MCInst &instr,
+                      uint64_t &size,
+                      const MemoryObject &region,
+                      uint64_t address,
+                      raw_ostream &vStream) const;
+
+  /// getEDInfo - See MCDisassembler.
+  EDInstInfo *getEDInfo() const;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/MBlaze/Disassembler/Makefile b/lib/Target/MBlaze/Disassembler/Makefile
new file mode 100644
index 0000000..0530b32
--- /dev/null
+++ b/lib/Target/MBlaze/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/Disassembler/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeDisassembler
+
+# Hack: we need to include 'main' MBlaze target directory to grab headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..242a573
--- /dev/null
+++ b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+                     ${CMAKE_CURRENT_SOURCE_DIR}/..
)
+
+add_llvm_library(LLVMMBlazeAsmPrinter
+  MBlazeInstPrinter.cpp
+  )
+
+add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
new file mode 100644
index 0000000..a7fd287
--- /dev/null
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
@@ -0,0 +1,69 @@
+//===-- MBlazeInstPrinter.cpp - Convert MBlaze MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MBlaze.h"
+#include "MBlazeInstPrinter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#include "MBlazeGenAsmWriter.inc"
+
+void MBlazeInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+  printInstruction(MI, O);
+}
+
+void MBlazeInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                     raw_ostream &O, const char *Modifier) {
+  assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    O << getRegisterName(Op.getReg());
+  } else if (Op.isImm()) {
+    O << (int32_t)Op.getImm();
+  } else {
+    assert(Op.isExpr() && "unknown operand kind in printOperand");
+    O << *Op.getExpr();
+  }
+}
+
+void MBlazeInstPrinter::printFSLImm(const MCInst *MI, int OpNo,
+                                    raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNo);
+  if (MO.isImm())
+    O << "rfsl" << MO.getImm();
+  else
+    printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printUnsignedImm(const MCInst *MI, int OpNo,
+                                         raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNo);
+  if (MO.isImm())
+    O << (uint32_t)MO.getImm();
+  else
+    printOperand(MI, OpNo, O, NULL);
+}
+
+void MBlazeInstPrinter::printMemOperand(const MCInst *MI, int OpNo,
+                                        raw_ostream &O, const char *Modifier) {
+  printOperand(MI, OpNo, O, NULL);
+  O << ", ";
+  printOperand(MI, OpNo+1, O, NULL);
+}
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
new file mode 100644
index 0000000..bebc6c8
--- /dev/null
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -0,0 +1,43 @@
+//===-- MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MBlaze MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEINSTPRINTER_H
+#define MBLAZEINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+  class MCOperand;
+
+  class MBlazeInstPrinter : public MCInstPrinter {
+  public:
+    MBlazeInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {
+    }
+
+    virtual void printInst(const MCInst *MI, raw_ostream &O);
+
+    // Autogenerated by tblgen.
+    void printInstruction(const MCInst *MI, raw_ostream &O);
+    static const char *getRegisterName(unsigned RegNo);
+    static const char *getInstructionName(unsigned Opcode);
+
+    void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                      const char *Modifier = 0);
+    void printFSLImm(const MCInst *MI, int OpNo, raw_ostream &O);
+    void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O);
+    void printMemOperand(const MCInst *MI, int OpNo,raw_ostream &O,
+                         const char *Modifier = 0);
+  };
+}
+
+#endif
diff --git a/lib/Target/MBlaze/InstPrinter/Makefile b/lib/Target/MBlaze/InstPrinter/Makefile
new file mode 100644
index 0000000..9fb6e86
--- /dev/null
+++ b/lib/Target/MBlaze/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/InstPrinter/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeAsmPrinter
+
+# Hack: we need to include 'main' MBlaze target directory to grab
+# private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MBlaze/MBlaze.h b/lib/Target/MBlaze/MBlaze.h
index f9d828b..00c73f0 100644
--- a/lib/Target/MBlaze/MBlaze.h
+++ b/lib/Target/MBlaze/MBlaze.h
@@ -21,8 +21,16 @@
 namespace llvm {
   class MBlazeTargetMachine;
   class FunctionPass;
   class MachineCodeEmitter;
+  class MCCodeEmitter;
+  class TargetAsmBackend;
   class formatted_raw_ostream;
 
+  MCCodeEmitter *createMBlazeMCCodeEmitter(const Target &,
+                                           TargetMachine &TM,
+                                           MCContext &Ctx);
+
+  TargetAsmBackend *createMBlazeAsmBackend(const Target &, const std::string &);
+
   FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
   FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
 
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
index 3815b6d..1fa1e4d 100644
--- a/lib/Target/MBlaze/MBlaze.td
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -32,35 +32,35 @@ def MBlazeInstrInfo : InstrInfo;
 //===----------------------------------------------------------------------===//
 
 def FeaturePipe3     : SubtargetFeature<"pipe3", "HasPipe3", "true",
-                                "Implements 3-stage pipeline.">;
+                                "Implements 3-stage pipeline">;
 def FeatureBarrel    : SubtargetFeature<"barrel", "HasBarrel", "true",
-                                "Implements barrel shifter.">;
+                                "Implements barrel shifter">;
 def FeatureDiv       : SubtargetFeature<"div", "HasDiv", "true",
-                                "Implements hardware divider.">;
+                                "Implements hardware divider">;
 def FeatureMul       : SubtargetFeature<"mul", "HasMul", "true",
-                                "Implements hardware multiplier.">;
+                                "Implements hardware multiplier">;
 def FeatureFSL       : SubtargetFeature<"fsl", "HasFSL", "true",
-                                "Implements FSL instructions.">;
+                                "Implements FSL instructions">;
 def FeatureEFSL      : SubtargetFeature<"efsl", "HasEFSL", "true",
-                                "Implements extended FSL instructions.">;
+                                "Implements extended FSL instructions">;
 def FeatureMSRSet    : SubtargetFeature<"msrset", "HasMSRSet", "true",
-                                "Implements MSR register set and clear.">;
+                                "Implements MSR register set and clear">;
 def FeatureException : SubtargetFeature<"exception", "HasException", "true",
-                                "Implements hardware exception support.">;
+                                "Implements hardware exception support">;
 def FeaturePatCmp    : SubtargetFeature<"patcmp", "HasPatCmp", "true",
-                                "Implements pattern compare instruction.">;
+                                "Implements pattern compare instruction">;
 def FeatureFPU       : SubtargetFeature<"fpu", "HasFPU", "true",
-                                "Implements floating point unit.">;
+                                "Implements floating point unit">;
 def FeatureESR       : SubtargetFeature<"esr", "HasESR", "true",
                                 "Implements ESR and EAR registers">;
 def FeaturePVR       : SubtargetFeature<"pvr", "HasPVR", "true",
-                                "Implements processor version register.">;
+                                "Implements processor version register">;
 def FeatureMul64     : SubtargetFeature<"mul64", "HasMul64", "true",
                                 "Implements multiplier with 64-bit result">;
 def FeatureSqrt      : SubtargetFeature<"sqrt", "HasSqrt", "true",
-                                "Implements sqrt and floating point convert.">;
+                                "Implements sqrt and floating point convert">;
 def FeatureMMU       : SubtargetFeature<"mmu", "HasMMU", "true",
-                                "Implements memory management unit.">;
+                                "Implements memory management unit">;
 
 //===----------------------------------------------------------------------===//
 // MBlaze processors supported.
@@ -69,13 +69,26 @@ def FeatureMMU       : SubtargetFeature<"mmu", "HasMMU", "true",
 class Proc<string Name, list<SubtargetFeature> Features>
  : Processor<Name, MBlazeGenericItineraries, Features>;
 
-
 def : Proc<"v400", []>;
 def : Proc<"v500", []>;
 def : Proc<"v600", []>;
 def : Proc<"v700", []>;
 def : Proc<"v710", []>;
 
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+def MBlazeAsmWriter : AsmWriter {
+  string AsmWriterClassName = "InstPrinter";
+  bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
 def MBlaze : Target {
   let InstructionSet = MBlazeInstrInfo;
+  let AssemblyWriters = [MBlazeAsmWriter];
 }
diff --git a/lib/Target/MBlaze/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MBlazeAsmBackend.cpp
new file mode 100644
index 0000000..a4b21af
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeAsmBackend.cpp
@@ -0,0 +1,163 @@
+//===-- MBlazeAsmBackend.cpp - MBlaze Assembler Backend ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "MBlaze.h"
+#include "MBlazeELFWriterInfo.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+static unsigned getFixupKindSize(unsigned Kind) {
+  switch (Kind) {
+  default: assert(0 && "invalid fixup kind!");
+  case FK_Data_1: return 1;
+  case FK_PCRel_2:
+  case FK_Data_2: return 2;
+  case FK_PCRel_4:
+  case FK_Data_4: return 4;
+  case FK_Data_8: return 8;
+  }
+}
+
+namespace {
+class MBlazeELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+  MBlazeELFObjectWriter(Triple::OSType OSType)
+    : MCELFObjectTargetWriter(/*is64Bit*/ false, OSType, ELF::EM_MBLAZE,
+                              /*HasRelocationAddend*/ true) {}
+};
+
+class MBlazeAsmBackend : public TargetAsmBackend {
+public:
+  MBlazeAsmBackend(const Target &T)
+    : TargetAsmBackend() {
+  }
+
+  unsigned getNumFixupKinds() const {
+    return 2;
+  }
+
+  bool MayNeedRelaxation(const MCInst &Inst) const;
+
+  void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+
+  bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+
+  unsigned getPointerSize() const {
+    return 4;
+  }
+};
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+  switch (Op) {
+  default:            return Op;
+  case MBlaze::ADDIK: return MBlaze::ADDIK32;
+  case MBlaze::ORI:   return MBlaze::ORI32;
+  case MBlaze::BRLID: return MBlaze::BRLID32;
+  }
+}
+
+bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+  if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+    return false;
+
+  bool hasExprOrImm = false;
+  for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
+    hasExprOrImm |= Inst.getOperand(i).isExpr();
+
+  return hasExprOrImm;
+}
+
+void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+  Res = Inst;
+  Res.setOpcode(getRelaxedOpcode(Inst.getOpcode()));
+}
+
+bool MBlazeAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+  if ((Count % 4) != 0)
+    return false;
+
+  for (uint64_t i = 0; i < Count; i += 4)
+    OW->Write32(0x00000000);
+
+  return true;
+}
+} // end anonymous namespace
+
+namespace {
+class ELFMBlazeAsmBackend : public MBlazeAsmBackend {
+public:
+  Triple::OSType OSType;
+  ELFMBlazeAsmBackend(const Target &T, Triple::OSType _OSType)
+    : MBlazeAsmBackend(T), OSType(_OSType) { }
+
+  void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+                  uint64_t Value) const;
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    return createELFObjectWriter(new MBlazeELFObjectWriter(OSType), OS,
+                                 /*IsLittleEndian*/ false);
+  }
+};
+
+void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+                                     unsigned DataSize, uint64_t Value) const {
+  unsigned Size = getFixupKindSize(Fixup.getKind());
+
+  assert(Fixup.getOffset() + Size <= DataSize &&
+         "Invalid fixup offset!");
+
+  char *data = Data + Fixup.getOffset();
+  switch (Size) {
+  default: llvm_unreachable("Cannot fixup unknown value.");
+  case 1:  llvm_unreachable("Cannot fixup 1 byte value.");
+  case 8:  llvm_unreachable("Cannot fixup 8 byte value.");
+
+  // A "4 byte" fixup is split across an IMM/instruction pair: the two
+  // halfwords of Value are written into the low 16 bits of two consecutive
+  // big-endian words, which is why bytes beyond Size are touched here.
+  case 4:
+    *(data+7) = uint8_t(Value);
+    *(data+6) = uint8_t(Value >> 8);
+    *(data+3) = uint8_t(Value >> 16);
+    *(data+2) = uint8_t(Value >> 24);
+    break;
+
+  case 2:
+    *(data+3) = uint8_t(Value >> 0);
+    *(data+2) = uint8_t(Value >> 8);
+  }
+}
+} // end anonymous namespace
+
+TargetAsmBackend *llvm::createMBlazeAsmBackend(const Target &T,
+                                               const std::string &TT) {
+  switch (Triple(TT).getOS()) {
+  case Triple::Darwin:
+    assert(0 && "Mac not supported on MBlaze");
+  case Triple::MinGW32:
+  case Triple::Cygwin:
+  case Triple::Win32:
+    assert(0 && "Windows not supported on MBlaze");
+  default:
+    return new ELFMBlazeAsmBackend(T, Triple(TT).getOS());
+  }
+}
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
new file mode 100644
index 0000000..0016df5
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -0,0 +1,335 @@
+//===-- MBlazeAsmPrinter.cpp - MBlaze LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MBlaze assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-asm-printer"
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeMCInstLower.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
+
+using namespace llvm;
+
+namespace {
+  class MBlazeAsmPrinter : public AsmPrinter {
+    const MBlazeSubtarget *Subtarget;
+  public:
+    explicit MBlazeAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : AsmPrinter(TM, Streamer) {
+      Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+    }
+
+    virtual const char *getPassName() const {
+      return "MBlaze Assembly Printer";
+    }
+
+    void printSavedRegsBitmask();
+    void emitFrameDirective();
+    virtual void EmitFunctionBodyStart();
+    virtual void EmitFunctionBodyEnd();
+    virtual void EmitFunctionEntryLabel();
+
+    virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+      const;
+
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                         unsigned AsmVariant, const char *ExtraCode,
+                         raw_ostream &O);
+    void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+    void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+    void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+    void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+                         const char *Modifier = 0);
+
+    void EmitInstruction(const MachineInstr *MI);
+  };
+} // end of anonymous namespace
+
+// #include "MBlazeGenAsmWriter.inc"
+
+//===----------------------------------------------------------------------===//
+//
+//  MBlaze Asm Directives
+//
+//  -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+//  Describe the stack frame.
+//
+//  -- Mask directives "mask bitmask, offset"
+//  Tells the assembler which registers are saved and where.
+//  bitmask - contains a little-endian bitset indicating which registers are
+//            saved on function prologue (e.g. with a 0x80000000 mask, the
+//            assembler knows the register 31 (RA) is saved at prologue).
+//  offset  - the position before stack pointer subtraction indicating where
+//            the first saved register on prologue is located (see the
+//            example below).
+//
+//  Consider the following function prologue:
+//
+//    .frame  R19,48,R15
+//    .mask   0xc0000000,-8
+//    addik   R1, R1, -48
+//    swi     R15, R1, 40
+//    swi     R19, R1, 36
+//
+//  With a 0xc0000000 mask, the assembler knows the register 15 (R15) and
+//  19 (R19) are saved at prologue. As the save order on prologue is from
+//  left to right, R15 is saved first. A -8 offset means that after the
+//  stack pointer subtraction, the first register in the mask (R15) will be
+//  saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+// Print a 32-bit hex number, including leading zeros.
+static void printHex32(unsigned int Value, raw_ostream &O) {
+  O << "0x";
+  for (int i = 7; i >= 0; i--)
+    O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
+}
+
+// Create a bitmask with all callee saved registers for CPU or Floating Point
+// registers. For CPU registers consider RA, GP and FP for saving if necessary.
+void MBlazeAsmPrinter::printSavedRegsBitmask() {
+  const TargetFrameLowering *TFI = TM.getFrameLowering();
+  const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+  // CPU Saved Registers Bitmasks
+  unsigned int CPUBitmask = 0;
+
+  // Set the CPU Bitmasks
+  const MachineFrameInfo *MFI = MF->getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg);
+    if (MBlaze::GPRRegisterClass->contains(Reg))
+      CPUBitmask |= (1 << RegNum);
+  }
+
+  // Return Address and Frame registers must also be set in CPUBitmask.
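+  // (For example, a function that makes calls and keeps a frame pointer ends
+  // up with both R15 (return address) and R19 (frame pointer) in the mask,
+  // as in the .mask example above.)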
+ if (TFI->hasFP(*MF)) + CPUBitmask |= (1 << MBlazeRegisterInfo:: + getRegisterNumbering(RI.getFrameRegister(*MF))); + + if (MFI->adjustsStack()) + CPUBitmask |= (1 << MBlazeRegisterInfo:: + getRegisterNumbering(RI.getRARegister())); + + // Print CPUBitmask + OutStreamer.EmitRawText("\t.mask\t0x" + Twine::utohexstr(CPUBitmask)); +} + +/// Frame Directive +void MBlazeAsmPrinter::emitFrameDirective() { + if (!OutStreamer.hasRawTextSupport()) + return; + + const TargetRegisterInfo &RI = *TM.getRegisterInfo(); + unsigned stkReg = RI.getFrameRegister(*MF); + unsigned retReg = RI.getRARegister(); + unsigned stkSze = MF->getFrameInfo()->getStackSize(); + + OutStreamer.EmitRawText("\t.frame\t" + + Twine(MBlazeInstPrinter::getRegisterName(stkReg)) + + "," + Twine(stkSze) + "," + + Twine(MBlazeInstPrinter::getRegisterName(retReg))); +} + +void MBlazeAsmPrinter::EmitFunctionEntryLabel() { + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName())); + AsmPrinter::EmitFunctionEntryLabel(); +} + +void MBlazeAsmPrinter::EmitFunctionBodyStart() { + if (!OutStreamer.hasRawTextSupport()) + return; + + emitFrameDirective(); + printSavedRegsBitmask(); +} + +void MBlazeAsmPrinter::EmitFunctionBodyEnd() { + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); +} + +//===----------------------------------------------------------------------===// +void MBlazeAsmPrinter::EmitInstruction(const MachineInstr *MI) { + MBlazeMCInstLower MCInstLowering(OutContext, *Mang, *this); + + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + OutStreamer.EmitInstruction(TmpInst); +} + +// Print out an operand for an inline asm expression. +bool MBlazeAsmPrinter:: +PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. 
+
+  printOperand(MI, OpNo, O);
+  return false;
+}
+
+void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+                                    raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    O << MBlazeInstPrinter::getRegisterName(MO.getReg());
+    break;
+
+  case MachineOperand::MO_Immediate:
+    O << (int32_t)MO.getImm();
+    break;
+
+  case MachineOperand::MO_FPImmediate: {
+    const ConstantFP *fp = MO.getFPImm();
+    printHex32(fp->getValueAPF().bitcastToAPInt().getZExtValue(), O);
+    O << ";\t# immediate = " << *fp;
+    break;
+  }
+
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    return;
+
+  case MachineOperand::MO_GlobalAddress:
+    O << *Mang->getSymbol(MO.getGlobal());
+    break;
+
+  case MachineOperand::MO_ExternalSymbol:
+    O << *GetExternalSymbolSymbol(MO.getSymbolName());
+    break;
+
+  case MachineOperand::MO_JumpTableIndex:
+    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    break;
+
+  case MachineOperand::MO_ConstantPoolIndex:
+    O << MAI->getPrivateGlobalPrefix() << "CPI"
+      << getFunctionNumber() << "_" << MO.getIndex();
+    if (MO.getOffset())
+      O << "+" << MO.getOffset();
+    break;
+
+  default:
+    llvm_unreachable("<unknown operand type>");
+  }
+}
+
+void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
+                                        raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm())
+    O << (uint32_t)MO.getImm();
+  else
+    printOperand(MI, opNum, O);
+}
+
+void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum,
+                                   raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm())
+    O << "rfsl" << (unsigned int)MO.getImm();
+  else
+    printOperand(MI, opNum, O);
+}
+
+void MBlazeAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+                const char *Modifier) {
+  printOperand(MI, opNum, O);
+  O << ", ";
+  printOperand(MI, opNum+1, O);
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MBlazeAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+  // If this is a landing pad, it isn't a fall through. If it has no preds,
+  // then nothing falls through to it.
+  if (MBB->isLandingPad() || MBB->pred_empty())
+    return false;
+
+  // If there isn't exactly one predecessor, it can't be a fall through.
+  MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+  ++PI2;
+  if (PI2 != MBB->pred_end())
+    return false;
+
+  // The predecessor has to be immediately before this block.
+  const MachineBasicBlock *Pred = *PI;
+
+  if (!Pred->isLayoutSuccessor(MBB))
+    return false;
+
+  // If the block is completely empty, then it definitely does fall through.
+  if (Pred->empty())
+    return true;
+
+  // Find the last terminator; if it is a barrier (e.g. an unconditional
+  // branch), control does not fall through.
+  MachineBasicBlock::const_iterator I = Pred->end();
+  while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+    ; // Noop
+  return I == Pred->end() || !I->getDesc().isBarrier();
+}
+
+static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
+                                                unsigned SyntaxVariant,
+                                                const MCAsmInfo &MAI) {
+  if (SyntaxVariant == 0)
+    return new MBlazeInstPrinter(MAI);
+  return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmPrinter() { + RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget); + TargetRegistry::RegisterMCInstPrinter(TheMBlazeTarget, + createMBlazeMCInstPrinter); + +} diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td index 8622e0d..4962573 100644 --- a/lib/Target/MBlaze/MBlazeCallingConv.td +++ b/lib/Target/MBlaze/MBlazeCallingConv.td @@ -1,16 +1,16 @@ //===- MBlazeCallingConv.td - Calling Conventions for MBlaze -*- tablegen -*-=// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // This describes the calling conventions for MBlaze architecture. //===----------------------------------------------------------------------===// /// CCIfSubtarget - Match if the current subtarget has a feature F. -class CCIfSubtarget<string F, CCAction A>: +class CCIfSubtarget<string F, CCAction A>: CCIf<!strconcat("State.getTarget().getSubtarget<MBlazeSubtarget>().", F), A>; //===----------------------------------------------------------------------===// @@ -19,8 +19,10 @@ class CCIfSubtarget<string F, CCAction A>: def RetCC_MBlaze : CallingConv<[ // i32 are returned in registers R3, R4 - CCIfType<[i32], CCAssignToReg<[R3, R4]>>, + CCIfType<[i32,f32], CCAssignToReg<[R3, R4]>> +]>; - // f32 are returned in registers F3, F4 - CCIfType<[f32], CCAssignToReg<[F3, F4]>> +def CC_MBlaze : CallingConv<[ + CCIfType<[i32,f32], CCCustom<"CC_MBlaze_AssignReg">>, + CCIfType<[i32,f32], CCAssignToStack<4, 4>> ]>; diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp index b551b79..4399ee2 100644 --- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp +++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// Simple pass to fills delay slots with NOPs. +// A pass that attempts to fill instructions with delay slots. If no +// instructions can be moved into the delay slot then a NOP is placed there. // //===----------------------------------------------------------------------===// @@ -19,11 +20,23 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(FilledSlots, "Number of delay slots filled"); +namespace llvm { +cl::opt<bool> DisableDelaySlotFiller( + "disable-mblaze-delay-filler", + cl::init(false), + cl::desc("Disable the MBlaze delay slot filter."), + cl::Hidden); +} + namespace { struct Filler : public MachineFunctionPass { @@ -31,7 +44,7 @@ namespace { const TargetInstrInfo *TII; static char ID; - Filler(TargetMachine &tm) + Filler(TargetMachine &tm) : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } virtual const char *getPassName() const { @@ -51,6 +64,168 @@ namespace { char Filler::ID = 0; } // end of anonymous namespace +static bool hasImmInstruction(MachineBasicBlock::iterator &candidate) { + // Any instruction with an immediate mode operand greater than + // 16-bits requires an implicit IMM instruction. 
+  unsigned numOper = candidate->getNumOperands();
+  for (unsigned op = 0; op < numOper; ++op) {
+    MachineOperand &mop = candidate->getOperand(op);
+
+    // The operand requires more than 16-bits to represent.
+    if (mop.isImm() && (mop.getImm() < -0x8000 || mop.getImm() > 0x7fff))
+      return true;
+
+    // We must assume that unknown immediate values require more than
+    // 16-bits to represent.
+    if (mop.isGlobal() || mop.isSymbol())
+      return true;
+
+    // FIXME: we could probably check to see if the FP value happens
+    //        to not need an IMM instruction. For now we just always
+    //        assume that FP values do.
+    if (mop.isFPImm())
+      return true;
+  }
+
+  return false;
+}
+
+static unsigned getLastRealOperand(MachineBasicBlock::iterator &instr) {
+  switch (instr->getOpcode()) {
+  default: return instr->getNumOperands();
+
+  // These instructions have a variable number of operands but the first two
+  // are the "real" operands that we care about during hazard detection.
+  case MBlaze::BRLID:
+  case MBlaze::BRALID:
+  case MBlaze::BRLD:
+  case MBlaze::BRALD:
+    return 2;
+  }
+}
+
+static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
+                           MachineBasicBlock::iterator &slot) {
+  // Hazard check
+  MachineBasicBlock::iterator a = candidate;
+  MachineBasicBlock::iterator b = slot;
+  TargetInstrDesc desc = candidate->getDesc();
+
+  // MBB layout:
+  //   candidate := a0 = operation(a1, a2)
+  //   ...middle bit...
+  //   slot := b0 = operation(b1, b2)
+
+  // Possible hazards:
+  //   1. a1 or a2 was written during the middle bit
+  //   2. a0 was read or written during the middle bit
+  //   3. a0 is one or more of {b0, b1, b2}
+  //   4. b0 is one or more of {a1, a2}
+  //   5. a accesses memory, and the middle bit
+  //      contains a store operation.
+  bool a_is_memory = desc.mayLoad() || desc.mayStore();
+
+  // Determine the number of operands in the slot instruction and in the
+  // candidate instruction.
+ const unsigned aend = getLastRealOperand(a); + const unsigned bend = getLastRealOperand(b); + + // Check hazards type 1, 2 and 5 by scanning the middle bit + MachineBasicBlock::iterator m = a; + for (++m; m != b; ++m) { + for (unsigned aop = 0; aop<aend; ++aop) { + bool aop_is_reg = a->getOperand(aop).isReg(); + if (!aop_is_reg) continue; + + bool aop_is_def = a->getOperand(aop).isDef(); + unsigned aop_reg = a->getOperand(aop).getReg(); + + const unsigned mend = getLastRealOperand(m); + for (unsigned mop = 0; mop<mend; ++mop) { + bool mop_is_reg = m->getOperand(mop).isReg(); + if (!mop_is_reg) continue; + + bool mop_is_def = m->getOperand(mop).isDef(); + unsigned mop_reg = m->getOperand(mop).getReg(); + + if (aop_is_def && (mop_reg == aop_reg)) + return true; // Hazard type 2, because aop = a0 + else if (mop_is_def && (mop_reg == aop_reg)) + return true; // Hazard type 1, because aop in {a1, a2} + } + } + + // Check hazard type 5 + if (a_is_memory && m->getDesc().mayStore()) + return true; + } + + // Check hazard type 3 & 4 + for (unsigned aop = 0; aop<aend; ++aop) { + if (a->getOperand(aop).isReg()) { + unsigned aop_reg = a->getOperand(aop).getReg(); + + for (unsigned bop = 0; bop<bend; ++bop) { + if (b->getOperand(bop).isReg() && !b->getOperand(bop).isImplicit()) { + unsigned bop_reg = b->getOperand(bop).getReg(); + if (aop_reg == bop_reg) + return true; + } + } + } + } + + return false; +} + +static bool isDelayFiller(MachineBasicBlock &MBB, + MachineBasicBlock::iterator candidate) { + if (candidate == MBB.begin()) + return false; + + TargetInstrDesc brdesc = (--candidate)->getDesc(); + return (brdesc.hasDelaySlot()); +} + +static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) { + if (!I->hasUnmodeledSideEffects()) + return false; + + unsigned op = I->getOpcode(); + if (op == MBlaze::ADDK || op == MBlaze::ADDIK || + op == MBlaze::ADDC || op == MBlaze::ADDIC || + op == MBlaze::ADDKC || op == MBlaze::ADDIKC || + op == MBlaze::RSUBK || op == MBlaze::RSUBIK || + op == MBlaze::RSUBC || op == MBlaze::RSUBIC || + op == MBlaze::RSUBKC || op == MBlaze::RSUBIKC) + return false; + + return true; +} + +static MachineBasicBlock::iterator +findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) { + MachineBasicBlock::iterator I = slot; + while (true) { + if (I == MBB.begin()) + break; + + --I; + TargetInstrDesc desc = I->getDesc(); + if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) || + desc.isCall() || desc.isReturn() || desc.isBarrier() || + hasUnknownSideEffects(I)) + break; + + if (hasImmInstruction(I) || delayHasHazard(I,slot)) + continue; + + return I; + } + + return MBB.end(); +} + /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. /// Currently, we fill delay slots with NOPs. We assume there is only one /// delay slot per delayed instruction. 
@@ -58,11 +233,19 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) if (I->getDesc().hasDelaySlot()) { + MachineBasicBlock::iterator D = MBB.end(); MachineBasicBlock::iterator J = I; - ++J; - BuildMI(MBB, J, I->getDebugLoc(), TII->get(MBlaze::NOP)); + + if (!DisableDelaySlotFiller) + D = findDelayInstr(MBB,I); + ++FilledSlots; Changed = true; + + if (D == MBB.end()) + BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(MBlaze::NOP)); + else + MBB.splice(++J, &MBB, D); } return Changed; } diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp new file mode 100644 index 0000000..3f26ed1 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp @@ -0,0 +1,111 @@ +//===-- MBlazeELFWriterInfo.cpp - ELF Writer Info for the MBlaze backend --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF writer information for the MBlaze backend. +// +//===----------------------------------------------------------------------===// + +#include "MBlazeELFWriterInfo.h" +#include "MBlazeRelocations.h" +#include "llvm/Function.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Implementation of the MBlazeELFWriterInfo class +//===----------------------------------------------------------------------===// + +MBlazeELFWriterInfo::MBlazeELFWriterInfo(TargetMachine &TM) + : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64, + TM.getTargetData()->isLittleEndian()) { +} + +MBlazeELFWriterInfo::~MBlazeELFWriterInfo() {} + +unsigned MBlazeELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { + switch (MachineRelTy) { + case MBlaze::reloc_pcrel_word: + return ELF::R_MICROBLAZE_64_PCREL; + case MBlaze::reloc_absolute_word: + return ELF::R_MICROBLAZE_NONE; + default: + llvm_unreachable("unknown mblaze machine relocation type"); + } + return 0; +} + +long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier) const { + switch (RelTy) { + case ELF::R_MICROBLAZE_32_PCREL: + return Modifier - 4; + case ELF::R_MICROBLAZE_32: + return Modifier; + default: + llvm_unreachable("unknown mblaze relocation type"); + } + return 0; +} + +unsigned MBlazeELFWriterInfo::getRelocationTySize(unsigned RelTy) const { + // FIXME: Most of these sizes are guesses based on the name + switch (RelTy) { + case ELF::R_MICROBLAZE_32: + case ELF::R_MICROBLAZE_32_PCREL: + case ELF::R_MICROBLAZE_32_PCREL_LO: + case ELF::R_MICROBLAZE_32_LO: + case ELF::R_MICROBLAZE_SRO32: + case ELF::R_MICROBLAZE_SRW32: + case ELF::R_MICROBLAZE_32_SYM_OP_SYM: + case ELF::R_MICROBLAZE_GOTOFF_32: + return 32; + + case ELF::R_MICROBLAZE_64_PCREL: + case ELF::R_MICROBLAZE_64: + case ELF::R_MICROBLAZE_GOTPC_64: + case ELF::R_MICROBLAZE_GOT_64: + case ELF::R_MICROBLAZE_PLT_64: + case ELF::R_MICROBLAZE_GOTOFF_64: + return 64; + } + + return 0; +} + +bool MBlazeELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { + // FIXME: Most of these are guesses based on the name + switch (RelTy) { + case ELF::R_MICROBLAZE_32_PCREL: + case 
ELF::R_MICROBLAZE_64_PCREL:
+  case ELF::R_MICROBLAZE_32_PCREL_LO:
+  case ELF::R_MICROBLAZE_GOTPC_64:
+    return true;
+  }
+
+  return false;
+}
+
+unsigned MBlazeELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
+  return MBlaze::reloc_absolute_word;
+}
+
+long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset,
+                                                unsigned RelOffset,
+                                                unsigned RelTy) const {
+  if (RelTy == ELF::R_MICROBLAZE_32_PCREL ||
+      RelTy == ELF::R_MICROBLAZE_64_PCREL)
+    return SymOffset - (RelOffset + 4);
+
+  assert(0 && "computeRelocation unknown for this relocation type");
+  return 0;
+}
diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.h b/lib/Target/MBlaze/MBlazeELFWriterInfo.h
new file mode 100644
index 0000000..63bfc0d
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.h
@@ -0,0 +1,58 @@
+//===-- MBlazeELFWriterInfo.h - ELF Writer Info for MBlaze ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares ELF writer information for the MBlaze backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_ELF_WRITER_INFO_H
+#define MBLAZE_ELF_WRITER_INFO_H
+
+#include "llvm/Target/TargetELFWriterInfo.h"
+
+namespace llvm {
+
+  class MBlazeELFWriterInfo : public TargetELFWriterInfo {
+  public:
+    MBlazeELFWriterInfo(TargetMachine &TM);
+    virtual ~MBlazeELFWriterInfo();
+
+    /// getRelocationType - Returns the target specific ELF Relocation type.
+    /// 'MachineRelTy' contains the object code independent relocation type.
+    virtual unsigned getRelocationType(unsigned MachineRelTy) const;
+
+    /// hasRelocationAddend - True if the target uses an addend in the
+    /// ELF relocation entry.
+    virtual bool hasRelocationAddend() const { return false; }
+
+    /// getDefaultAddendForRelTy - Gets the default addend value for a
+    /// relocation entry based on the target ELF relocation type.
+    virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+                                              long int Modifier = 0) const;
+
+    /// getRelocationTySize - Returns the size of the relocatable field in
+    /// bits.
+    virtual unsigned getRelocationTySize(unsigned RelTy) const;
+
+    /// isPCRelativeRel - True if the relocation type is pc relative.
+    virtual bool isPCRelativeRel(unsigned RelTy) const;
+
+    /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type
+    /// used to reference an absolute label (e.g. a jumptable entry).
+    virtual unsigned getAbsoluteLabelMachineRelTy() const;
+
+    /// computeRelocation - Some relocatable fields could be relocated
+    /// directly, avoiding the relocation symbol emission; compute the
+    /// final relocation value for this symbol.
+    virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+                                       unsigned RelTy) const;
+  };
+
+} // end llvm namespace
+
+#endif // MBLAZE_ELF_WRITER_INFO_H
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
new file mode 100644
index 0000000..e763902
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -0,0 +1,450 @@
+//===-- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetFrameLowering
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-frame-lowering"
+
+#include "MBlazeFrameLowering.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeMachineFunction.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace llvm {
+  cl::opt<bool> DisableStackAdjust(
+    "disable-mblaze-stack-adjust",
+    cl::init(false),
+    cl::desc("Disable MBlaze stack layout adjustment."),
+    cl::Hidden);
+}
+
+static void replaceFrameIndexes(MachineFunction &MF,
+                                SmallVector<std::pair<int,int64_t>, 16> &FR) {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+  const SmallVector<std::pair<int,int64_t>, 16>::iterator FRB = FR.begin();
+  const SmallVector<std::pair<int,int64_t>, 16>::iterator FRE = FR.end();
+
+  SmallVector<std::pair<int,int64_t>, 16>::iterator FRI = FRB;
+  for (; FRI != FRE; ++FRI) {
+    MFI->RemoveStackObject(FRI->first);
+    int NFI = MFI->CreateFixedObject(4, FRI->second, true);
+    MBlazeFI->recordReplacement(FRI->first, NFI);
+
+    for (MachineFunction::iterator MB=MF.begin(), ME=MF.end(); MB!=ME; ++MB) {
+      MachineBasicBlock::iterator MBB = MB->begin();
+      const MachineBasicBlock::iterator MBE = MB->end();
+
+      for (; MBB != MBE; ++MBB) {
+        MachineInstr::mop_iterator MIB = MBB->operands_begin();
+        const MachineInstr::mop_iterator MIE = MBB->operands_end();
+
+        for (MachineInstr::mop_iterator MII = MIB; MII != MIE; ++MII) {
+          if (!MII->isFI() || MII->getIndex() != FRI->first) continue;
+          DEBUG(dbgs() << "FOUND FI#" << MII->getIndex() << "\n");
+          MII->setIndex(NFI);
+        }
+      }
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated by decrementing the stack pointer on
+// the first instruction of a function prologue. Once decremented,
+// all stack references are done through a positive offset
+// from the stack/frame pointer, so the stack is considered
+// to grow up.
+//
+//===----------------------------------------------------------------------===//
+
+static void analyzeFrameIndexes(MachineFunction &MF) {
+  if (DisableStackAdjust) return;
+
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  MachineRegisterInfo::livein_iterator LII = MRI.livein_begin();
+  MachineRegisterInfo::livein_iterator LIE = MRI.livein_end();
+  const SmallVector<int, 16> &LiveInFI = MBlazeFI->getLiveIn();
+  SmallVector<MachineInstr*, 16> EraseInstr;
+  SmallVector<std::pair<int,int64_t>, 16> FrameRelocate;
+
+  MachineBasicBlock *MBB = MF.getBlockNumbered(0);
+  MachineBasicBlock::iterator MIB = MBB->begin();
+  MachineBasicBlock::iterator MIE = MBB->end();
+
+  int StackAdjust = 0;
+  int StackOffset = -28;
+
+  // In this loop we are searching for frame indexes that correspond to
+  // incoming arguments that are already in the stack. We look for
+  // instruction sequences like the following:
+  //
+  //    LWI REG, FI1, 0
+  //    ...
+  //    SWI REG, FI2, 0
+  //
+  // As long as there are no defs of REG in the ... part, we can eliminate
+  // the SWI instruction because the value has already been stored to the
+  // stack by the caller. All we need to do is locate FI at the correct
+  // stack location according to the calling conventions.
+  //
+  // Additionally, if the SWI operation kills the def of REG then we don't
+  // need the LWI operation so we can erase it as well.
+  for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i) {
+    for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+      if (I->getOpcode() != MBlaze::LWI || I->getNumOperands() != 3 ||
+          !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+          I->getOperand(1).getIndex() != LiveInFI[i]) continue;
+
+      unsigned FIReg = I->getOperand(0).getReg();
+      MachineBasicBlock::iterator SI = I;
+      for (SI++; SI != MIE; ++SI) {
+        if (!SI->getOperand(0).isReg() ||
+            !SI->getOperand(1).isFI() ||
+            SI->getOpcode() != MBlaze::SWI) continue;
+
+        int FI = SI->getOperand(1).getIndex();
+        if (SI->getOperand(0).getReg() != FIReg ||
+            MFI->isFixedObjectIndex(FI) ||
+            MFI->getObjectSize(FI) != 4) continue;
+
+        if (SI->getOperand(0).isDef()) break;
+
+        if (SI->getOperand(0).isKill()) {
+          DEBUG(dbgs() << "LWI for FI#" << I->getOperand(1).getIndex()
+                       << " removed\n");
+          EraseInstr.push_back(I);
+        }
+
+        EraseInstr.push_back(SI);
+        DEBUG(dbgs() << "SWI for FI#" << FI << " removed\n");
+
+        FrameRelocate.push_back(std::make_pair(FI,StackOffset));
+        DEBUG(dbgs() << "FI#" << FI << " relocated to " << StackOffset << "\n");
+
+        StackOffset -= 4;
+        StackAdjust += 4;
+        break;
+      }
+    }
+  }
+
+  // In this loop we are searching for frame indexes that correspond to
+  // incoming arguments that are in registers. We look for instruction
+  // sequences like the following:
+  //
+  //    ...  SWI REG, FI, 0
+  //
+  // As long as the ... part does not define REG and if REG is an incoming
+  // parameter register then we know that, according to ABI conventions, the
+  // caller has allocated stack space for it already.  Instead of allocating
+  // stack space on our frame, we record the correct location in the caller's
+  // frame.
+  for (MachineRegisterInfo::livein_iterator LI = LII; LI != LIE; ++LI) {
+    for (MachineBasicBlock::iterator I=MIB; I != MIE; ++I) {
+      if (I->definesRegister(LI->first))
+        break;
+
+      if (I->getOpcode() != MBlaze::SWI || I->getNumOperands() != 3 ||
+          !I->getOperand(1).isFI() || !I->getOperand(0).isReg() ||
+          I->getOperand(1).getIndex() < 0) continue;
+
+      if (I->getOperand(0).getReg() == LI->first) {
+        int FI = I->getOperand(1).getIndex();
+        MBlazeFI->recordLiveIn(FI);
+
+        int FILoc = 0;
+        switch (LI->first) {
+        default: llvm_unreachable("invalid incoming parameter!");
+        case MBlaze::R5:  FILoc = -4; break;
+        case MBlaze::R6:  FILoc = -8; break;
+        case MBlaze::R7:  FILoc = -12; break;
+        case MBlaze::R8:  FILoc = -16; break;
+        case MBlaze::R9:  FILoc = -20; break;
+        case MBlaze::R10: FILoc = -24; break;
+        }
+
+        StackAdjust += 4;
+        FrameRelocate.push_back(std::make_pair(FI,FILoc));
+        DEBUG(dbgs() << "FI#" << FI << " relocated to " << FILoc << "\n");
+        break;
+      }
+    }
+  }
+
+  // Go ahead and erase all of the instructions that we determined were
+  // no longer needed.
+  for (int i = 0, e = EraseInstr.size(); i < e; ++i)
+    MBB->erase(EraseInstr[i]);
+
+  // Replace all of the frame indexes that we have relocated with new
+  // fixed object frame indexes.
+  replaceFrameIndexes(MF, FrameRelocate);
+}
+
+static void interruptFrameLayout(MachineFunction &MF) {
+  const Function *F = MF.getFunction();
+  llvm::CallingConv::ID CallConv = F->getCallingConv();
+
+  // If this function is not using either the interrupt_handler
+  // calling convention or the save_volatiles calling convention
+  // then we don't need to do any additional frame layout.
+  if (CallConv != llvm::CallingConv::MBLAZE_INTR &&
+      CallConv != llvm::CallingConv::MBLAZE_SVOL)
+    return;
+
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MBlazeInstrInfo &TII =
+    *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo());
+
+  // Determine if the calling convention is the interrupt_handler
+  // calling convention. Some pieces of the prologue and epilogue
+  // only need to be emitted if we are lowering an interrupt handler.
+  bool isIntr = CallConv == llvm::CallingConv::MBLAZE_INTR;
+
+  // Determine where to put prologue and epilogue additions
+  MachineBasicBlock &MENT = MF.front();
+  MachineBasicBlock &MEXT = MF.back();
+
+  MachineBasicBlock::iterator MENTI = MENT.begin();
+  MachineBasicBlock::iterator MEXTI = prior(MEXT.end());
+
+  DebugLoc ENTDL = MENTI != MENT.end() ? MENTI->getDebugLoc() : DebugLoc();
+  DebugLoc EXTDL = MEXTI != MEXT.end() ? MEXTI->getDebugLoc() : DebugLoc();
+
+  // Store the frame indexes generated during prologue additions for use
+  // when we are generating the epilogue additions.
+  SmallVector<int, 10> VFI;
+
+  // Build the prologue SWI for R3 - R12 if needed. Note that R11 must
+  // always have a SWI because it is used when processing RMSR.
+  for (unsigned r = MBlaze::R3; r <= MBlaze::R12; ++r) {
+    if (!MRI.isPhysRegUsed(r) && !(isIntr && r == MBlaze::R11)) continue;
+
+    int FI = MFI->CreateStackObject(4,4,false,false);
+    VFI.push_back(FI);
+
+    BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), r)
+      .addFrameIndex(FI).addImm(0);
+  }
+
+  // Build the prologue SWI for R17, R18
+  int R17FI = MFI->CreateStackObject(4,4,false,false);
+  int R18FI = MFI->CreateStackObject(4,4,false,false);
+
+  BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R17)
+    .addFrameIndex(R17FI).addImm(0);
+
+  BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R18)
+    .addFrameIndex(R18FI).addImm(0);
+
+  // Build the prologue SWI and the epilogue LWI for RMSR if needed
+  if (isIntr) {
+    int MSRFI = MFI->CreateStackObject(4,4,false,false);
+    BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::MFS), MBlaze::R11)
+      .addReg(MBlaze::RMSR);
+    BuildMI(MENT, MENTI, ENTDL, TII.get(MBlaze::SWI), MBlaze::R11)
+      .addFrameIndex(MSRFI).addImm(0);
+
+    BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R11)
+      .addFrameIndex(MSRFI).addImm(0);
+    BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::MTS), MBlaze::RMSR)
+      .addReg(MBlaze::R11);
+  }
+
+  // Build the epilogue LWI for R17, R18
+  BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R18)
+    .addFrameIndex(R18FI).addImm(0);
+
+  BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), MBlaze::R17)
+    .addFrameIndex(R17FI).addImm(0);
+
+  // Build the epilogue LWI for R3 - R12 if needed.  The register test
+  // mirrors the prologue loop so the VFI indexes stay in sync.
+  for (unsigned r = MBlaze::R12, i = VFI.size(); r >= MBlaze::R3; --r) {
+    if (!MRI.isPhysRegUsed(r) && !(isIntr && r == MBlaze::R11)) continue;
+    BuildMI(MEXT, MEXTI, EXTDL, TII.get(MBlaze::LWI), r)
+      .addFrameIndex(VFI[--i]).addImm(0);
+  }
+}
+
+static void determineFrameLayout(MachineFunction &MF) {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+  // Replace the dummy '0' SPOffset by the negative offsets, as explained in
+  // LowerFORMAL_ARGUMENTS. Leaving '0' for a while is necessary to keep
+  // calculateFrameObjectOffsets from applying its own layout to these
+  // objects.
+  MBlazeFI->adjustLoadArgsFI(MFI);
+  MBlazeFI->adjustStoreVarArgsFI(MFI);
+
+  // Get the number of bytes to allocate from the FrameInfo
+  unsigned FrameSize = MFI->getStackSize();
+  DEBUG(dbgs() << "Original Frame Size: " << FrameSize << "\n" );
+
+  // Get the alignments provided by the target, and the maximum alignment
+  // (if any) of the fixed frame objects.
+  // unsigned MaxAlign = MFI->getMaxAlignment();
+  unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+  unsigned AlignMask = TargetAlign - 1;
+
+  // Make sure the frame is aligned.
+  FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+  MFI->setStackSize(FrameSize);
+  DEBUG(dbgs() << "Aligned Frame Size: " << FrameSize << "\n" );
+}
+
+int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI)
+  const {
+  const MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+  if (MBlazeFI->hasReplacement(FI))
+    FI = MBlazeFI->getReplacement(FI);
+  return TargetFrameLowering::getFrameIndexOffset(MF,FI);
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register.  This is true if the function has variable sized allocas
+// or if frame pointer elimination is disabled.
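An aside on the mask arithmetic in determineFrameLayout above, restated as a standalone helper (illustrative; assumes TargetAlign is a power of two, which stack alignments are in practice):

    // Round FrameSize up to the next multiple of TargetAlign.
    // With TargetAlign = 8: 0 -> 0, 1..8 -> 8, 9..16 -> 16, and so on.
    unsigned alignFrameSize(unsigned FrameSize, unsigned TargetAlign) {
      unsigned AlignMask = TargetAlign - 1;
      return (FrameSize + AlignMask) & ~AlignMask;
    }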
+bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); +} + +void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MBlazeInstrInfo &TII = + *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo()); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); + bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR; + + // Determine the correct frame layout + determineFrameLayout(MF); + + // Get the number of bytes to allocate from the FrameInfo. + unsigned StackSize = MFI->getStackSize(); + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI->adjustsStack() && !requiresRA) return; + + int FPOffset = MBlazeFI->getFPStackOffset(); + int RAOffset = MBlazeFI->getRAStackOffset(); + + // Adjust stack : addi R1, R1, -imm + BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADDIK), MBlaze::R1) + .addReg(MBlaze::R1).addImm(-StackSize); + + // swi R15, R1, stack_loc + if (MFI->adjustsStack() || requiresRA) { + BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI)) + .addReg(MBlaze::R15).addReg(MBlaze::R1).addImm(RAOffset); + } + + if (hasFP(MF)) { + // swi R19, R1, stack_loc + BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI)) + .addReg(MBlaze::R19).addReg(MBlaze::R1).addImm(FPOffset); + + // add R19, R1, R0 + BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADD), MBlaze::R19) + .addReg(MBlaze::R1).addReg(MBlaze::R0); + } +} + +void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + const MBlazeInstrInfo &TII = + *static_cast<const MBlazeInstrInfo*>(MF.getTarget().getInstrInfo()); + + DebugLoc dl = MBBI->getDebugLoc(); + + llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); + bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR; + + // Get the FI's where RA and FP are saved. 
+ int FPOffset = MBlazeFI->getFPStackOffset(); + int RAOffset = MBlazeFI->getRAStackOffset(); + + if (hasFP(MF)) { + // add R1, R19, R0 + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1) + .addReg(MBlaze::R19).addReg(MBlaze::R0); + + // lwi R19, R1, stack_loc + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19) + .addReg(MBlaze::R1).addImm(FPOffset); + } + + // lwi R15, R1, stack_loc + if (MFI->adjustsStack() || requiresRA) { + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15) + .addReg(MBlaze::R1).addImm(RAOffset); + } + + // Get the number of bytes from FrameInfo + int StackSize = (int) MFI->getStackSize(); + + // addi R1, R1, imm + if (StackSize) { + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDIK), MBlaze::R1) + .addReg(MBlaze::R1).addImm(StackSize); + } +} + +void MBlazeFrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); + bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR; + + if (MFI->adjustsStack() || requiresRA) { + MBlazeFI->setRAStackOffset(0); + MFI->CreateFixedObject(4,0,true); + } + + if (hasFP(MF)) { + MBlazeFI->setFPStackOffset(4); + MFI->CreateFixedObject(4,4,true); + } + + interruptFrameLayout(MF); + analyzeFrameIndexes(MF); +} diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h new file mode 100644 index 0000000..8be15bf --- /dev/null +++ b/lib/Target/MBlaze/MBlazeFrameLowering.h @@ -0,0 +1,53 @@ +//=- MBlazeFrameLowering.h - Define frame lowering for MicroBlaze -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZE_FRAMEINFO_H +#define MBLAZE_FRAMEINFO_H + +#include "MBlaze.h" +#include "MBlazeSubtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + class MBlazeSubtarget; + +class MBlazeFrameLowering : public TargetFrameLowering { +protected: + const MBlazeSubtarget &STI; + +public: + explicit MBlazeFrameLowering(const MBlazeSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 4, 0), STI(sti) { + } + + /// targetHandlesStackFrameRounding - Returns true if the target is + /// responsible for rounding up the stack frame (probably at emitPrologue + /// time). + bool targetHandlesStackFrameRounding() const { return true; } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. 
+ void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool hasFP(const MachineFunction &MF) const; + + int getFrameIndexOffset(const MachineFunction &MF, int FI) const; + + virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp index e64dd0e..6b43497 100644 --- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp +++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp @@ -81,13 +81,9 @@ private: SDNode *getGlobalBaseReg(); SDNode *Select(SDNode *N); - // Complex Pattern. - bool SelectAddr(SDNode *Op, SDValue N, - SDValue &Base, SDValue &Offset); - // Address Selection - bool SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index); - bool SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base); + bool SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index); + bool SelectAddrRegImm(SDValue N, SDValue &Disp, SDValue &Base); // getI32Imm - Return a target constant with the specified value, of type i32. inline SDValue getI32Imm(unsigned Imm) { @@ -122,7 +118,7 @@ static bool isIntS32Immediate(SDValue Op, int32_t &Imm) { /// can be represented as an indexed [r+r] operation. Returns false if it /// can be more efficiently represented with [r+imm]. bool MBlazeDAGToDAGISel:: -SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { +SelectAddrRegReg(SDValue N, SDValue &Base, SDValue &Index) { if (N.getOpcode() == ISD::FrameIndex) return false; if (N.getOpcode() == ISD::TargetExternalSymbol || N.getOpcode() == ISD::TargetGlobalAddress) @@ -137,8 +133,8 @@ SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { N.getOperand(1).getOpcode() == ISD::TargetJumpTable) return false; // jump tables. - Base = N.getOperand(1); - Index = N.getOperand(0); + Base = N.getOperand(0); + Index = N.getOperand(1); return true; } @@ -149,9 +145,9 @@ SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { /// a signed 32-bit displacement [r+imm], and if it is not better /// represented as reg+reg. bool MBlazeDAGToDAGISel:: -SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) { +SelectAddrRegImm(SDValue N, SDValue &Base, SDValue &Disp) { // If this can be more profitably realized as r+r, fail. 
- if (SelectAddrRegReg(Op, N, Disp, Base)) + if (SelectAddrRegReg(N, Base, Disp)) return false; if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { @@ -163,7 +159,6 @@ SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) { } else { Base = N.getOperand(0); } - DEBUG( errs() << "WESLEY: Using Operand Immediate\n" ); return true; // [r+i] } } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { @@ -171,7 +166,6 @@ SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) { uint32_t Imm = CN->getZExtValue(); Disp = CurDAG->getTargetConstant(Imm, CN->getValueType(0)); Base = CurDAG->getRegister(MBlaze::R0, CN->getValueType(0)); - DEBUG( errs() << "WESLEY: Using Constant Node\n" ); return true; } @@ -190,76 +184,21 @@ SDNode *MBlazeDAGToDAGISel::getGlobalBaseReg() { return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } -/// ComplexPattern used on MBlazeInstrInfo -/// Used on MBlaze Load/Store instructions -bool MBlazeDAGToDAGISel:: -SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) { - // if Address is FI, get the TargetFrameIndex. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; - } - - // on PIC code Load GA - if (TM.getRelocationModel() == Reloc::PIC_) { - if ((Addr.getOpcode() == ISD::TargetGlobalAddress) || - (Addr.getOpcode() == ISD::TargetConstantPool) || - (Addr.getOpcode() == ISD::TargetJumpTable)){ - Base = CurDAG->getRegister(MBlaze::R15, MVT::i32); - Offset = Addr; - return true; - } - } else { - if ((Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress)) - return false; - } - - // Operand is a result from an ADD. - if (Addr.getOpcode() == ISD::ADD) { - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { - if (isUInt<16>(CN->getZExtValue())) { - - // If the first operand is a FI, get the TargetFI Node - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> - (Addr.getOperand(0))) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - } else { - Base = Addr.getOperand(0); - } - - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); - return true; - } - } - } - - Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; -} - /// Select instructions not customized! Used for /// expanded, promoted and normal instructions SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); - // Dump information about the Node being selected - DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); - // If we have a custom node, we already have selected! - if (Node->isMachineOpcode()) { - DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + if (Node->isMachineOpcode()) return NULL; - } /// // Instruction Selection not handled by the auto-generated // tablegen selection should be handled here. /// - switch(Opcode) { + switch (Opcode) { default: break; // Get target GOT address. 
@@ -271,7 +210,7 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) { int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex(); EVT VT = Node->getValueType(0); SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); - unsigned Opc = MBlaze::ADDI; + unsigned Opc = MBlaze::ADDIK; if (Node->hasOneUse()) return CurDAG->SelectNodeTo(Node, Opc, VT, TFI, imm); return CurDAG->getMachineNode(Opc, dl, VT, TFI, imm); @@ -289,8 +228,8 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) { SDValue R20Reg = CurDAG->getRegister(MBlaze::R20, MVT::i32); SDValue InFlag(0, 0); - if ( (isa<GlobalAddressSDNode>(Callee)) || - (isa<ExternalSymbolSDNode>(Callee)) ) + if ((isa<GlobalAddressSDNode>(Callee)) || + (isa<ExternalSymbolSDNode>(Callee))) { /// Direct call for global addresses and external symbols SDValue GPReg = CurDAG->getRegister(MBlaze::R15, MVT::i32); @@ -309,7 +248,7 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) { // Emit Jump and Link Register SDNode *ResNode = CurDAG->getMachineNode(MBlaze::BRLID, dl, MVT::Other, - MVT::Flag, R20Reg, Chain); + MVT::Glue, R20Reg, Chain); Chain = SDValue(ResNode, 0); InFlag = SDValue(ResNode, 1); ReplaceUses(SDValue(Node, 0), Chain); diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 1730b68..2f40bfc 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -35,6 +35,11 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); + const char *MBlazeTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { case MBlazeISD::JmpLink : return "MBlazeISD::JmpLink"; @@ -56,9 +61,9 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) setBooleanContents(ZeroOrOneBooleanContent); // Set up the register classes - addRegisterClass(MVT::i32, MBlaze::CPURegsRegisterClass); + addRegisterClass(MVT::i32, MBlaze::GPRRegisterClass); if (Subtarget->hasFPU()) { - addRegisterClass(MVT::f32, MBlaze::FGR32RegisterClass); + addRegisterClass(MVT::f32, MBlaze::GPRRegisterClass); setOperationAction(ISD::ConstantFP, MVT::f32, Legal); } @@ -86,6 +91,10 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + // Sign extended loads must be expanded + setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); + // MBlaze has no REM or DIVREM operations. 
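Marking REM as Expand below means the legalizer rewrites remainders in terms of operations the target does provide, typically via the division identity when division is available and a runtime library call otherwise (a sketch of the identity, not the exact DAG the legalizer builds):

    // The usual REM expansion when division is available:
    // a % b == a - (a / b) * b (same shape for signed and unsigned).
    int expandedSRem(int a, int b) {
      return a - (a / b) * b;
    }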
setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i32, Expand); @@ -112,8 +121,8 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) } // Expand unsupported conversions - setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); - setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); + setOperationAction(ISD::BITCAST, MVT::f32, Expand); + setOperationAction(ISD::BITCAST, MVT::i32, Expand); // Expand SELECT_CC setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); @@ -166,7 +175,6 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) // Use the default for now setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); // MBlaze doesn't have extending float->double load/store setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); @@ -204,172 +212,353 @@ SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, //===----------------------------------------------------------------------===// MachineBasicBlock* MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - + MachineBasicBlock *MBB) + const { switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); + case MBlaze::ShiftRL: case MBlaze::ShiftRA: - case MBlaze::ShiftL: { - // To "insert" a shift left instruction, we actually have to insert a - // simple loop. The incoming instruction knows the destination vreg to - // set, the source vreg to operate over and the shift amount. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // start: - // andi samt, samt, 31 - // beqid samt, finish - // add dst, src, r0 - // loop: - // addik samt, samt, -1 - // sra dst, dst - // bneid samt, loop - // nop - // finish: - MachineFunction *F = BB->getParent(); - MachineRegisterInfo &R = F->getRegInfo(); - MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, loop); - F->insert(It, finish); - - // Update machine-CFG edges by transfering adding all successors and - // remaining instructions from the current block to the new block which - // will contain the Phi node for the select. - finish->splice(finish->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - finish->transferSuccessorsAndUpdatePHIs(BB); - - // Add the true and fallthrough blocks as its successors. 
- BB->addSuccessor(loop); - BB->addSuccessor(finish); - - // Next, add the finish block as a successor of the loop block - loop->addSuccessor(finish); - loop->addSuccessor(loop); - - unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); - BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT) - .addReg(MI->getOperand(2).getReg()) - .addImm(31); - - unsigned IVAL = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); - BuildMI(BB, dl, TII->get(MBlaze::ADDI), IVAL) - .addReg(MI->getOperand(1).getReg()) - .addImm(0); - - BuildMI(BB, dl, TII->get(MBlaze::BEQID)) - .addReg(IAMT) - .addMBB(finish); - - unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); - unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); - BuildMI(loop, dl, TII->get(MBlaze::PHI), DST) - .addReg(IVAL).addMBB(BB) - .addReg(NDST).addMBB(loop); - - unsigned SAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); - unsigned NAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); - BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT) - .addReg(IAMT).addMBB(BB) - .addReg(NAMT).addMBB(loop); - - if (MI->getOpcode() == MBlaze::ShiftL) - BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST); - else if (MI->getOpcode() == MBlaze::ShiftRA) - BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST); - else if (MI->getOpcode() == MBlaze::ShiftRL) - BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST); - else - llvm_unreachable( "Cannot lower unknown shift instruction" ); - - BuildMI(loop, dl, TII->get(MBlaze::ADDI), NAMT) - .addReg(SAMT) - .addImm(-1); - - BuildMI(loop, dl, TII->get(MBlaze::BNEID)) - .addReg(NAMT) - .addMBB(loop); - - BuildMI(*finish, finish->begin(), dl, - TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) - .addReg(IVAL).addMBB(BB) - .addReg(NDST).addMBB(loop); - - // The pseudo instruction is no longer needed so remove it + case MBlaze::ShiftL: + return EmitCustomShift(MI, MBB); + + case MBlaze::Select_FCC: + case MBlaze::Select_CC: + return EmitCustomSelect(MI, MBB); + + case MBlaze::CAS32: + case MBlaze::SWP32: + case MBlaze::LAA32: + case MBlaze::LAS32: + case MBlaze::LAD32: + case MBlaze::LAO32: + case MBlaze::LAX32: + case MBlaze::LAN32: + return EmitCustomAtomic(MI, MBB); + + case MBlaze::MEMBARRIER: + // The Microblaze does not need memory barriers. Just delete the pseudo + // instruction and finish. MI->eraseFromParent(); - return finish; + return MBB; + } +} + +MachineBasicBlock* +MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + + // To "insert" a shift left instruction, we actually have to insert a + // simple loop. The incoming instruction knows the destination vreg to + // set, the source vreg to operate over and the shift amount. 
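The loop built below performs the shift one bit per iteration; as a plain C++ model of the same control flow (illustrative, matching the start/loop/finish comments that follow; shiftLeftLoop is a hypothetical name):

    // Shift-left modeled as repeated doubling, mirroring the ADD dst,dst,dst
    // emitted in the loop body; the SRA/SRL variants step right instead.
    unsigned shiftLeftLoop(unsigned Src, unsigned Samt) {
      Samt &= 31;             // andi samt, samt, 31
      unsigned Dst = Src;     // add dst, src, r0
      while (Samt != 0) {     // beqid samt, finish
        Dst += Dst;           // add dst, dst, dst (shift left by one)
        --Samt;               // addik samt, samt, -1; bneid samt, loop
      }
      return Dst;
    }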
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  MachineFunction::iterator It = MBB;
+  ++It;
+
+  // start:
+  //   andi  samt, samt, 31
+  //   beqid samt, finish
+  //   add   dst, src, r0
+  // loop:
+  //   addik samt, samt, -1
+  //   sra   dst, dst
+  //   bneid samt, loop
+  //   nop
+  // finish:
+  MachineFunction *F = MBB->getParent();
+  MachineRegisterInfo &R = F->getRegInfo();
+  MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, loop);
+  F->insert(It, finish);
+
+  // Update machine-CFG edges by transferring all successors and the
+  // remaining instructions from the current block to the new block,
+  // which will contain the PHI node for the shift result.
+  finish->splice(finish->begin(), MBB,
+                 llvm::next(MachineBasicBlock::iterator(MI)),
+                 MBB->end());
+  finish->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // Add the loop and finish blocks as successors of the current block.
+  MBB->addSuccessor(loop);
+  MBB->addSuccessor(finish);
+
+  // Next, add the finish block and the loop itself as successors of the
+  // loop block.
+  loop->addSuccessor(finish);
+  loop->addSuccessor(loop);
+
+  unsigned IAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+  BuildMI(MBB, dl, TII->get(MBlaze::ANDI), IAMT)
+    .addReg(MI->getOperand(2).getReg())
+    .addImm(31);
+
+  unsigned IVAL = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+  BuildMI(MBB, dl, TII->get(MBlaze::ADDIK), IVAL)
+    .addReg(MI->getOperand(1).getReg())
+    .addImm(0);
+
+  BuildMI(MBB, dl, TII->get(MBlaze::BEQID))
+    .addReg(IAMT)
+    .addMBB(finish);
+
+  unsigned DST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+  unsigned NDST = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+  BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
+    .addReg(IVAL).addMBB(MBB)
+    .addReg(NDST).addMBB(loop);
+
+  unsigned SAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+  unsigned NAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass);
+  BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
+    .addReg(IAMT).addMBB(MBB)
+    .addReg(NAMT).addMBB(loop);
+
+  if (MI->getOpcode() == MBlaze::ShiftL)
+    BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST);
+  else if (MI->getOpcode() == MBlaze::ShiftRA)
+    BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST);
+  else if (MI->getOpcode() == MBlaze::ShiftRL)
+    BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST);
+  else
+    llvm_unreachable("Cannot lower unknown shift instruction");
+
+  BuildMI(loop, dl, TII->get(MBlaze::ADDIK), NAMT)
+    .addReg(SAMT)
+    .addImm(-1);
+
+  BuildMI(loop, dl, TII->get(MBlaze::BNEID))
+    .addReg(NAMT)
+    .addMBB(loop);
+
+  BuildMI(*finish, finish->begin(), dl,
+          TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+    .addReg(IVAL).addMBB(MBB)
+    .addReg(NDST).addMBB(loop);
+
+  // The pseudo instruction is no longer needed so remove it
+  MI->eraseFromParent();
+  return finish;
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomSelect(MachineInstr *MI,
+                                       MachineBasicBlock *MBB) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+
+  // To "insert" a SELECT_CC instruction, we actually have to insert the
+  // diamond control-flow pattern.  The incoming instruction knows the
+  // destination vreg to set, the condition code register to branch on, the
+  // true/false values to select between, and a branch opcode to use.
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  MachineFunction::iterator It = MBB;
+  ++It;
+
+  // thisMBB:
+  //  ...
+  //   TrueVal = ...
+  //   setcc r1, r2, r3
+  //   bNE   r1, r0, copy1MBB
+  //   fallthrough --> copy0MBB
+  MachineFunction *F = MBB->getParent();
+  MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+  unsigned Opc;
+  switch (MI->getOperand(4).getImm()) {
+  default: llvm_unreachable("Unknown branch condition");
+  case MBlazeCC::EQ: Opc = MBlaze::BEQID; break;
+  case MBlazeCC::NE: Opc = MBlaze::BNEID; break;
+  case MBlazeCC::GT: Opc = MBlaze::BGTID; break;
+  case MBlazeCC::LT: Opc = MBlaze::BLTID; break;
+  case MBlazeCC::GE: Opc = MBlaze::BGEID; break;
+  case MBlazeCC::LE: Opc = MBlaze::BLEID; break;
+  }
+
+  F->insert(It, flsBB);
+  F->insert(It, dneBB);
+
+  // Transfer the remainder of MBB and its successor edges to dneBB.
+  dneBB->splice(dneBB->begin(), MBB,
+                llvm::next(MachineBasicBlock::iterator(MI)),
+                MBB->end());
+  dneBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  MBB->addSuccessor(flsBB);
+  MBB->addSuccessor(dneBB);
+  flsBB->addSuccessor(dneBB);
+
+  BuildMI(MBB, dl, TII->get(Opc))
+    .addReg(MI->getOperand(3).getReg())
+    .addMBB(dneBB);
+
+  // sinkMBB:
+  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+  //  ...
+  //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+  //  .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
+  //  .addReg(MI->getOperand(2).getReg()).addMBB(BB);
+
+  BuildMI(*dneBB, dneBB->begin(), dl,
+          TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+    .addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
+    .addReg(MI->getOperand(1).getReg()).addMBB(MBB);
+
+  MI->eraseFromParent();   // The pseudo instruction is gone now.
+  return dneBB;
+}
+
+MachineBasicBlock*
+MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI,
+                                       MachineBasicBlock *MBB) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+
+  // All atomic instructions on the MicroBlaze are implemented using the
+  // load-linked / store-conditional style atomic instruction sequences.
+  // Thus, all operations will look something like the following:
+  //
+  //   start:
+  //     lwx     RV, RP, 0
+  //     <do stuff>
+  //     swx     RV, RP, 0
+  //     addic   RC, R0, 0
+  //     bneid   RC, start
+  //
+  //   exit:
+  //
+  // To "insert" such a sequence we have to build a simple loop.  The
+  // incoming pseudo instruction knows the destination vreg to set, the
+  // pointer to operate on and, for most operations, the value to apply.
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  MachineFunction::iterator It = MBB;
+  ++It;
+
+  MachineFunction *F = MBB->getParent();
+  MachineRegisterInfo &R = F->getRegInfo();
+
+  // Create the start and exit basic blocks for the atomic operation
+  MachineBasicBlock *start = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exit = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, start);
+  F->insert(It, exit);
+
+  // Update machine-CFG edges by transferring all successors and the
+  // remaining instructions from the current block to the exit block.
+  exit->splice(exit->begin(), MBB, llvm::next(MachineBasicBlock::iterator(MI)),
+               MBB->end());
+  exit->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // Add the start block as a successor of the current block.
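For reference, the retry loop being wired up here behaves like the following C++ sketch; modeling swx/addic/bneid with compare_exchange_weak is an illustrative assumption, not how the generated machine code works:

    #include <atomic>
    // LAA32-style fetch-and-add: load (lwx), apply the operation, then
    // retry the conditional store until it succeeds.
    int atomicFetchAdd(std::atomic<int> &Mem, int Val) {
      int Old = Mem.load();
      while (!Mem.compare_exchange_weak(Old, Old + Val))
        ;  // Old is refreshed on failure, like the re-executed lwx
      return Old;  // the other LA*32 pseudos differ only in the operation
    }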
+ MBB->addSuccessor(start); + + BuildMI(start, dl, TII->get(MBlaze::LWX), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addReg(MBlaze::R0); + + MachineBasicBlock *final = start; + unsigned finalReg = 0; + + switch (MI->getOpcode()) { + default: llvm_unreachable("Cannot lower unknown atomic instruction!"); + + case MBlaze::SWP32: + finalReg = MI->getOperand(2).getReg(); + start->addSuccessor(exit); + start->addSuccessor(start); + break; + + case MBlaze::LAN32: + case MBlaze::LAX32: + case MBlaze::LAO32: + case MBlaze::LAD32: + case MBlaze::LAS32: + case MBlaze::LAA32: { + unsigned opcode = 0; + switch (MI->getOpcode()) { + default: llvm_unreachable("Cannot lower unknown atomic load!"); + case MBlaze::LAA32: opcode = MBlaze::ADDIK; break; + case MBlaze::LAS32: opcode = MBlaze::RSUBIK; break; + case MBlaze::LAD32: opcode = MBlaze::AND; break; + case MBlaze::LAO32: opcode = MBlaze::OR; break; + case MBlaze::LAX32: opcode = MBlaze::XOR; break; + case MBlaze::LAN32: opcode = MBlaze::AND; break; } - case MBlaze::Select_FCC: - case MBlaze::Select_CC: { - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineFunction *F = BB->getParent(); - MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB); - - unsigned Opc; - switch (MI->getOperand(4).getImm()) { - default: llvm_unreachable( "Unknown branch condition" ); - case MBlazeCC::EQ: Opc = MBlaze::BNEID; break; - case MBlazeCC::NE: Opc = MBlaze::BEQID; break; - case MBlazeCC::GT: Opc = MBlaze::BLEID; break; - case MBlazeCC::LT: Opc = MBlaze::BGEID; break; - case MBlazeCC::GE: Opc = MBlaze::BLTID; break; - case MBlazeCC::LE: Opc = MBlaze::BGTID; break; + finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass); + start->addSuccessor(exit); + start->addSuccessor(start); + + BuildMI(start, dl, TII->get(opcode), finalReg) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(2).getReg()); + + if (MI->getOpcode() == MBlaze::LAN32) { + unsigned tmp = finalReg; + finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass); + BuildMI(start, dl, TII->get(MBlaze::XORI), finalReg) + .addReg(tmp) + .addImm(-1); } + break; + } + + case MBlaze::CAS32: { + finalReg = MI->getOperand(3).getReg(); + final = F->CreateMachineBasicBlock(LLVM_BB); + + F->insert(It, final); + start->addSuccessor(exit); + start->addSuccessor(final); + final->addSuccessor(exit); + final->addSuccessor(start); + + unsigned CMP = R.createVirtualRegister(MBlaze::GPRRegisterClass); + BuildMI(start, dl, TII->get(MBlaze::CMP), CMP) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(2).getReg()); - F->insert(It, flsBB); - F->insert(It, dneBB); - - // Transfer the remainder of BB and its successor edges to dneBB. 
- dneBB->splice(dneBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - dneBB->transferSuccessorsAndUpdatePHIs(BB); - - BB->addSuccessor(flsBB); - BB->addSuccessor(dneBB); - flsBB->addSuccessor(dneBB); - - BuildMI(BB, dl, TII->get(Opc)) - .addReg(MI->getOperand(3).getReg()) - .addMBB(dneBB); - - // sinkMBB: - // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] - // ... - //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) - // .addReg(MI->getOperand(1).getReg()).addMBB(flsBB) - // .addReg(MI->getOperand(2).getReg()).addMBB(BB); - - BuildMI(*dneBB, dneBB->begin(), dl, - TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(2).getReg()).addMBB(flsBB) - .addReg(MI->getOperand(1).getReg()).addMBB(BB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return dneBB; + BuildMI(start, dl, TII->get(MBlaze::BNEID)) + .addReg(CMP) + .addMBB(exit); + + final->moveAfter(start); + exit->moveAfter(final); + break; } } + + unsigned CHK = R.createVirtualRegister(MBlaze::GPRRegisterClass); + BuildMI(final, dl, TII->get(MBlaze::SWX)) + .addReg(finalReg) + .addReg(MI->getOperand(1).getReg()) + .addReg(MBlaze::R0); + + BuildMI(final, dl, TII->get(MBlaze::ADDIC), CHK) + .addReg(MBlaze::R0) + .addImm(0); + + BuildMI(final, dl, TII->get(MBlaze::BNEID)) + .addReg(CHK) + .addMBB(start); + + // The pseudo instruction is no longer needed so remove it + MI->eraseFromParent(); + return exit; } //===----------------------------------------------------------------------===// @@ -392,9 +581,9 @@ SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, CompareFlag = DAG.getNode(MBlazeISD::ICmp, dl, MVT::i32, LHS, RHS) .getValue(1); } else { - llvm_unreachable( "Cannot lower select_cc with unknown type" ); + llvm_unreachable("Cannot lower select_cc with unknown type"); } - + return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal, CompareFlag); } @@ -421,15 +610,12 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { SDValue HiPart; // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); - bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; - unsigned char OpFlag = IsPIC ? MBlazeII::MO_GOT : MBlazeII::MO_ABS_HILO; EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, 0); return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI); - //return JTI; } SDValue MBlazeTargetLowering:: @@ -440,7 +626,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - N->getOffset(), MBlazeII::MO_ABS_HILO); + N->getOffset(), 0); return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP); } @@ -456,7 +642,8 @@ SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op, // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. 
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0, + return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), + MachinePointerInfo(SV), false, false, 0); } @@ -466,52 +653,24 @@ SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op, #include "MBlazeGenCallingConv.inc" -static bool CC_MBlaze2(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const unsigned RegsSize=6; - static const unsigned IntRegs[] = { +static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned ArgRegs[] = { MBlaze::R5, MBlaze::R6, MBlaze::R7, MBlaze::R8, MBlaze::R9, MBlaze::R10 }; - static const unsigned FltRegs[] = { - MBlaze::F5, MBlaze::F6, MBlaze::F7, - MBlaze::F8, MBlaze::F9, MBlaze::F10 - }; + const unsigned NumArgRegs = array_lengthof(ArgRegs); + unsigned Reg = State.AllocateReg(ArgRegs, NumArgRegs); + if (!Reg) return false; - unsigned Reg=0; - - // Promote i8 and i16 - if (LocVT == MVT::i8 || LocVT == MVT::i16) { - LocVT = MVT::i32; - if (ArgFlags.isSExt()) - LocInfo = CCValAssign::SExt; - else if (ArgFlags.isZExt()) - LocInfo = CCValAssign::ZExt; - else - LocInfo = CCValAssign::AExt; - } - - if (ValVT == MVT::i32) { - Reg = State.AllocateReg(IntRegs, RegsSize); - LocVT = MVT::i32; - } else if (ValVT == MVT::f32) { - Reg = State.AllocateReg(FltRegs, RegsSize); - LocVT = MVT::f32; - } + unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; + State.AllocateStack(SizeInBytes, SizeInBytes); + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - if (!Reg) { - unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; - unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - } else { - unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; - State.AllocateStack(SizeInBytes, SizeInBytes); - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - } - - return false; // CC must always match + return true; } //===----------------------------------------------------------------------===// @@ -532,31 +691,35 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, // MBlaze does not yet support tail call optimization isTailCall = false; + // The MBlaze requires stack slots for arguments passed to var arg + // functions even if they are passed in registers. + bool needsRegArgSlots = isVarArg; + MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering(); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze2); + CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze); // Get a count of how many bytes are to be pushed on the stack. 
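As an aside on CC_MBlaze_AssignReg above: the first six 32-bit arguments are assigned R5 through R10, and each register argument still reserves a 4-byte stack slot. A toy standalone model (hypothetical driver, not backend code):

    #include <cstdio>
    int main() {
      const char *ArgRegs[] = {"R5", "R6", "R7", "R8", "R9", "R10"};
      unsigned StackOffset = 0;                // a 4-byte slot per argument
      for (unsigned Arg = 0; Arg < 8; ++Arg) { // 8 hypothetical i32 args
        if (Arg < 6)
          std::printf("arg%u -> %s, slot at +%u\n", Arg, ArgRegs[Arg],
                      StackOffset);
        else
          std::printf("arg%u -> stack at +%u\n", Arg, StackOffset);
        StackOffset += 4;                      // AllocateStack(4, 4)
      }
      return 0;
    }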
unsigned NumBytes = CCInfo.getNextStackOffset();
+
+  // Variable argument function calls require a minimum of 24 bytes of stack,
+  // enough to hold slots for the six argument registers R5-R10.
+  if (isVarArg && NumBytes < 24) NumBytes = 24;
+
   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
 
-  // First/LastArgStackLoc contains the first/last
-  // "at stack" argument location.
-  int LastArgStackLoc = 0;
-  unsigned FirstStackArgLoc = 0;
-
   // Walk the register/memloc assignments, inserting copies/loads.
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
-    EVT RegVT = VA.getLocVT();
+    MVT RegVT = VA.getLocVT();
     SDValue Arg = OutVals[i];
 
     // Promote the value if needed.
@@ -582,20 +745,31 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
 
     // Register can't get to this point...
     assert(VA.isMemLoc());
 
+    // Since we are already passing values on the stack we don't
+    // need to worry about creating additional slots for the
+    // values passed via registers.
+    needsRegArgSlots = false;
+
     // Create the frame index object for this incoming parameter
-    LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
-    int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
-                                    LastArgStackLoc, true);
+    unsigned ArgSize = VA.getValVT().getSizeInBits()/8;
+    unsigned StackLoc = VA.getLocMemOffset() + 4;
+    int FI = MFI->CreateFixedObject(ArgSize, StackLoc, true);
 
     SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
 
     // emit ISD::STORE which stores the
     // parameter value to a stack location
-    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                       MachinePointerInfo(),
                                        false, false, 0));
     }
   }
 
+  // If we need to reserve stack space for the arguments passed via registers
+  // then create a fixed stack object at the beginning of the stack.
+  if (needsRegArgSlots && TFI.hasReservedCallFrame(MF))
+    MFI->CreateFixedObject(28,0,true);
+
   // Transform all store nodes into one single node because all store
   // nodes are independent of each other.
   if (!MemOpChains.empty())
@@ -616,19 +790,18 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
   // node so that legalize doesn't hack it.
-  unsigned char OpFlag = MBlazeII::MO_NO_FLAG;
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
-                                        getPointerTy(), 0, OpFlag);
+                                        getPointerTy(), 0, 0);
   else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
     Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
-                                         getPointerTy(), OpFlag);
+                                         getPointerTy(), 0);
 
   // MBlazeJmpLink = #chain, #target_address, #opt_in_flags...
   //               = Chain, Callee, Reg#1, Reg#2, ...
   //
   // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); Ops.push_back(Callee); @@ -678,7 +851,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, RVLocs[i].getValVT(), InFlag).getValue(1); InFlag = Chain.getValue(2); InVals.push_back(Chain.getValue(0)); - } + } return Chain; } @@ -713,30 +886,28 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze2); + CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze); SDValue StackPtr; - unsigned FirstStackArgLoc = 0; - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; // Arguments stored on registers if (VA.isRegLoc()) { - EVT RegVT = VA.getLocVT(); + MVT RegVT = VA.getLocVT(); ArgRegEnd = VA.getLocReg(); TargetRegisterClass *RC = 0; if (RegVT == MVT::i32) - RC = MBlaze::CPURegsRegisterClass; + RC = MBlaze::GPRRegisterClass; else if (RegVT == MVT::f32) - RC = MBlaze::FGR32RegisterClass; + RC = MBlaze::GPRRegisterClass; else llvm_unreachable("RegVT not supported by LowerFormalArguments"); // Transform the arguments stored on // physical registers into virtual ones - unsigned Reg = MF.addLiveIn(ArgRegEnd, RC); + unsigned Reg = MF.addLiveIn(ArgRegEnd, RC, dl); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it has been passed promoted @@ -756,9 +927,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, } InVals.push_back(ArgValue); - } else { // VA.isRegLoc() - // sanity check assert(VA.isMemLoc()); @@ -774,41 +943,44 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // offset on PEI::calculateFrameObjectOffsets. // Arguments are always 32-bit. unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + unsigned StackLoc = VA.getLocMemOffset() + 4; int FI = MFI->CreateFixedObject(ArgSize, 0, true); - MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+ - (FirstStackArgLoc + VA.getLocMemOffset()))); + MBlazeFI->recordLoadArgsFI(FI, -StackLoc); + MBlazeFI->recordLiveIn(FI); // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), false, false, 0)); } } // To meet ABI, when VARARGS are passed on registers, the registers // must have their values written to the caller stack frame. If the last - // argument was placed in the stack, there's no need to save any register. + // argument was placed in the stack, there's no need to save any register. 
if ((isVarArg) && ArgRegEnd) { if (StackPtr.getNode() == 0) StackPtr = DAG.getRegister(StackReg, getPointerTy()); // The last register argument that must be saved is MBlaze::R10 - TargetRegisterClass *RC = MBlaze::CPURegsRegisterClass; + TargetRegisterClass *RC = MBlaze::GPRRegisterClass; unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5); unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1); unsigned End = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R10); - unsigned StackLoc = ArgLocs.size()-1 + (Start - Begin); + unsigned StackLoc = Start - Begin + 1; for (; Start <= End; ++Start, ++StackLoc) { unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start); - unsigned LiveReg = MF.addLiveIn(Reg, RC); + unsigned LiveReg = MF.addLiveIn(Reg, RC, dl); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32); int FI = MFI->CreateFixedObject(4, 0, true); - MBlazeFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); + MBlazeFI->recordStoreVarArgsFI(FI, -(StackLoc*4)); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); - OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0, + OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, + MachinePointerInfo(), false, false, 0)); // Record the frame index of the first variable argument @@ -818,7 +990,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, } } - // All stores are grouped in one node to allow the matching between + // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. This only happens when on varg functions if (!OutChains.empty()) { OutChains.push_back(Chain); @@ -872,13 +1044,18 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, Flag = Chain.getValue(1); } - // Return on MBlaze is always a "rtsd R15, 8" + // If this function is using the interrupt_handler calling convention + // then use "rtid r14, 0" otherwise use "rtsd r15, 8" + unsigned Ret = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet + : MBlazeISD::Ret; + unsigned Reg = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlaze::R14 + : MBlaze::R15; + SDValue DReg = DAG.getRegister(Reg, MVT::i32); + if (Flag.getNode()) - return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other, - Chain, DAG.getRegister(MBlaze::R15, MVT::i32), Flag); - else // Return Void - return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other, - Chain, DAG.getRegister(MBlaze::R15, MVT::i32)); + return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg, Flag); + + return DAG.getNode(Ret, dl, MVT::Other, Chain, DReg); } //===----------------------------------------------------------------------===// @@ -909,6 +1086,37 @@ getConstraintType(const std::string &Constraint) const return TargetLowering::getConstraintType(Constraint); } +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight +MBlazeTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + const Type *type = CallOperandVal->getType(); + // Look at the constraint type. 
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'd':
+ case 'y':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
 /// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
 /// return a list of registers that can be used to satisfy the constraint.
 /// This should only be used for C_RegisterClass constraints.
@@ -917,10 +1125,10 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
 if (Constraint.size() == 1) {
 switch (Constraint[0]) {
 case 'r':
- return std::make_pair(0U, MBlaze::CPURegsRegisterClass);
+ return std::make_pair(0U, MBlaze::GPRRegisterClass);
 case 'f':
 if (VT == MVT::f32)
- return std::make_pair(0U, MBlaze::FGR32RegisterClass);
+ return std::make_pair(0U, MBlaze::GPRRegisterClass);
 }
 }
 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
@@ -940,6 +1148,7 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
 // GCC MBlaze Constraint Letters
 case 'd':
 case 'y':
+ case 'f':
 return make_vector<unsigned>(
 MBlaze::R3, MBlaze::R4, MBlaze::R5, MBlaze::R6,
 MBlaze::R7, MBlaze::R9, MBlaze::R10, MBlaze::R11,
@@ -947,15 +1156,6 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
 MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25,
 MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29,
 MBlaze::R30, MBlaze::R31, 0);
-
- case 'f':
- return make_vector<unsigned>(
- MBlaze::F3, MBlaze::F4, MBlaze::F5, MBlaze::F6,
- MBlaze::F7, MBlaze::F9, MBlaze::F10, MBlaze::F11,
- MBlaze::F12, MBlaze::F19, MBlaze::F20, MBlaze::F21,
- MBlaze::F22, MBlaze::F23, MBlaze::F24, MBlaze::F25,
- MBlaze::F26, MBlaze::F27, MBlaze::F28, MBlaze::F29,
- MBlaze::F30, MBlaze::F31, 0);
 }
 return std::vector<unsigned>();
 }
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index 5ec2563..91649bc 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -15,6 +15,7 @@
 #ifndef MBlazeISELLOWERING_H
 #define MBlazeISELLOWERING_H
 
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetLowering.h"
 #include "MBlaze.h"
@@ -31,6 +32,30 @@ namespace llvm {
 GE, LE
 };
+
+ inline static CC getOppositeCondition(CC cc) {
+ switch (cc) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case GT: return LE;
+ case LT: return GE;
+ case GE: return LT;
+ case LE: return GT;
+ }
+ }
+
+ inline static const char *MBlazeCCToString(CC cc) {
+ switch (cc) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return "eq";
+ case NE: return "ne";
+ case GT: return "gt";
+ case LT: return "lt";
+ case GE: return "ge";
+ case LE: return "le";
+ }
+ }
 }
 
 namespace MBlazeISD {
@@ -53,8 +78,11 @@ namespace llvm {
 // Integer Compare
 ICmp,
 
- // Return
- Ret
+ // Return from subroutine
+ Ret,
+
+ // Return from interrupt
+ IRet
 };
 }
@@ -121,6 +149,15 @@ namespace llvm {
 const SmallVectorImpl<SDValue> &OutVals,
 DebugLoc dl, SelectionDAG &DAG) const;
 
+ virtual MachineBasicBlock*
+ EmitCustomShift(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomSelect(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+ virtual MachineBasicBlock*
+ EmitCustomAtomic(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
 virtual MachineBasicBlock
* EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; @@ -128,6 +165,11 @@ namespace llvm { // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. + ConstraintWeight getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const; + std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td index 657b1d4..094de5c 100644 --- a/lib/Target/MBlaze/MBlazeInstrFPU.td +++ b/lib/Target/MBlaze/MBlazeInstrFPU.td @@ -19,72 +19,72 @@ // Memory Access Instructions //===----------------------------------------------------------------------===// class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> : - TA<op, 0x000, (outs FGR32:$dst), (ins memrr:$addr), + TA<op, 0x000, (outs GPR:$dst), (ins memrr:$addr), !strconcat(instr_asm, " $dst, $addr"), - [(set FGR32:$dst, (OpNode xaddr:$addr))], IILoad>; + [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IILoad>; class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> : - TAI<op, (outs FGR32:$dst), (ins memri:$addr), - !strconcat(instr_asm, " $dst, $addr"), - [(set FGR32:$dst, (OpNode iaddr:$addr))], IILoad>; + TB<op, (outs GPR:$dst), (ins memri:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IILoad>; class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> : - TA<op, 0x000, (outs), (ins FGR32:$dst, memrr:$addr), + TA<op, 0x000, (outs), (ins GPR:$dst, memrr:$addr), !strconcat(instr_asm, " $dst, $addr"), - [(OpNode FGR32:$dst, xaddr:$addr)], IIStore>; + [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIStore>; class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> : - TAI<op, (outs), (ins FGR32:$dst, memrr:$addr), - !strconcat(instr_asm, " $dst, $addr"), - [(OpNode FGR32:$dst, iaddr:$addr)], IIStore>; + TB<op, (outs), (ins GPR:$dst, memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIStore>; class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, InstrItinClass itin> : - TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c), + TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c), !strconcat(instr_asm, " $dst, $b, $c"), - [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>; + [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>; class CmpFN<bits<6> op, bits<11> flags, string instr_asm, InstrItinClass itin> : - TA<op, flags, (outs CPURegs:$dst), (ins FGR32:$b, FGR32:$c), + TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c), !strconcat(instr_asm, " $dst, $b, $c"), [], itin>; class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, InstrItinClass itin> : - TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c), - !strconcat(instr_asm, " $dst, $c, $b"), - [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>; - -class ArithF2<bits<6> op, bits<11> flags, string instr_asm, - InstrItinClass itin> : - TF<op, flags, (outs FGR32:$dst), (ins FGR32:$b), - !strconcat(instr_asm, " $dst, $b"), - [], itin>; - -class ArithIF<bits<6> op, bits<11> flags, string instr_asm, - InstrItinClass itin> : - TF<op, flags, (outs FGR32:$dst), (ins CPURegs:$b), - !strconcat(instr_asm, " $dst, $b"), - [], itin>; - -class ArithFI<bits<6> op, bits<11> flags, string 
instr_asm, - InstrItinClass itin> : - TF<op, flags, (outs CPURegs:$dst), (ins FGR32:$b), - !strconcat(instr_asm, " $dst, $b"), - [], itin>; + TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c), + !strconcat(instr_asm, " $dst, $c, $b"), + [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>; class LogicF<bits<6> op, string instr_asm> : - TAI<op, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c), - !strconcat(instr_asm, " $dst, $b, $c"), - [], - IIAlu>; + TB<op, (outs GPR:$dst), (ins GPR:$b, GPR:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], IIAlu>; class LogicFI<bits<6> op, string instr_asm> : - TAI<op, (outs FGR32:$dst), (ins FGR32:$b, fimm:$c), - !strconcat(instr_asm, " $dst, $b, $c"), - [], - IIAlu>; + TB<op, (outs GPR:$dst), (ins GPR:$b, fimm:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], IIAlu>; + +let rb=0 in { + class ArithF2<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TA<op, flags, (outs GPR:$dst), (ins GPR:$b), + !strconcat(instr_asm, " $dst, $b"), + [], itin>; + + class ArithIF<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TA<op, flags, (outs GPR:$dst), (ins GPR:$b), + !strconcat(instr_asm, " $dst, $b"), + [], itin>; + + class ArithFI<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TA<op, flags, (outs GPR:$dst), (ins GPR:$b), + !strconcat(instr_asm, " $dst, $b"), + [], itin>; +} //===----------------------------------------------------------------------===// // Pseudo instructions @@ -94,24 +94,25 @@ class LogicFI<bits<6> op, string instr_asm> : // FPU Arithmetic Instructions //===----------------------------------------------------------------------===// let Predicates=[HasFPU] in { - def FOR : LogicF<0x28, "or ">; def FORI : LogicFI<0x28, "ori ">; def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIAlu>; def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIAlu>; def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIAlu>; def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIAlu>; +} - def LWF : LoadFM<0x32, "lw ", load>; - def LWFI : LoadFMI<0x32, "lwi ", load>; +let Predicates=[HasFPU], isCodeGenOnly=1 in { + def LWF : LoadFM<0x32, "lw ", load>; + def LWFI : LoadFMI<0x3A, "lwi ", load>; - def SWF : StoreFM<0x32, "sw ", store>; - def SWFI : StoreFMI<0x32, "swi ", store>; + def SWF : StoreFM<0x36, "sw ", store>; + def SWFI : StoreFMI<0x3E, "swi ", store>; } let Predicates=[HasFPU,HasSqrt] in { def FLT : ArithIF<0x16, 0x280, "flt ", IIAlu>; def FINT : ArithFI<0x16, 0x300, "fint ", IIAlu>; - def FSQRT : ArithF2<0x16, 0x300, "fsqrt ", IIAlu>; + def FSQRT : ArithF2<0x16, 0x380, "fsqrt ", IIAlu>; } let isAsCheapAsAMove = 1 in { @@ -126,98 +127,98 @@ let isAsCheapAsAMove = 1 in { let usesCustomInserter = 1 in { - def Select_FCC : MBlazePseudo<(outs FGR32:$dst), - (ins FGR32:$T, FGR32:$F, CPURegs:$CMP, i32imm:$CC), + def Select_FCC : MBlazePseudo<(outs GPR:$dst), + (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), "; SELECT_FCC PSEUDO!", []>; } // Floating point conversions let Predicates=[HasFPU] in { - def : Pat<(sint_to_fp CPURegs:$V), (FLT CPURegs:$V)>; - def : Pat<(fp_to_sint FGR32:$V), (FINT FGR32:$V)>; - def : Pat<(fsqrt FGR32:$V), (FSQRT FGR32:$V)>; + def : Pat<(sint_to_fp GPR:$V), (FLT GPR:$V)>; + def : Pat<(fp_to_sint GPR:$V), (FINT GPR:$V)>; + def : Pat<(fsqrt GPR:$V), (FSQRT GPR:$V)>; } // SET_CC operations let Predicates=[HasFPU] in { - def : Pat<(setcc FGR32:$L, FGR32:$R, SETEQ), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_EQ FGR32:$L, FGR32:$R), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETNE), - 
(Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_EQ FGR32:$L, FGR32:$R), 1)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETOEQ), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_EQ FGR32:$L, FGR32:$R), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (XOR (FCMP_UN FGR32:$L, FGR32:$R), - (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (OR (FCMP_UN FGR32:$L, FGR32:$R), - (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETGT), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_GT FGR32:$L, FGR32:$R), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETLT), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_LT FGR32:$L, FGR32:$R), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETGE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_GE FGR32:$L, FGR32:$R), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETLE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_LE FGR32:$L, FGR32:$R), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGT), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_GT FGR32:$L, FGR32:$R), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLT), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_LT FGR32:$L, FGR32:$R), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_GE FGR32:$L, FGR32:$R), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_LE FGR32:$L, FGR32:$R), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETUEQ), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (OR (FCMP_UN FGR32:$L, FGR32:$R), - (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETUNE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_NE FGR32:$L, FGR32:$R), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGT), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (OR (FCMP_UN FGR32:$L, FGR32:$R), - (FCMP_GT FGR32:$L, FGR32:$R)), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETULT), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (OR (FCMP_UN FGR32:$L, FGR32:$R), - (FCMP_LT FGR32:$L, FGR32:$R)), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (OR (FCMP_UN FGR32:$L, FGR32:$R), - (FCMP_GE FGR32:$L, FGR32:$R)), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETULE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (OR (FCMP_UN FGR32:$L, FGR32:$R), - (FCMP_LE FGR32:$L, FGR32:$R)), 2)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETO), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_UN FGR32:$L, FGR32:$R), 1)>; - def : Pat<(setcc FGR32:$L, FGR32:$R, SETUO), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (FCMP_UN FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETEQ), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_EQ GPR:$L, GPR:$R), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETNE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_EQ GPR:$L, GPR:$R), 1)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOEQ), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_EQ GPR:$L, GPR:$R), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (XOR (FCMP_UN GPR:$L, GPR:$R), + (FCMP_EQ GPR:$L, GPR:$R)), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETONE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (OR (FCMP_UN GPR:$L, GPR:$R), + (FCMP_EQ GPR:$L, GPR:$R)), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), 
SETGT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_GT GPR:$L, GPR:$R), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_LT GPR:$L, GPR:$R), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETGE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_GE GPR:$L, GPR:$R), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETLE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_LE GPR:$L, GPR:$R), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_GT GPR:$L, GPR:$R), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_LT GPR:$L, GPR:$R), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOGE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_GE GPR:$L, GPR:$R), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETOLE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_LE GPR:$L, GPR:$R), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUEQ), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (OR (FCMP_UN GPR:$L, GPR:$R), + (FCMP_EQ GPR:$L, GPR:$R)), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUNE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_NE GPR:$L, GPR:$R), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (OR (FCMP_UN GPR:$L, GPR:$R), + (FCMP_GT GPR:$L, GPR:$R)), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (OR (FCMP_UN GPR:$L, GPR:$R), + (FCMP_LT GPR:$L, GPR:$R)), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUGE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (OR (FCMP_UN GPR:$L, GPR:$R), + (FCMP_GE GPR:$L, GPR:$R)), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETULE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (OR (FCMP_UN GPR:$L, GPR:$R), + (FCMP_LE GPR:$L, GPR:$R)), 2)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETO), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_UN GPR:$L, GPR:$R), 1)>; + def : Pat<(setcc (f32 GPR:$L), (f32 GPR:$R), SETUO), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (FCMP_UN GPR:$L, GPR:$R), 2)>; } // SELECT operations -def : Pat<(select CPURegs:$C, FGR32:$T, FGR32:$F), - (Select_FCC FGR32:$T, FGR32:$F, CPURegs:$C, 2)>; +def : Pat<(select (i32 GPR:$C), (f32 GPR:$T), (f32 GPR:$F)), + (Select_FCC GPR:$T, GPR:$F, GPR:$C, 2)>; //===----------------------------------------------------------------------===// // Patterns for Floating Point Instructions //===----------------------------------------------------------------------===// -def : Pat<(f32 fpimm:$imm), (FORI F0, fpimm:$imm)>; +def : Pat<(f32 fpimm:$imm), (FORI (i32 R0), fpimm:$imm)>; diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td index 5158411..3209845 100644 --- a/lib/Target/MBlaze/MBlazeInstrFSL.td +++ b/lib/Target/MBlaze/MBlazeInstrFSL.td @@ -10,144 +10,220 @@ //===----------------------------------------------------------------------===// // FSL Instruction Formats //===----------------------------------------------------------------------===// -class FSLGetD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> : - TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b), - !strconcat(instr_asm, " $dst, $b"), - 
[(set CPURegs:$dst, (OpNode CPURegs:$b))], IIAlu>; - -class FSLGet<bits<6> op, string instr_asm, Intrinsic OpNode> : - TAI<op, (outs CPURegs:$dst), (ins fslimm:$b), - !strconcat(instr_asm, " $dst, $b"), - [(set CPURegs:$dst, (OpNode immZExt4:$b))], IIAlu>; - -class FSLPutD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> : - TA<op, flags, (outs), (ins CPURegs:$v, CPURegs:$b), - !strconcat(instr_asm, " $v, $b"), - [(OpNode CPURegs:$v, CPURegs:$b)], IIAlu>; - -class FSLPut<bits<6> op, string instr_asm, Intrinsic OpNode> : - TAI<op, (outs), (ins CPURegs:$v, fslimm:$b), - !strconcat(instr_asm, " $v, $b"), - [(OpNode CPURegs:$v, immZExt4:$b)], IIAlu>; - -class FSLPutTD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> : - TA<op, flags, (outs), (ins CPURegs:$b), - !strconcat(instr_asm, " $b"), - [(OpNode CPURegs:$b)], IIAlu>; - -class FSLPutT<bits<6> op, string instr_asm, Intrinsic OpNode> : - TAI<op, (outs), (ins fslimm:$b), - !strconcat(instr_asm, " $b"), - [(OpNode immZExt4:$b)], IIAlu>; +class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> : + MBlazeInst<op, FRCX, (outs GPR:$dst), (ins fslimm:$b), + !strconcat(instr_asm, " $dst, $b"), + [(set GPR:$dst, (OpNode immZExt4:$b))],IIAlu> +{ + bits<5> rd; + bits<4> fslno; + + let Inst{6-10} = rd; + let Inst{11-15} = 0x0; + let Inst{16} = 0x0; + let Inst{17-21} = flags; // NCTAE + let Inst{22-27} = 0x0; + let Inst{28-31} = fslno; +} + +class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> : + MBlazeInst<op, FRCR, (outs GPR:$dst), (ins GPR:$b), + !strconcat(instr_asm, " $dst, $b"), + [(set GPR:$dst, (OpNode GPR:$b))], IIAlu> +{ + bits<5> rd; + bits<5> rb; + + let Inst{6-10} = rd; + let Inst{11-15} = 0x0; + let Inst{16-20} = rb; + let Inst{21} = 0x0; + let Inst{22-26} = flags; // NCTAE + let Inst{27-31} = 0x0; +} + +class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> : + MBlazeInst<op, FCRCX, (outs), (ins GPR:$v, fslimm:$b), + !strconcat(instr_asm, " $v, $b"), + [(OpNode GPR:$v, immZExt4:$b)], IIAlu> +{ + bits<5> ra; + bits<4> fslno; + + let Inst{6-10} = 0x0; + let Inst{11-15} = ra; + let Inst{16} = 0x1; + let Inst{17-20} = flags; // NCTA + let Inst{21-27} = 0x0; + let Inst{28-31} = fslno; +} + +class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> : + MBlazeInst<op, FCRR, (outs), (ins GPR:$v, GPR:$b), + !strconcat(instr_asm, " $v, $b"), + [(OpNode GPR:$v, GPR:$b)], IIAlu> +{ + bits<5> ra; + bits<5> rb; + + let Inst{6-10} = 0x0; + let Inst{11-15} = ra; + let Inst{16-20} = rb; + let Inst{21} = 0x1; + let Inst{22-25} = flags; // NCTA + let Inst{26-31} = 0x0; +} + +class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> : + MBlazeInst<op, FCX, (outs), (ins fslimm:$b), + !strconcat(instr_asm, " $b"), + [(OpNode immZExt4:$b)], IIAlu> +{ + bits<4> fslno; + + let Inst{6-10} = 0x0; + let Inst{11-15} = 0x0; + let Inst{16} = 0x1; + let Inst{17-20} = flags; // NCTA + let Inst{21-27} = 0x0; + let Inst{28-31} = fslno; +} + +class FSLPutTD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> : + MBlazeInst<op, FCR, (outs), (ins GPR:$b), + !strconcat(instr_asm, " $b"), + [(OpNode GPR:$b)], IIAlu> +{ + bits<5> rb; + + let Inst{6-10} = 0x0; + let Inst{11-15} = 0x0; + let Inst{16-20} = rb; + let Inst{21} = 0x1; + let Inst{22-25} = flags; // NCTA + let Inst{26-31} = 0x0; +} //===----------------------------------------------------------------------===// // FSL Get Instructions 
//===----------------------------------------------------------------------===// -def GET : FSLGet<0x1B, "get ", int_mblaze_fsl_get>; -def AGET : FSLGet<0x1B, "aget ", int_mblaze_fsl_aget>; -def CGET : FSLGet<0x1B, "cget ", int_mblaze_fsl_cget>; -def CAGET : FSLGet<0x1B, "caget ", int_mblaze_fsl_caget>; -def EGET : FSLGet<0x1B, "eget ", int_mblaze_fsl_eget>; -def EAGET : FSLGet<0x1B, "eaget ", int_mblaze_fsl_eaget>; -def ECGET : FSLGet<0x1B, "ecget ", int_mblaze_fsl_ecget>; -def ECAGET : FSLGet<0x1B, "ecaget ", int_mblaze_fsl_ecaget>; -def NGET : FSLGet<0x1B, "nget ", int_mblaze_fsl_nget>; -def NAGET : FSLGet<0x1B, "naget ", int_mblaze_fsl_naget>; -def NCGET : FSLGet<0x1B, "ncget ", int_mblaze_fsl_ncget>; -def NCAGET : FSLGet<0x1B, "ncaget ", int_mblaze_fsl_ncaget>; -def NEGET : FSLGet<0x1B, "neget ", int_mblaze_fsl_neget>; -def NEAGET : FSLGet<0x1B, "neaget ", int_mblaze_fsl_neaget>; -def NECGET : FSLGet<0x1B, "necget ", int_mblaze_fsl_necget>; -def NECAGET : FSLGet<0x1B, "necaget ", int_mblaze_fsl_necaget>; -def TGET : FSLGet<0x1B, "tget ", int_mblaze_fsl_tget>; -def TAGET : FSLGet<0x1B, "taget ", int_mblaze_fsl_taget>; -def TCGET : FSLGet<0x1B, "tcget ", int_mblaze_fsl_tcget>; -def TCAGET : FSLGet<0x1B, "tcaget ", int_mblaze_fsl_tcaget>; -def TEGET : FSLGet<0x1B, "teget ", int_mblaze_fsl_teget>; -def TEAGET : FSLGet<0x1B, "teaget ", int_mblaze_fsl_teaget>; -def TECGET : FSLGet<0x1B, "tecget ", int_mblaze_fsl_tecget>; -def TECAGET : FSLGet<0x1B, "tecaget ", int_mblaze_fsl_tecaget>; -def TNGET : FSLGet<0x1B, "tnget ", int_mblaze_fsl_tnget>; -def TNAGET : FSLGet<0x1B, "tnaget ", int_mblaze_fsl_tnaget>; -def TNCGET : FSLGet<0x1B, "tncget ", int_mblaze_fsl_tncget>; -def TNCAGET : FSLGet<0x1B, "tncaget ", int_mblaze_fsl_tncaget>; -def TNEGET : FSLGet<0x1B, "tneget ", int_mblaze_fsl_tneget>; -def TNEAGET : FSLGet<0x1B, "tneaget ", int_mblaze_fsl_tneaget>; -def TNECGET : FSLGet<0x1B, "tnecget ", int_mblaze_fsl_tnecget>; -def TNECAGET : FSLGet<0x1B, "tnecaget ", int_mblaze_fsl_tnecaget>; +def GET : FSLGet<0x1B, 0x00, "get ", int_mblaze_fsl_get>; +def AGET : FSLGet<0x1B, 0x02, "aget ", int_mblaze_fsl_aget>; +def CGET : FSLGet<0x1B, 0x08, "cget ", int_mblaze_fsl_cget>; +def CAGET : FSLGet<0x1B, 0x0A, "caget ", int_mblaze_fsl_caget>; +def EGET : FSLGet<0x1B, 0x01, "eget ", int_mblaze_fsl_eget>; +def EAGET : FSLGet<0x1B, 0x03, "eaget ", int_mblaze_fsl_eaget>; +def ECGET : FSLGet<0x1B, 0x09, "ecget ", int_mblaze_fsl_ecget>; +def ECAGET : FSLGet<0x1B, 0x0B, "ecaget ", int_mblaze_fsl_ecaget>; +def TGET : FSLGet<0x1B, 0x04, "tget ", int_mblaze_fsl_tget>; +def TAGET : FSLGet<0x1B, 0x06, "taget ", int_mblaze_fsl_taget>; +def TCGET : FSLGet<0x1B, 0x0C, "tcget ", int_mblaze_fsl_tcget>; +def TCAGET : FSLGet<0x1B, 0x0E, "tcaget ", int_mblaze_fsl_tcaget>; +def TEGET : FSLGet<0x1B, 0x05, "teget ", int_mblaze_fsl_teget>; +def TEAGET : FSLGet<0x1B, 0x07, "teaget ", int_mblaze_fsl_teaget>; +def TECGET : FSLGet<0x1B, 0x0D, "tecget ", int_mblaze_fsl_tecget>; +def TECAGET : FSLGet<0x1B, 0x0F, "tecaget ", int_mblaze_fsl_tecaget>; + +let Defs = [CARRY] in { + def NGET : FSLGet<0x1B, 0x10, "nget ", int_mblaze_fsl_nget>; + def NAGET : FSLGet<0x1B, 0x12, "naget ", int_mblaze_fsl_naget>; + def NCGET : FSLGet<0x1B, 0x18, "ncget ", int_mblaze_fsl_ncget>; + def NCAGET : FSLGet<0x1B, 0x1A, "ncaget ", int_mblaze_fsl_ncaget>; + def NEGET : FSLGet<0x1B, 0x11, "neget ", int_mblaze_fsl_neget>; + def NEAGET : FSLGet<0x1B, 0x13, "neaget ", int_mblaze_fsl_neaget>; + def NECGET : FSLGet<0x1B, 0x19, "necget ", int_mblaze_fsl_necget>; + 
def NECAGET : FSLGet<0x1B, 0x1B, "necaget ", int_mblaze_fsl_necaget>; + def TNGET : FSLGet<0x1B, 0x14, "tnget ", int_mblaze_fsl_tnget>; + def TNAGET : FSLGet<0x1B, 0x16, "tnaget ", int_mblaze_fsl_tnaget>; + def TNCGET : FSLGet<0x1B, 0x1C, "tncget ", int_mblaze_fsl_tncget>; + def TNCAGET : FSLGet<0x1B, 0x1E, "tncaget ", int_mblaze_fsl_tncaget>; + def TNEGET : FSLGet<0x1B, 0x15, "tneget ", int_mblaze_fsl_tneget>; + def TNEAGET : FSLGet<0x1B, 0x17, "tneaget ", int_mblaze_fsl_tneaget>; + def TNECGET : FSLGet<0x1B, 0x1D, "tnecget ", int_mblaze_fsl_tnecget>; + def TNECAGET : FSLGet<0x1B, 0x1F, "tnecaget ", int_mblaze_fsl_tnecaget>; +} //===----------------------------------------------------------------------===// // FSL Dynamic Get Instructions //===----------------------------------------------------------------------===// -def GETD : FSLGetD<0x1B, 0x00, "getd ", int_mblaze_fsl_get>; -def AGETD : FSLGetD<0x1B, 0x00, "agetd ", int_mblaze_fsl_aget>; -def CGETD : FSLGetD<0x1B, 0x00, "cgetd ", int_mblaze_fsl_cget>; -def CAGETD : FSLGetD<0x1B, 0x00, "cagetd ", int_mblaze_fsl_caget>; -def EGETD : FSLGetD<0x1B, 0x00, "egetd ", int_mblaze_fsl_eget>; -def EAGETD : FSLGetD<0x1B, 0x00, "eagetd ", int_mblaze_fsl_eaget>; -def ECGETD : FSLGetD<0x1B, 0x00, "ecgetd ", int_mblaze_fsl_ecget>; -def ECAGETD : FSLGetD<0x1B, 0x00, "ecagetd ", int_mblaze_fsl_ecaget>; -def NGETD : FSLGetD<0x1B, 0x00, "ngetd ", int_mblaze_fsl_nget>; -def NAGETD : FSLGetD<0x1B, 0x00, "nagetd ", int_mblaze_fsl_naget>; -def NCGETD : FSLGetD<0x1B, 0x00, "ncgetd ", int_mblaze_fsl_ncget>; -def NCAGETD : FSLGetD<0x1B, 0x00, "ncagetd ", int_mblaze_fsl_ncaget>; -def NEGETD : FSLGetD<0x1B, 0x00, "negetd ", int_mblaze_fsl_neget>; -def NEAGETD : FSLGetD<0x1B, 0x00, "neagetd ", int_mblaze_fsl_neaget>; -def NECGETD : FSLGetD<0x1B, 0x00, "necgetd ", int_mblaze_fsl_necget>; -def NECAGETD : FSLGetD<0x1B, 0x00, "necagetd ", int_mblaze_fsl_necaget>; -def TGETD : FSLGetD<0x1B, 0x00, "tgetd ", int_mblaze_fsl_tget>; -def TAGETD : FSLGetD<0x1B, 0x00, "tagetd ", int_mblaze_fsl_taget>; -def TCGETD : FSLGetD<0x1B, 0x00, "tcgetd ", int_mblaze_fsl_tcget>; -def TCAGETD : FSLGetD<0x1B, 0x00, "tcagetd ", int_mblaze_fsl_tcaget>; -def TEGETD : FSLGetD<0x1B, 0x00, "tegetd ", int_mblaze_fsl_teget>; -def TEAGETD : FSLGetD<0x1B, 0x00, "teagetd ", int_mblaze_fsl_teaget>; -def TECGETD : FSLGetD<0x1B, 0x00, "tecgetd ", int_mblaze_fsl_tecget>; -def TECAGETD : FSLGetD<0x1B, 0x00, "tecagetd ", int_mblaze_fsl_tecaget>; -def TNGETD : FSLGetD<0x1B, 0x00, "tngetd ", int_mblaze_fsl_tnget>; -def TNAGETD : FSLGetD<0x1B, 0x00, "tnagetd ", int_mblaze_fsl_tnaget>; -def TNCGETD : FSLGetD<0x1B, 0x00, "tncgetd ", int_mblaze_fsl_tncget>; -def TNCAGETD : FSLGetD<0x1B, 0x00, "tncagetd ", int_mblaze_fsl_tncaget>; -def TNEGETD : FSLGetD<0x1B, 0x00, "tnegetd ", int_mblaze_fsl_tneget>; -def TNEAGETD : FSLGetD<0x1B, 0x00, "tneagetd ", int_mblaze_fsl_tneaget>; -def TNECGETD : FSLGetD<0x1B, 0x00, "tnecgetd ", int_mblaze_fsl_tnecget>; -def TNECAGETD : FSLGetD<0x1B, 0x00, "tnecagetd", int_mblaze_fsl_tnecaget>; +def GETD : FSLGetD<0x13, 0x00, "getd ", int_mblaze_fsl_get>; +def AGETD : FSLGetD<0x13, 0x02, "agetd ", int_mblaze_fsl_aget>; +def CGETD : FSLGetD<0x13, 0x08, "cgetd ", int_mblaze_fsl_cget>; +def CAGETD : FSLGetD<0x13, 0x0A, "cagetd ", int_mblaze_fsl_caget>; +def EGETD : FSLGetD<0x13, 0x01, "egetd ", int_mblaze_fsl_eget>; +def EAGETD : FSLGetD<0x13, 0x03, "eagetd ", int_mblaze_fsl_eaget>; +def ECGETD : FSLGetD<0x13, 0x09, "ecgetd ", int_mblaze_fsl_ecget>; +def ECAGETD : FSLGetD<0x13, 0x0B, 
"ecagetd ", int_mblaze_fsl_ecaget>; +def TGETD : FSLGetD<0x13, 0x04, "tgetd ", int_mblaze_fsl_tget>; +def TAGETD : FSLGetD<0x13, 0x06, "tagetd ", int_mblaze_fsl_taget>; +def TCGETD : FSLGetD<0x13, 0x0C, "tcgetd ", int_mblaze_fsl_tcget>; +def TCAGETD : FSLGetD<0x13, 0x0E, "tcagetd ", int_mblaze_fsl_tcaget>; +def TEGETD : FSLGetD<0x13, 0x05, "tegetd ", int_mblaze_fsl_teget>; +def TEAGETD : FSLGetD<0x13, 0x07, "teagetd ", int_mblaze_fsl_teaget>; +def TECGETD : FSLGetD<0x13, 0x0D, "tecgetd ", int_mblaze_fsl_tecget>; +def TECAGETD : FSLGetD<0x13, 0x0F, "tecagetd ", int_mblaze_fsl_tecaget>; + +let Defs = [CARRY] in { + def NGETD : FSLGetD<0x13, 0x10, "ngetd ", int_mblaze_fsl_nget>; + def NAGETD : FSLGetD<0x13, 0x12, "nagetd ", int_mblaze_fsl_naget>; + def NCGETD : FSLGetD<0x13, 0x18, "ncgetd ", int_mblaze_fsl_ncget>; + def NCAGETD : FSLGetD<0x13, 0x1A, "ncagetd ", int_mblaze_fsl_ncaget>; + def NEGETD : FSLGetD<0x13, 0x11, "negetd ", int_mblaze_fsl_neget>; + def NEAGETD : FSLGetD<0x13, 0x13, "neagetd ", int_mblaze_fsl_neaget>; + def NECGETD : FSLGetD<0x13, 0x19, "necgetd ", int_mblaze_fsl_necget>; + def NECAGETD : FSLGetD<0x13, 0x1B, "necagetd ", int_mblaze_fsl_necaget>; + def TNGETD : FSLGetD<0x13, 0x14, "tngetd ", int_mblaze_fsl_tnget>; + def TNAGETD : FSLGetD<0x13, 0x16, "tnagetd ", int_mblaze_fsl_tnaget>; + def TNCGETD : FSLGetD<0x13, 0x1C, "tncgetd ", int_mblaze_fsl_tncget>; + def TNCAGETD : FSLGetD<0x13, 0x1E, "tncagetd ", int_mblaze_fsl_tncaget>; + def TNEGETD : FSLGetD<0x13, 0x15, "tnegetd ", int_mblaze_fsl_tneget>; + def TNEAGETD : FSLGetD<0x13, 0x17, "tneagetd ", int_mblaze_fsl_tneaget>; + def TNECGETD : FSLGetD<0x13, 0x1D, "tnecgetd ", int_mblaze_fsl_tnecget>; + def TNECAGETD : FSLGetD<0x13, 0x1F, "tnecagetd", int_mblaze_fsl_tnecaget>; +} //===----------------------------------------------------------------------===// // FSL Put Instructions //===----------------------------------------------------------------------===// -def PUT : FSLPut<0x1B, "put ", int_mblaze_fsl_put>; -def APUT : FSLPut<0x1B, "aput ", int_mblaze_fsl_aput>; -def CPUT : FSLPut<0x1B, "cput ", int_mblaze_fsl_cput>; -def CAPUT : FSLPut<0x1B, "caput ", int_mblaze_fsl_caput>; -def NPUT : FSLPut<0x1B, "nput ", int_mblaze_fsl_nput>; -def NAPUT : FSLPut<0x1B, "naput ", int_mblaze_fsl_naput>; -def NCPUT : FSLPut<0x1B, "ncput ", int_mblaze_fsl_ncput>; -def NCAPUT : FSLPut<0x1B, "ncaput ", int_mblaze_fsl_ncaput>; -def TPUT : FSLPutT<0x1B, "tput ", int_mblaze_fsl_tput>; -def TAPUT : FSLPutT<0x1B, "taput ", int_mblaze_fsl_taput>; -def TCPUT : FSLPutT<0x1B, "tcput ", int_mblaze_fsl_tcput>; -def TCAPUT : FSLPutT<0x1B, "tcaput ", int_mblaze_fsl_tcaput>; -def TNPUT : FSLPutT<0x1B, "tnput ", int_mblaze_fsl_tnput>; -def TNAPUT : FSLPutT<0x1B, "tnaput ", int_mblaze_fsl_tnaput>; -def TNCPUT : FSLPutT<0x1B, "tncput ", int_mblaze_fsl_tncput>; -def TNCAPUT : FSLPutT<0x1B, "tncaput ", int_mblaze_fsl_tncaput>; +def PUT : FSLPut<0x1B, 0x0, "put ", int_mblaze_fsl_put>; +def APUT : FSLPut<0x1B, 0x1, "aput ", int_mblaze_fsl_aput>; +def CPUT : FSLPut<0x1B, 0x4, "cput ", int_mblaze_fsl_cput>; +def CAPUT : FSLPut<0x1B, 0x5, "caput ", int_mblaze_fsl_caput>; +def TPUT : FSLPutT<0x1B, 0x2, "tput ", int_mblaze_fsl_tput>; +def TAPUT : FSLPutT<0x1B, 0x3, "taput ", int_mblaze_fsl_taput>; +def TCPUT : FSLPutT<0x1B, 0x6, "tcput ", int_mblaze_fsl_tcput>; +def TCAPUT : FSLPutT<0x1B, 0x7, "tcaput ", int_mblaze_fsl_tcaput>; + +let Defs = [CARRY] in { + def NPUT : FSLPut<0x1B, 0x8, "nput ", int_mblaze_fsl_nput>; + def NAPUT : FSLPut<0x1B, 0x9, "naput ", 
int_mblaze_fsl_naput>; + def NCPUT : FSLPut<0x1B, 0xC, "ncput ", int_mblaze_fsl_ncput>; + def NCAPUT : FSLPut<0x1B, 0xD, "ncaput ", int_mblaze_fsl_ncaput>; + def TNPUT : FSLPutT<0x1B, 0xA, "tnput ", int_mblaze_fsl_tnput>; + def TNAPUT : FSLPutT<0x1B, 0xB, "tnaput ", int_mblaze_fsl_tnaput>; + def TNCPUT : FSLPutT<0x1B, 0xE, "tncput ", int_mblaze_fsl_tncput>; + def TNCAPUT : FSLPutT<0x1B, 0xF, "tncaput ", int_mblaze_fsl_tncaput>; +} //===----------------------------------------------------------------------===// // FSL Dynamic Put Instructions //===----------------------------------------------------------------------===// -def PUTD : FSLPutD<0x1B, 0x00, "putd ", int_mblaze_fsl_put>; -def APUTD : FSLPutD<0x1B, 0x00, "aputd ", int_mblaze_fsl_aput>; -def CPUTD : FSLPutD<0x1B, 0x00, "cputd ", int_mblaze_fsl_cput>; -def CAPUTD : FSLPutD<0x1B, 0x00, "caputd ", int_mblaze_fsl_caput>; -def NPUTD : FSLPutD<0x1B, 0x00, "nputd ", int_mblaze_fsl_nput>; -def NAPUTD : FSLPutD<0x1B, 0x00, "naputd ", int_mblaze_fsl_naput>; -def NCPUTD : FSLPutD<0x1B, 0x00, "ncputd ", int_mblaze_fsl_ncput>; -def NCAPUTD : FSLPutD<0x1B, 0x00, "ncaputd ", int_mblaze_fsl_ncaput>; -def TPUTD : FSLPutTD<0x1B, 0x00, "tputd ", int_mblaze_fsl_tput>; -def TAPUTD : FSLPutTD<0x1B, 0x00, "taputd ", int_mblaze_fsl_taput>; -def TCPUTD : FSLPutTD<0x1B, 0x00, "tcputd ", int_mblaze_fsl_tcput>; -def TCAPUTD : FSLPutTD<0x1B, 0x00, "tcaputd ", int_mblaze_fsl_tcaput>; -def TNPUTD : FSLPutTD<0x1B, 0x00, "tnputd ", int_mblaze_fsl_tnput>; -def TNAPUTD : FSLPutTD<0x1B, 0x00, "tnaputd ", int_mblaze_fsl_tnaput>; -def TNCPUTD : FSLPutTD<0x1B, 0x00, "tncputd ", int_mblaze_fsl_tncput>; -def TNCAPUTD : FSLPutTD<0x1B, 0x00, "tncaputd ", int_mblaze_fsl_tncaput>; +def PUTD : FSLPutD<0x13, 0x0, "putd ", int_mblaze_fsl_put>; +def APUTD : FSLPutD<0x13, 0x1, "aputd ", int_mblaze_fsl_aput>; +def CPUTD : FSLPutD<0x13, 0x4, "cputd ", int_mblaze_fsl_cput>; +def CAPUTD : FSLPutD<0x13, 0x5, "caputd ", int_mblaze_fsl_caput>; +def TPUTD : FSLPutTD<0x13, 0x2, "tputd ", int_mblaze_fsl_tput>; +def TAPUTD : FSLPutTD<0x13, 0x3, "taputd ", int_mblaze_fsl_taput>; +def TCPUTD : FSLPutTD<0x13, 0x6, "tcputd ", int_mblaze_fsl_tcput>; +def TCAPUTD : FSLPutTD<0x13, 0x7, "tcaputd ", int_mblaze_fsl_tcaput>; + +let Defs = [CARRY] in { + def NPUTD : FSLPutD<0x13, 0x8, "nputd ", int_mblaze_fsl_nput>; + def NAPUTD : FSLPutD<0x13, 0x9, "naputd ", int_mblaze_fsl_naput>; + def NCPUTD : FSLPutD<0x13, 0xC, "ncputd ", int_mblaze_fsl_ncput>; + def NCAPUTD : FSLPutD<0x13, 0xD, "ncaputd ", int_mblaze_fsl_ncaput>; + def TNPUTD : FSLPutTD<0x13, 0xA, "tnputd ", int_mblaze_fsl_tnput>; + def TNAPUTD : FSLPutTD<0x13, 0xB, "tnaputd ", int_mblaze_fsl_tnaput>; + def TNCPUTD : FSLPutTD<0x13, 0xE, "tncputd ", int_mblaze_fsl_tncput>; + def TNCAPUTD : FSLPutTD<0x13, 0xF, "tncaputd ", int_mblaze_fsl_tncaput>; +} diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td index 28e8e44..d62574d 100644 --- a/lib/Target/MBlaze/MBlazeInstrFormats.td +++ b/lib/Target/MBlaze/MBlazeInstrFormats.td @@ -7,6 +7,35 @@ // //===----------------------------------------------------------------------===// +// Format specifies the encoding used by the instruction. This is part of the +// ad-hoc solution used to emit machine instruction encodings by our machine +// code emitter. +class Format<bits<6> val> { + bits<6> Value = val; +} + +def FPseudo : Format<0>; +def FRRR : Format<1>; // ADD, OR, etc. +def FRRI : Format<2>; // ADDI, ORI, etc. 
+def FCRR : Format<3>; // PUTD, WDC, WIC, BEQ, BNE, BGE, etc.
+def FCRI : Format<4>; // RTID, RTED, RTSD, BEQI, BNEI, BGEI, etc.
+def FRCR : Format<5>; // BRLD, BRALD, GETD
+def FRCI : Format<6>; // BRLID, BRALID, MSRCLR, MSRSET
+def FCCR : Format<7>; // BR, BRA, BRD, etc.
+def FCCI : Format<8>; // IMM, BRI, BRAI, BRID, etc.
+def FRRCI : Format<9>; // BSRLI, BSRAI, BSLLI
+def FRRC : Format<10>; // SEXT8, SEXT16, SRA, SRC, SRL, FLT, FINT, FSQRT
+def FRCX : Format<11>; // GET
+def FRCS : Format<12>; // MFS
+def FCRCS : Format<13>; // MTS
+def FCRCX : Format<14>; // PUT
+def FCX : Format<15>; // TPUT
+def FCR : Format<16>; // TPUTD
+def FRIR : Format<17>; // RSUBI
+def FRRRR : Format<18>; // RSUB, FRSUB
+def FRI : Format<19>; // RSUB, FRSUB
+def FC : Format<20>; // NOP
+
 //===----------------------------------------------------------------------===//
 // Describe MBlaze instructions format
 //
@@ -21,226 +50,155 @@
 //===----------------------------------------------------------------------===//
 
 // Generic MBlaze Format
-class MBlazeInst<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin> : Instruction
-{
- field bits<32> Inst;
-
+class MBlazeInst<bits<6> op, Format form, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> : Instruction {
 let Namespace = "MBlaze";
+ field bits<32> Inst;
 
- bits<6> opcode;
+ bits<6> opcode = op;
+ Format Form = form;
+ bits<6> FormBits = Form.Value;
 
 // Top 6 bits are the 'opcode' field
- let Inst{0-5} = opcode;
-
+ let Inst{0-5} = opcode;
+
+ // If the instruction is marked as a pseudo, set isCodeGenOnly so that the
+ // assembler and disassembler ignore it.
+ let isCodeGenOnly = !eq(!cast<string>(form), "FPseudo");
+
 dag OutOperandList = outs;
 dag InOperandList = ins;
 let AsmString = asmstr;
 let Pattern = pattern;
 let Itinerary = itin;
+
+ // TSFlags layout should be kept in sync with MBlazeInstrInfo.h.
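// A sketch of the consumer side (illustrative, not part of the patch):
// with this layout, C++ code can recover the format from TSFlags via the
// MBlazeII::FormMask enumerator this patch adds to MBlazeInstrInfo.h:
//
//   unsigned Form = MI.getDesc().TSFlags & MBlazeII::FormMask; // bits 5-0
//
// The exact call site depends on the machine-code emitter using it.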
+ let TSFlags{5-0} = FormBits; } //===----------------------------------------------------------------------===// // Pseudo instruction class //===----------------------------------------------------------------------===// class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>: - MBlazeInst<outs, ins, asmstr, pattern, IIPseudo>; + MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIPseudo>; //===----------------------------------------------------------------------===// // Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|> //===----------------------------------------------------------------------===// class TA<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr, - list<dag> pattern, InstrItinClass itin> : - MBlazeInst<outs, ins, asmstr, pattern, itin> + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<op,FRRR,outs, ins, asmstr, pattern, itin> { bits<5> rd; bits<5> ra; bits<5> rb; - let opcode = op; - let Inst{6-10} = rd; - let Inst{11-15} = ra; + let Inst{11-15} = ra; let Inst{16-20} = rb; let Inst{21-31} = flags; } -class TAI<bits<6> op, dag outs, dag ins, string asmstr, - list<dag> pattern, InstrItinClass itin> : - MBlazeInst<outs, ins, asmstr, pattern, itin> -{ - bits<5> rd; - bits<5> ra; - bits<16> imm16; - - let opcode = op; - - let Inst{6-10} = rd; - let Inst{11-15} = ra; - let Inst{16-31} = imm16; -} - -class TIMM<bits<6> op, dag outs, dag ins, string asmstr, - list<dag> pattern, InstrItinClass itin> : - MBlazeInst<outs, ins, asmstr, pattern, itin> -{ - bits<5> ra; - bits<16> imm16; - - let opcode = op; - - let Inst{6-15} = 0; - let Inst{16-31} = imm16; -} - -class TADDR<bits<6> op, dag outs, dag ins, string asmstr, - list<dag> pattern, InstrItinClass itin> : - MBlazeInst<outs, ins, asmstr, pattern, itin> -{ - bits<26> addr; - - let opcode = op; - - let Inst{6-31} = addr; -} - //===----------------------------------------------------------------------===// // Type B instruction class in MBlaze : <|opcode|rd|ra|immediate|> //===----------------------------------------------------------------------===// class TB<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin> : - MBlazeInst<outs, ins, asmstr, pattern, itin> + InstrItinClass itin> : + MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin> { bits<5> rd; bits<5> ra; bits<16> imm16; - let opcode = op; - let Inst{6-10} = rd; - let Inst{11-15} = ra; + let Inst{11-15} = ra; let Inst{16-31} = imm16; } //===----------------------------------------------------------------------===// -// Float instruction class in MBlaze : <|opcode|rd|ra|flags|> +// Type A instruction class in MBlaze but with the operands reversed +// in the LLVM DAG : <|opcode|rd|ra|rb|flags|> //===----------------------------------------------------------------------===// -class TF<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr, - list<dag> pattern, InstrItinClass itin> : - MBlazeInst<outs, ins, asmstr, pattern, itin> +class TAR<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + TA<op, flags, outs, ins, asmstr, pattern, itin> { - bits<5> rd; - bits<5> ra; + bits<5> rrd; + bits<5> rrb; + bits<5> rra; - let opcode = op; + let Form = FRRRR; - let Inst{6-10} = rd; - let Inst{11-15} = ra; - let Inst{16-20} = 0; - let Inst{21-31} = flags; + let rd = rrd; + let ra = rra; + let rb = rrb; } //===----------------------------------------------------------------------===// -// Branch instruction class in MBlaze : 
<|opcode|rd|br|ra|flags|> +// Type B instruction class in MBlaze but with the operands reversed in +// the LLVM DAG : <|opcode|rd|ra|immediate|> //===----------------------------------------------------------------------===// - -class TBR<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins, - string asmstr, list<dag> pattern, InstrItinClass itin> : - MBlazeInst<outs, ins, asmstr, pattern, itin> -{ - bits<5> ra; - - let opcode = op; - - let Inst{6-10} = 0; - let Inst{11-15} = br; - let Inst{16-20} = ra; - let Inst{21-31} = flags; -} - -class TBRC<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins, - string asmstr, list<dag> pattern, InstrItinClass itin> : - MBlazeInst<outs, ins, asmstr, pattern, itin> -{ - bits<5> ra; - bits<5> rb; - - let opcode = op; - - let Inst{6-10} = br; - let Inst{11-15} = ra; - let Inst{16-20} = rb; - let Inst{21-31} = flags; -} - -class TBRL<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins, - string asmstr, list<dag> pattern, InstrItinClass itin> : - MBlazeInst<outs, ins, asmstr, pattern, itin> -{ - bits<5> ra; - - let opcode = op; - - let Inst{6-10} = 0xF; - let Inst{11-15} = br; - let Inst{16-20} = ra; - let Inst{21-31} = flags; +class TBR<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, + InstrItinClass itin> : + TB<op, outs, ins, asmstr, pattern, itin> { + bits<5> rrd; + bits<16> rimm16; + bits<5> rra; + + let Form = FRIR; + + let rd = rrd; + let ra = rra; + let imm16 = rimm16; } -class TBRI<bits<6> op, bits<5> br, dag outs, dag ins, - string asmstr, list<dag> pattern, InstrItinClass itin> : - MBlazeInst<outs, ins, asmstr, pattern, itin> -{ - bits<16> imm16; - - let opcode = op; - - let Inst{6-10} = 0; - let Inst{11-15} = br; - let Inst{16-31} = imm16; -} - -class TBRLI<bits<6> op, bits<5> br, dag outs, dag ins, - string asmstr, list<dag> pattern, InstrItinClass itin> : - MBlazeInst<outs, ins, asmstr, pattern, itin> -{ - bits<16> imm16; - - let opcode = op; +//===----------------------------------------------------------------------===// +// Shift immediate instruction class in MBlaze : <|opcode|rd|ra|immediate|> +//===----------------------------------------------------------------------===// +class SHT<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<op, FRRI, outs, ins, asmstr, pattern, itin> { + bits<5> rd; + bits<5> ra; + bits<5> imm5; - let Inst{6-10} = 0xF; - let Inst{11-15} = br; - let Inst{16-31} = imm16; + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-20} = 0x0; + let Inst{21-22} = flags; + let Inst{23-26} = 0x0; + let Inst{27-31} = imm5; } -class TBRCI<bits<6> op, bits<5> br, dag outs, dag ins, - string asmstr, list<dag> pattern, InstrItinClass itin> : - MBlazeInst<outs, ins, asmstr, pattern, itin> -{ - bits<5> ra; - bits<16> imm16; - - let opcode = op; +//===----------------------------------------------------------------------===// +// Special instruction class in MBlaze : <|opcode|rd|imm14|> +//===----------------------------------------------------------------------===// +class SPC<bits<6> op, bits<2> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> { + bits<5> rd; + bits<14> imm14; - let Inst{6-10} = br; - let Inst{11-15} = ra; - let Inst{16-31} = imm16; + let Inst{6-10} = rd; + let Inst{11-15} = 0x0; + let Inst{16-17} = flags; + let Inst{18-31} = imm14; } -class TRET<bits<6> op, dag outs, dag ins, - string asmstr, list<dag> pattern, 
InstrItinClass itin> : - MBlazeInst<outs, ins, asmstr, pattern, itin> -{ - bits<5> ra; - bits<16> imm16; - - let opcode = op; +//===----------------------------------------------------------------------===// +// MSR instruction class in MBlaze : <|opcode|rd|imm15|> +//===----------------------------------------------------------------------===// +class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<op, FRI, outs, ins, asmstr, pattern, itin> { + bits<5> rd; + bits<15> imm15; - let Inst{6-10} = 0x10; - let Inst{11-15} = ra; - let Inst{16-31} = imm16; + let Inst{6-10} = rd; + let Inst{11-16} = flags; + let Inst{17-31} = imm15; } diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index b590c09..b353dcd 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -38,10 +38,10 @@ static bool isZeroImm(const MachineOperand &op) { unsigned MBlazeInstrInfo:: isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (MI->getOpcode() == MBlaze::LWI) { - if ((MI->getOperand(2).isFI()) && // is a stack slot - (MI->getOperand(1).isImm()) && // the imm is zero - (isZeroImm(MI->getOperand(1)))) { - FrameIndex = MI->getOperand(2).getIndex(); + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) { + FrameIndex = MI->getOperand(1).getIndex(); return MI->getOperand(0).getReg(); } } @@ -57,10 +57,10 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { unsigned MBlazeInstrInfo:: isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (MI->getOpcode() == MBlaze::SWI) { - if ((MI->getOperand(2).isFI()) && // is a stack slot - (MI->getOperand(1).isImm()) && // the imm is zero - (isZeroImm(MI->getOperand(1)))) { - FrameIndex = MI->getOperand(2).getIndex(); + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) { + FrameIndex = MI->getOperand(1).getIndex(); return MI->getOperand(0).getReg(); } } @@ -80,7 +80,7 @@ copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg) + llvm::BuildMI(MBB, I, DL, get(MBlaze::ADDK), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)).addReg(MBlaze::R0); } @@ -91,7 +91,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo *TRI) const { DebugLoc DL; BuildMI(MBB, I, DL, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill)) - .addImm(0).addFrameIndex(FI); + .addFrameIndex(FI).addImm(0); //.addFrameIndex(FI); } void MBlazeInstrInfo:: @@ -101,21 +101,168 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo *TRI) const { DebugLoc DL; BuildMI(MBB, I, DL, get(MBlaze::LWI), DestReg) - .addImm(0).addFrameIndex(FI); + .addFrameIndex(FI).addImm(0); //.addFrameIndex(FI); } //===----------------------------------------------------------------------===// // Branch Analysis //===----------------------------------------------------------------------===// +bool MBlazeInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block 
after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + unsigned LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (MBlaze::isUncondBranchOpcode(LastOpc)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (MBlaze::isCondBranchOpcode(LastOpc)) { + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); + Cond.push_back(LastInst->getOperand(0)); + return false; + } + // Otherwise, don't know what this is. + return true; + } + + // Get the instruction before it if it's a terminator. + MachineInstr *SecondLastInst = I; + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with something like BEQID then BRID, handle it. + if (MBlaze::isCondBranchOpcode(SecondLastInst->getOpcode()) && + MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) { + TBB = SecondLastInst->getOperand(1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two unconditional branches, handle it. + // The second one is not executed, so remove it. + if (MBlaze::isUncondBranchOpcode(SecondLastInst->getOpcode()) && + MBlaze::isUncondBranchOpcode(LastInst->getOpcode())) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // Otherwise, can't handle this. + return true; +} + unsigned MBlazeInstrInfo:: InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const { - // Can only insert uncond branches so far. - assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!"); - BuildMI(&MBB, DL, get(MBlaze::BRI)).addMBB(TBB); - return 1; + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "MBlaze branch conditions have two components!"); + + unsigned Opc = MBlaze::BRID; + if (!Cond.empty()) + Opc = (unsigned)Cond[0].getImm(); + + if (FBB == 0) { + if (Cond.empty()) // Unconditional branch + BuildMI(&MBB, DL, get(Opc)).addMBB(TBB); + else // Conditional branch + BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB); + return 1; + } + + BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()).addMBB(TBB); + BuildMI(&MBB, DL, get(MBlaze::BRID)).addMBB(FBB); + return 2; +} + +unsigned MBlazeInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return 0; + --I; + } + + if (!MBlaze::isUncondBranchOpcode(I->getOpcode()) && + !MBlaze::isCondBranchOpcode(I->getOpcode())) + return 0; + + // Remove the branch. 
+ I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (!MBlaze::isCondBranchOpcode(I->getOpcode())) + return 1; + + // Remove the branch. + I->eraseFromParent(); + return 2; +} + +bool MBlazeInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + assert(Cond.size() == 2 && "Invalid MBlaze branch opcode!"); + switch (Cond[0].getImm()) { + default: return true; + case MBlaze::BEQ: Cond[0].setImm(MBlaze::BNE); return false; + case MBlaze::BNE: Cond[0].setImm(MBlaze::BEQ); return false; + case MBlaze::BGT: Cond[0].setImm(MBlaze::BLE); return false; + case MBlaze::BGE: Cond[0].setImm(MBlaze::BLT); return false; + case MBlaze::BLT: Cond[0].setImm(MBlaze::BGE); return false; + case MBlaze::BLE: Cond[0].setImm(MBlaze::BGT); return false; + case MBlaze::BEQI: Cond[0].setImm(MBlaze::BNEI); return false; + case MBlaze::BNEI: Cond[0].setImm(MBlaze::BEQI); return false; + case MBlaze::BGTI: Cond[0].setImm(MBlaze::BLEI); return false; + case MBlaze::BGEI: Cond[0].setImm(MBlaze::BLTI); return false; + case MBlaze::BLTI: Cond[0].setImm(MBlaze::BGEI); return false; + case MBlaze::BLEI: Cond[0].setImm(MBlaze::BGTI); return false; + case MBlaze::BEQD: Cond[0].setImm(MBlaze::BNED); return false; + case MBlaze::BNED: Cond[0].setImm(MBlaze::BEQD); return false; + case MBlaze::BGTD: Cond[0].setImm(MBlaze::BLED); return false; + case MBlaze::BGED: Cond[0].setImm(MBlaze::BLTD); return false; + case MBlaze::BLTD: Cond[0].setImm(MBlaze::BGED); return false; + case MBlaze::BLED: Cond[0].setImm(MBlaze::BGTD); return false; + case MBlaze::BEQID: Cond[0].setImm(MBlaze::BNEID); return false; + case MBlaze::BNEID: Cond[0].setImm(MBlaze::BEQID); return false; + case MBlaze::BGTID: Cond[0].setImm(MBlaze::BLEID); return false; + case MBlaze::BGEID: Cond[0].setImm(MBlaze::BLTID); return false; + case MBlaze::BLTID: Cond[0].setImm(MBlaze::BGEID); return false; + case MBlaze::BLEID: Cond[0].setImm(MBlaze::BGTID); return false; + } } /// getGlobalBaseReg - Return a virtual register initialized with the @@ -134,7 +281,7 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass); + GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::GPRRegisterClass); BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), GlobalBaseReg).addReg(MBlaze::R20); RegInfo.addLiveIn(MBlaze::R20); diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h index b3dba0e..b7300c1 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -73,59 +73,92 @@ namespace MBlaze { FCOND_GT, // Only integer conditions - COND_E, - COND_GZ, - COND_GEZ, - COND_LZ, - COND_LEZ, + COND_EQ, + COND_GT, + COND_GE, + COND_LT, + COND_LE, COND_NE, COND_INVALID }; // Turn condition code into conditional branch opcode. - unsigned GetCondBranchFromCond(CondCode CC); + inline static unsigned GetCondBranchFromCond(CondCode CC) { + switch (CC) { + default: llvm_unreachable("Unknown condition code"); + case COND_EQ: return MBlaze::BEQID; + case COND_NE: return MBlaze::BNEID; + case COND_GT: return MBlaze::BGTID; + case COND_GE: return MBlaze::BGEID; + case COND_LT: return MBlaze::BLTID; + case COND_LE: return MBlaze::BLEID; + } + } /// GetOppositeBranchCondition - Return the inverse of the specified cond, /// e.g. turning COND_E to COND_NE. 
- CondCode GetOppositeBranchCondition(MBlaze::CondCode CC); + // CondCode GetOppositeBranchCondition(MBlaze::CondCode CC); /// MBlazeCCToString - Map each FP condition code to its string - inline static const char *MBlazeFCCToString(MBlaze::CondCode CC) - { + inline static const char *MBlazeFCCToString(MBlaze::CondCode CC) { switch (CC) { - default: llvm_unreachable("Unknown condition code"); - case FCOND_F: - case FCOND_T: return "f"; - case FCOND_UN: - case FCOND_OR: return "un"; - case FCOND_EQ: - case FCOND_NEQ: return "eq"; - case FCOND_UEQ: - case FCOND_OGL: return "ueq"; - case FCOND_OLT: - case FCOND_UGE: return "olt"; - case FCOND_ULT: - case FCOND_OGE: return "ult"; - case FCOND_OLE: - case FCOND_UGT: return "ole"; - case FCOND_ULE: - case FCOND_OGT: return "ule"; - case FCOND_SF: - case FCOND_ST: return "sf"; - case FCOND_NGLE: - case FCOND_GLE: return "ngle"; - case FCOND_SEQ: - case FCOND_SNE: return "seq"; - case FCOND_NGL: - case FCOND_GL: return "ngl"; - case FCOND_LT: - case FCOND_NLT: return "lt"; - case FCOND_NGE: - case FCOND_GE: return "ge"; - case FCOND_LE: - case FCOND_NLE: return "nle"; - case FCOND_NGT: - case FCOND_GT: return "gt"; + default: llvm_unreachable("Unknown condition code"); + case FCOND_F: + case FCOND_T: return "f"; + case FCOND_UN: + case FCOND_OR: return "un"; + case FCOND_EQ: + case FCOND_NEQ: return "eq"; + case FCOND_UEQ: + case FCOND_OGL: return "ueq"; + case FCOND_OLT: + case FCOND_UGE: return "olt"; + case FCOND_ULT: + case FCOND_OGE: return "ult"; + case FCOND_OLE: + case FCOND_UGT: return "ole"; + case FCOND_ULE: + case FCOND_OGT: return "ule"; + case FCOND_SF: + case FCOND_ST: return "sf"; + case FCOND_NGLE: + case FCOND_GLE: return "ngle"; + case FCOND_SEQ: + case FCOND_SNE: return "seq"; + case FCOND_NGL: + case FCOND_GL: return "ngl"; + case FCOND_LT: + case FCOND_NLT: return "lt"; + case FCOND_NGE: + case FCOND_GE: return "ge"; + case FCOND_LE: + case FCOND_NLE: return "nle"; + case FCOND_NGT: + case FCOND_GT: return "gt"; + } + } + + inline static bool isUncondBranchOpcode(int Opc) { + switch (Opc) { + default: return false; + case MBlaze::BRI: + case MBlaze::BRAI: + case MBlaze::BRID: + case MBlaze::BRAID: + return true; + } + } + + inline static bool isCondBranchOpcode(int Opc) { + switch (Opc) { + default: return false; + case MBlaze::BEQI: case MBlaze::BEQID: + case MBlaze::BNEI: case MBlaze::BNEID: + case MBlaze::BGTI: case MBlaze::BGTID: + case MBlaze::BGEI: case MBlaze::BGEID: + case MBlaze::BLTI: case MBlaze::BLTID: + case MBlaze::BLEI: case MBlaze::BLEID: + return true; } } } @@ -134,29 +167,54 @@ namespace MBlaze { /// instruction info tracks. /// namespace MBlazeII { - /// Target Operand Flag enum. - enum TOF { + enum { + // PseudoFrm - This represents an instruction that is a pseudo instruction + // or one that has not been implemented yet. It is illegal to code generate + // it, but tolerated for intermediate implementation stages. + FPseudo = 0, + FRRR, + FRRI, + FCRR, + FCRI, + FRCR, + FRCI, + FCCR, + FCCI, + FRRCI, + FRRC, + FRCX, + FRCS, + FCRCS, + FCRCX, + FCX, + FCR, + FRIR, + FRRRR, + FRI, + FC, + FormMask = 63 + //===------------------------------------------------------------------===// // MBlaze Specific MachineOperand flags. - MO_NO_FLAG, + // MO_NO_FLAG, /// MO_GOT - Represents the offset into the global offset table at which /// the address the relocation entry symbol resides during execution. 
- MO_GOT, + // MO_GOT, /// MO_GOT_CALL - Represents the offset into the global offset table at /// which the address of a call site relocation entry symbol resides /// during execution. This is different from the above since this flag /// can only be present in call instructions. - MO_GOT_CALL, + // MO_GOT_CALL, /// MO_GPREL - Represents the offset from the current gp value to be used /// for the relocatable object file being produced. - MO_GPREL, + // MO_GPREL, /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol /// address. - MO_ABS_HILO + // MO_ABS_HILO }; } @@ -190,10 +248,20 @@ public: int &FrameIndex) const; /// Branch Analysis + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + + virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) + const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td index e5d1534..7b8f70a 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -13,35 +13,36 @@ include "MBlazeInstrFormats.td" //===----------------------------------------------------------------------===// -// MBlaze profiles and nodes +// MBlaze type profiles //===----------------------------------------------------------------------===// + +// def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>; def SDT_MBlazeRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def SDT_MBlazeJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def SDT_MBlazeIRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_MBlazeJmpLink : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>; +def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; +def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -// Call -def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink, - [SDNPHasChain,SDNPOptInFlag,SDNPOutFlag]>; +//===----------------------------------------------------------------------===// +// MBlaze specific nodes +//===----------------------------------------------------------------------===// -// Return -def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet, - [SDNPHasChain, SDNPOptInFlag]>; +def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet, + [SDNPHasChain, SDNPOptInGlue]>; +def MBlazeIRet : SDNode<"MBlazeISD::IRet", SDT_MBlazeIRet, + [SDNPHasChain, SDNPOptInGlue]>; -// Hi and Lo nodes are used to handle global addresses. Used on -// MBlazeISelLowering to lower stuff like GlobalAddress, ExternalSymbol -// static model. -def MBWrapper : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>; -def MBlazeGPRel : SDNode<"MBlazeISD::GPRel", SDTIntUnaryOp>; +def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink, + [SDNPHasChain,SDNPOptInGlue,SDNPOutGlue, + SDNPVariadic]>; -def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; -def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def MBWrapper : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>; -// These are target-independent nodes, but have target-specific formats. 
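// The MBlazeII enum above packs an instruction's encoding form into the low
// six bits of TSFlags, which is why FormMask is 63. A minimal sketch of the
// decode step (enumerator values are illustrative; the real ones live in
// MBlazeInstrInfo.h and are read back by the MC code emitter later in this
// patch):

#include <cassert>
#include <cstdint>

namespace sketch {

enum Form { FPseudo = 0, FRRR, FRRI, FCRR, FCRI, FormMask = 63 };

// Same extraction EncodeInstruction performs via TSFlags & MBlazeII::FormMask.
inline unsigned getForm(uint64_t TSFlags) { return TSFlags & FormMask; }

} // namespace sketch

int main() {
  // Bits above the form field stay free for unrelated target flags.
  uint64_t Flags = sketch::FRRI | (1ULL << 6);
  assert(sketch::getForm(Flags) == sketch::FRRI);
  return 0;
}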
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MBCallSeqStart,
-                           [SDNPHasChain, SDNPOutFlag]>;
-def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOutGlue]>;

-def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>;
+def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

//===----------------------------------------------------------------------===//
// MBlaze Instruction Predicate Definitions.
@@ -67,11 +68,22 @@ def HasMMU       : Predicate<"Subtarget.hasMMU()">;
// MBlaze Operand, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===//

+def MBlazeMemAsmOperand : AsmOperandClass {
+  let Name = "Mem";
+  let SuperClasses = [];
+}
+
+def MBlazeFslAsmOperand : AsmOperandClass {
+  let Name = "Fsl";
+  let SuperClasses = [];
+}
+
// Instruction operand types
def brtarget    : Operand<OtherVT>;
def calltarget  : Operand<i32>;
def simm16      : Operand<i32>;
def uimm5       : Operand<i32>;
+def uimm15      : Operand<i32>;
def fimm        : Operand<f32>;

// Unsigned Operand
@@ -82,31 +94,23 @@ def uimm16      : Operand<i32> {
// FSL Operand
def fslimm      : Operand<i32> {
  let PrintMethod = "printFSLImm";
+  let ParserMatchClass = MBlazeFslAsmOperand;
}

// Address operand
def memri : Operand<i32> {
  let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops simm16, CPURegs);
+  let MIOperandInfo = (ops GPR, simm16);
+  let ParserMatchClass = MBlazeMemAsmOperand;
}

def memrr : Operand<i32> {
  let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops CPURegs, CPURegs);
+  let MIOperandInfo = (ops GPR, GPR);
+  let ParserMatchClass = MBlazeMemAsmOperand;
}

-// Transformation Function - get the lower 16 bits.
-def LO16 : SDNodeXForm<imm, [{
-  return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
-}]>;
-
-// Transformation Function - get the higher 16 bits.
-def HI16 : SDNodeXForm<imm, [{
-  return getI32Imm((unsigned)N->getZExtValue() >> 16);
-}]>;
-
// Node immediate fits as 16-bit sign extended on target immediate.
-// e.g. addi, andi
def immSExt16  : PatLeaf<(imm), [{
  return (N->getZExtValue() >> 16) == 0;
}]>;
@@ -117,19 +121,19 @@ def immSExt16  : PatLeaf<(imm), [{
// e.g. addiu, sltiu
def immZExt16  : PatLeaf<(imm), [{
  return (N->getZExtValue() >> 16) == 0;
-}], LO16>;
+}]>;

// FSL immediate field must fit in 4 bits.
def immZExt4 : PatLeaf<(imm), [{
-  return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ;
+  return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ;
}]>;

// shamt field must fit in 5 bits.
def immZExt5 : PatLeaf<(imm), [{
-  return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+  return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
}]>;

-// MBlaze Address Mode! SDNode frameindex could possibily be a match
+// MBlaze Address Mode. SDNode frameindex could possibly be a match
// since load and store instructions from stack used it.
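// The PatLeaf predicates above all reduce to "does the value survive
// truncation to the field width". A standalone restatement (helper names are
// mine, not generated code); note that in this revision immSExt16 and
// immZExt16 share the same zero-upper-half test:

#include <cassert>
#include <cstdint>

namespace sketch {

bool fitsImm16(uint64_t V) { return (V >> 16) == 0; }  // immSExt16/immZExt16
bool fitsUImm5(uint64_t V) { return V == (V & 0x1f); } // immZExt5 (shamt)
bool fitsUImm4(uint64_t V) { return V == (V & 0xf); }  // immZExt4 (FSL)

} // namespace sketch

int main() {
  assert(sketch::fitsImm16(0xFFFF) && !sketch::fitsImm16(0x10000));
  assert(sketch::fitsUImm5(31)     && !sketch::fitsUImm5(32));
  assert(sketch::fitsUImm4(15)     && !sketch::fitsUImm4(16));
  return 0;
}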
def iaddr : ComplexPattern<i32, 2, "SelectAddrRegImm", [frameindex], []>; def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>; @@ -141,28 +145,14 @@ def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>; // As stack alignment is always done with addiu, we need a 16-bit immediate let Defs = [R1], Uses = [R1] in { def ADJCALLSTACKDOWN : MBlazePseudo<(outs), (ins simm16:$amt), - "${:comment} ADJCALLSTACKDOWN $amt", + "#ADJCALLSTACKDOWN $amt", [(callseq_start timm:$amt)]>; def ADJCALLSTACKUP : MBlazePseudo<(outs), (ins uimm16:$amt1, simm16:$amt2), - "${:comment} ADJCALLSTACKUP $amt1", + "#ADJCALLSTACKUP $amt1", [(callseq_end timm:$amt1, timm:$amt2)]>; } -// Some assembly macros need to avoid pseudoinstructions and assembler -// automatic reodering, we should reorder ourselves. -def MACRO : MBlazePseudo<(outs), (ins), ".set macro", []>; -def REORDER : MBlazePseudo<(outs), (ins), ".set reorder", []>; -def NOMACRO : MBlazePseudo<(outs), (ins), ".set nomacro", []>; -def NOREORDER : MBlazePseudo<(outs), (ins), ".set noreorder", []>; - -// When handling PIC code the assembler needs .cpload and .cprestore -// directives. If the real instructions corresponding these directives -// are used, we have the same behavior, but get also a bunch of warnings -// from the assembler. -def CPLOAD : MBlazePseudo<(outs), (ins CPURegs:$reg), ".cpload $reg", []>; -def CPRESTORE : MBlazePseudo<(outs), (ins uimm16:$l), ".cprestore $l\n", []>; - //===----------------------------------------------------------------------===// // Instructions specific format //===----------------------------------------------------------------------===// @@ -172,47 +162,58 @@ def CPRESTORE : MBlazePseudo<(outs), (ins uimm16:$l), ".cprestore $l\n", []>; //===----------------------------------------------------------------------===// class Arith<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, InstrItinClass itin> : - TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c), !strconcat(instr_asm, " $dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>; + [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>; class ArithI<bits<6> op, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type> : - TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), + TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIAlu>; + +class ArithI32<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> : + TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], IIAlu>; + +class ShiftI<bits<6> op, bits<2> flags, string instr_asm, SDNode OpNode, + Operand Od, PatLeaf imm_type> : + SHT<op, flags, (outs GPR:$dst), (ins GPR:$b, Od:$c), !strconcat(instr_asm, " $dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>; + [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIAlu>; class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, InstrItinClass itin> : - TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b), - !strconcat(instr_asm, " $dst, $c, $b"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>; + TAR<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c), + !strconcat(instr_asm, " $dst, $c, $b"), + [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>; class ArithRI<bits<6> op, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type> : - TAI<op, (outs CPURegs:$dst), 
(ins Od:$b, CPURegs:$c), + TBR<op, (outs GPR:$dst), (ins Od:$b, GPR:$c), !strconcat(instr_asm, " $dst, $c, $b"), - [(set CPURegs:$dst, (OpNode imm_type:$b, CPURegs:$c))], IIAlu>; + [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIAlu>; class ArithN<bits<6> op, bits<11> flags, string instr_asm, InstrItinClass itin> : - TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c), !strconcat(instr_asm, " $dst, $b, $c"), [], itin>; class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> : - TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), - !strconcat(instr_asm, " $dst, $b, $c"), - [], IIAlu>; + TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], IIAlu>; class ArithRN<bits<6> op, bits<11> flags, string instr_asm, InstrItinClass itin> : - TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b), - !strconcat(instr_asm, " $dst, $b, $c"), - [], itin>; + TAR<op, flags, (outs GPR:$dst), (ins GPR:$c, GPR:$b), + !strconcat(instr_asm, " $dst, $b, $c"), + [], itin>; class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> : - TAI<op, (outs CPURegs:$dst), (ins Od:$c, CPURegs:$b), + TBR<op, (outs GPR:$dst), (ins Od:$c, GPR:$b), !strconcat(instr_asm, " $dst, $b, $c"), [], IIAlu>; @@ -221,135 +222,179 @@ class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> : //===----------------------------------------------------------------------===// class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> : - TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c), !strconcat(instr_asm, " $dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>; + [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIAlu>; class LogicI<bits<6> op, string instr_asm, SDNode OpNode> : - TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c), - !strconcat(instr_asm, " $dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], - IIAlu>; - -class EffectiveAddress<string instr_asm> : - TAI<0x08, (outs CPURegs:$dst), (ins memri:$addr), - instr_asm, [(set CPURegs:$dst, iaddr:$addr)], IIAlu>; + TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set GPR:$dst, (OpNode GPR:$b, immZExt16:$c))], + IIAlu>; + +class LogicI32<bits<6> op, string instr_asm> : + TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], IIAlu>; + +class PatCmp<bits<6> op, bits<11> flags, string instr_asm> : + TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], IIAlu>; //===----------------------------------------------------------------------===// // Memory Access Instructions //===----------------------------------------------------------------------===// -class LoadM<bits<6> op, string instr_asm, PatFrag OpNode> : - TA<op, 0x000, (outs CPURegs:$dst), (ins memrr:$addr), +class LoadM<bits<6> op, bits<11> flags, string instr_asm> : + TA<op, flags, (outs GPR:$dst), (ins memrr:$addr), !strconcat(instr_asm, " $dst, $addr"), - [(set CPURegs:$dst, (OpNode xaddr:$addr))], IILoad>; + [], IILoad>; class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> : - TAI<op, (outs CPURegs:$dst), (ins memri:$addr), - !strconcat(instr_asm, " $dst, $addr"), - [(set CPURegs:$dst, (OpNode iaddr:$addr))], IILoad>; + TB<op, (outs GPR:$dst), (ins memri:$addr), + !strconcat(instr_asm, " $dst, 
$addr"), + [(set (i32 GPR:$dst), (OpNode iaddr:$addr))], IILoad>; -class StoreM<bits<6> op, string instr_asm, PatFrag OpNode> : - TA<op, 0x000, (outs), (ins CPURegs:$dst, memrr:$addr), +class StoreM<bits<6> op, bits<11> flags, string instr_asm> : + TA<op, flags, (outs), (ins GPR:$dst, memrr:$addr), !strconcat(instr_asm, " $dst, $addr"), - [(OpNode CPURegs:$dst, xaddr:$addr)], IIStore>; + [], IIStore>; class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> : - TAI<op, (outs), (ins CPURegs:$dst, memri:$addr), - !strconcat(instr_asm, " $dst, $addr"), - [(OpNode CPURegs:$dst, iaddr:$addr)], IIStore>; + TB<op, (outs), (ins GPR:$dst, memri:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIStore>; //===----------------------------------------------------------------------===// // Branch Instructions //===----------------------------------------------------------------------===// class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : - TBR<op, br, flags, (outs), (ins CPURegs:$target), - !strconcat(instr_asm, " $target"), - [(brind CPURegs:$target)], IIBranch>; + TA<op, flags, (outs), (ins GPR:$target), + !strconcat(instr_asm, " $target"), + [], IIBranch> { + let rd = 0x0; + let ra = br; + let Form = FCCR; +} -class BranchI<bits<6> op, bits<5> brf, string instr_asm> : - TBRI<op, brf, (outs), (ins brtarget:$target), - !strconcat(instr_asm, " $target"), - [(br bb:$target)], IIBranch>; +class BranchI<bits<6> op, bits<5> br, string instr_asm> : + TB<op, (outs), (ins brtarget:$target), + !strconcat(instr_asm, " $target"), + [], IIBranch> { + let rd = 0; + let ra = br; + let Form = FCCI; +} //===----------------------------------------------------------------------===// // Branch and Link Instructions //===----------------------------------------------------------------------===// class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : - TBRL<op, br, flags, (outs), (ins CPURegs:$target), - !strconcat(instr_asm, " r15, $target"), - [], IIBranch>; + TA<op, flags, (outs), (ins GPR:$link, GPR:$target, variable_ops), + !strconcat(instr_asm, " $link, $target"), + [], IIBranch> { + let ra = br; + let Form = FRCR; +} class BranchLI<bits<6> op, bits<5> br, string instr_asm> : - TBRLI<op, br, (outs), (ins calltarget:$target), - !strconcat(instr_asm, " r15, $target"), - [], IIBranch>; + TB<op, (outs), (ins GPR:$link, calltarget:$target, variable_ops), + !strconcat(instr_asm, " $link, $target"), + [], IIBranch> { + let ra = br; + let Form = FRCI; +} //===----------------------------------------------------------------------===// // Conditional Branch Instructions //===----------------------------------------------------------------------===// -class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm, - PatFrag cond_op> : - TBRC<op, br, flags, (outs), - (ins CPURegs:$a, CPURegs:$b, brtarget:$offset), - !strconcat(instr_asm, " $a, $b, $offset"), - [], IIBranch>; - //(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)], - //IIBranch>; +class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : + TA<op, flags, (outs), + (ins GPR:$a, GPR:$b), + !strconcat(instr_asm, " $a, $b"), + [], IIBranch> { + let rd = br; + let Form = FCRR; +} -class BranchCI<bits<6> op, bits<5> br, string instr_asm, PatFrag cond_op> : - TBRCI<op, br, (outs), (ins CPURegs:$a, brtarget:$offset), - !strconcat(instr_asm, " $a, $offset"), - [], IIBranch>; +class BranchCI<bits<6> op, bits<5> br, string instr_asm> : + TB<op, (outs), (ins 
GPR:$a, brtarget:$offset), + !strconcat(instr_asm, " $a, $offset"), + [], IIBranch> { + let rd = br; + let Form = FCRI; +} //===----------------------------------------------------------------------===// // MBlaze arithmetic instructions //===----------------------------------------------------------------------===// let isCommutable = 1, isAsCheapAsAMove = 1 in { - def ADD : Arith<0x00, 0x000, "add ", add, IIAlu>; - def ADDC : Arith<0x02, 0x000, "addc ", adde, IIAlu>; - def ADDK : Arith<0x04, 0x000, "addk ", addc, IIAlu>; + def ADDK : Arith<0x04, 0x000, "addk ", add, IIAlu>; + def AND : Logic<0x21, 0x000, "and ", and>; + def OR : Logic<0x20, 0x000, "or ", or>; + def XOR : Logic<0x22, 0x000, "xor ", xor>; + def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">; + def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">; + def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">; + + let Defs = [CARRY] in { + def ADD : Arith<0x00, 0x000, "add ", addc, IIAlu>; + + let Uses = [CARRY] in { + def ADDC : Arith<0x02, 0x000, "addc ", adde, IIAlu>; + } + } + + let Uses = [CARRY] in { def ADDKC : ArithN<0x06, 0x000, "addkc ", IIAlu>; - def AND : Logic<0x21, 0x000, "and ", and>; - def OR : Logic<0x20, 0x000, "or ", or>; - def XOR : Logic<0x22, 0x000, "xor ", xor>; + } } let isAsCheapAsAMove = 1 in { - def ANDN : ArithN<0x23, 0x000, "andn ", IIAlu>; - def CMP : ArithN<0x05, 0x001, "cmp ", IIAlu>; - def CMPU : ArithN<0x05, 0x003, "cmpu ", IIAlu>; - def RSUB : ArithR<0x01, 0x000, "rsub ", sub, IIAlu>; - def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIAlu>; - def RSUBK : ArithR<0x05, 0x000, "rsubk ", subc, IIAlu>; + def ANDN : ArithN<0x23, 0x000, "andn ", IIAlu>; + def CMP : ArithN<0x05, 0x001, "cmp ", IIAlu>; + def CMPU : ArithN<0x05, 0x003, "cmpu ", IIAlu>; + def RSUBK : ArithR<0x05, 0x000, "rsubk ", sub, IIAlu>; + + let Defs = [CARRY] in { + def RSUB : ArithR<0x01, 0x000, "rsub ", subc, IIAlu>; + + let Uses = [CARRY] in { + def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIAlu>; + } + } + + let Uses = [CARRY] in { def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>; + } } let isCommutable = 1, Predicates=[HasMul] in { - def MUL : Arith<0x10, 0x000, "mul ", mul, IIAlu>; + def MUL : Arith<0x10, 0x000, "mul ", mul, IIAlu>; } let isCommutable = 1, Predicates=[HasMul,HasMul64] in { - def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIAlu>; - def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIAlu>; + def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIAlu>; + def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIAlu>; } let Predicates=[HasMul,HasMul64] in { - def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>; + def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>; } let Predicates=[HasBarrel] in { - def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIAlu>; - def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIAlu>; - def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIAlu>; - def BSRLI : ArithI<0x11, "bsrli ", srl, uimm5, immZExt5>; - def BSRAI : ArithI<0x11, "bsrai ", sra, uimm5, immZExt5>; - def BSLLI : ArithI<0x11, "bslli ", shl, uimm5, immZExt5>; + def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIAlu>; + def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIAlu>; + def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIAlu>; + def BSRLI : ShiftI<0x19, 0x0, "bsrli ", srl, uimm5, immZExt5>; + def BSRAI : ShiftI<0x19, 0x1, "bsrai ", sra, uimm5, immZExt5>; + def BSLLI : ShiftI<0x19, 0x2, "bslli ", shl, uimm5, immZExt5>; } let Predicates=[HasDiv] in { - def IDIV : Arith<0x12, 0x000, "idiv ", sdiv, IIAlu>; - def IDIVU : Arith<0x12, 0x002, "idivu ", udiv, IIAlu>; + def IDIV : ArithR<0x12, 
0x000, "idiv ", sdiv, IIAlu>; + def IDIVU : ArithR<0x12, 0x002, "idivu ", udiv, IIAlu>; } //===----------------------------------------------------------------------===// @@ -357,22 +402,31 @@ let Predicates=[HasDiv] in { //===----------------------------------------------------------------------===// let isAsCheapAsAMove = 1 in { - def ADDI : ArithI<0x08, "addi ", add, simm16, immSExt16>; - def ADDIC : ArithNI<0x0A, "addic ", simm16, immSExt16>; - def ADDIK : ArithNI<0x0C, "addik ", simm16, immSExt16>; - def ADDIKC : ArithI<0x0E, "addikc ", addc, simm16, immSExt16>; - def RSUBI : ArithRI<0x09, "rsubi ", sub, simm16, immSExt16>; - def RSUBIC : ArithRNI<0x0B, "rsubi ", simm16, immSExt16>; - def RSUBIK : ArithRNI<0x0E, "rsubic ", simm16, immSExt16>; - def RSUBIKC : ArithRI<0x0F, "rsubikc", subc, simm16, immSExt16>; - def ANDNI : ArithNI<0x2B, "andni ", uimm16, immZExt16>; - def ANDI : LogicI<0x29, "andi ", and>; - def ORI : LogicI<0x28, "ori ", or>; - def XORI : LogicI<0x2A, "xori ", xor>; + def ADDIK : ArithI<0x0C, "addik ", add, simm16, immSExt16>; + def RSUBIK : ArithRI<0x0D, "rsubik ", sub, simm16, immSExt16>; + def ANDNI : ArithNI<0x2B, "andni ", uimm16, immZExt16>; + def ANDI : LogicI<0x29, "andi ", and>; + def ORI : LogicI<0x28, "ori ", or>; + def XORI : LogicI<0x2A, "xori ", xor>; + + let Defs = [CARRY] in { + def ADDI : ArithI<0x08, "addi ", addc, simm16, immSExt16>; + def RSUBI : ArithRI<0x09, "rsubi ", subc, simm16, immSExt16>; + + let Uses = [CARRY] in { + def ADDIC : ArithI<0x0A, "addic ", adde, simm16, immSExt16>; + def RSUBIC : ArithRI<0x0B, "rsubic ", sube, simm16, immSExt16>; + } + } + + let Uses = [CARRY] in { + def ADDIKC : ArithNI<0x0E, "addikc ", simm16, immSExt16>; + def RSUBIKC : ArithRNI<0x0F, "rsubikc", simm16, immSExt16>; + } } let Predicates=[HasMul] in { - def MULI : ArithI<0x18, "muli ", mul, simm16, immSExt16>; + def MULI : ArithI<0x18, "muli ", mul, simm16, immSExt16>; } //===----------------------------------------------------------------------===// @@ -380,290 +434,445 @@ let Predicates=[HasMul] in { //===----------------------------------------------------------------------===// let canFoldAsLoad = 1, isReMaterializable = 1 in { - def LBU : LoadM<0x30, "lbu ", zextloadi8>; - def LHU : LoadM<0x31, "lhu ", zextloadi16>; - def LW : LoadM<0x32, "lw ", load>; + def LBU : LoadM<0x30, 0x000, "lbu ">; + def LBUR : LoadM<0x30, 0x200, "lbur ">; + + def LHU : LoadM<0x31, 0x000, "lhu ">; + def LHUR : LoadM<0x31, 0x200, "lhur ">; + + def LW : LoadM<0x32, 0x000, "lw ">; + def LWR : LoadM<0x32, 0x200, "lwr ">; - def LBUI : LoadMI<0x30, "lbui ", zextloadi8>; - def LHUI : LoadMI<0x31, "lhui ", zextloadi16>; - def LWI : LoadMI<0x32, "lwi ", load>; + let Defs = [CARRY] in { + def LWX : LoadM<0x32, 0x400, "lwx ">; + } + + def LBUI : LoadMI<0x38, "lbui ", zextloadi8>; + def LHUI : LoadMI<0x39, "lhui ", zextloadi16>; + def LWI : LoadMI<0x3A, "lwi ", load>; } - def SB : StoreM<0x34, "sb ", truncstorei8>; - def SH : StoreM<0x35, "sh ", truncstorei16>; - def SW : StoreM<0x36, "sw ", store>; +def SB : StoreM<0x34, 0x000, "sb ">; +def SBR : StoreM<0x34, 0x200, "sbr ">; + +def SH : StoreM<0x35, 0x000, "sh ">; +def SHR : StoreM<0x35, 0x200, "shr ">; + +def SW : StoreM<0x36, 0x000, "sw ">; +def SWR : StoreM<0x36, 0x200, "swr ">; - def SBI : StoreMI<0x34, "sbi ", truncstorei8>; - def SHI : StoreMI<0x35, "shi ", truncstorei16>; - def SWI : StoreMI<0x36, "swi ", store>; +let Defs = [CARRY] in { + def SWX : StoreM<0x36, 0x400, "swx ">; +} + +def SBI : StoreMI<0x3C, "sbi ", truncstorei8>; 
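// The CARRY def/use lists above encode the dataflow of MicroBlaze's carry
// bit: ADD/ADDI define it, ADDC/ADDIC consume and redefine it. That is the
// contract that lets a 64-bit addition be split into a low add plus a high
// add-with-carry. A behavioral sketch (semantics only, not how the selector
// emits code):

#include <cassert>
#include <cstdint>

namespace sketch {

struct AddResult { uint32_t Sum; bool Carry; };

AddResult add(uint32_t A, uint32_t B) { // like ADD: defines CARRY
  uint64_t Wide = (uint64_t)A + B;
  return { (uint32_t)Wide, (Wide >> 32) != 0 };
}

AddResult addc(uint32_t A, uint32_t B, bool Cin) { // like ADDC: uses + defines
  uint64_t Wide = (uint64_t)A + B + Cin;
  return { (uint32_t)Wide, (Wide >> 32) != 0 };
}

} // namespace sketch

int main() {
  uint64_t X = 0x00000001FFFFFFFFULL, Y = 0x0000000100000001ULL;
  sketch::AddResult Lo = sketch::add((uint32_t)X, (uint32_t)Y);
  sketch::AddResult Hi = sketch::addc(X >> 32, Y >> 32, Lo.Carry);
  assert((((uint64_t)Hi.Sum << 32) | Lo.Sum) == X + Y);
  return 0;
}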
+def SHI : StoreMI<0x3D, "shi ", truncstorei16>; +def SWI : StoreMI<0x3E, "swi ", store>; //===----------------------------------------------------------------------===// // MBlaze branch instructions //===----------------------------------------------------------------------===// +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { + def BRI : BranchI<0x2E, 0x00, "bri ">; + def BRAI : BranchI<0x2E, 0x08, "brai ">; +} + let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { - def BRI : BranchI<0x2E, 0x00, "bri ">; - def BRAI : BranchI<0x2E, 0x08, "brai ">; - def BEQI : BranchCI<0x2F, 0x00, "beqi ", seteq>; - def BNEI : BranchCI<0x2F, 0x01, "bnei ", setne>; - def BLTI : BranchCI<0x2F, 0x02, "blti ", setlt>; - def BLEI : BranchCI<0x2F, 0x03, "blei ", setle>; - def BGTI : BranchCI<0x2F, 0x04, "bgti ", setgt>; - def BGEI : BranchCI<0x2F, 0x05, "bgei ", setge>; + def BEQI : BranchCI<0x2F, 0x00, "beqi ">; + def BNEI : BranchCI<0x2F, 0x01, "bnei ">; + def BLTI : BranchCI<0x2F, 0x02, "blti ">; + def BLEI : BranchCI<0x2F, 0x03, "blei ">; + def BGTI : BranchCI<0x2F, 0x04, "bgti ">; + def BGEI : BranchCI<0x2F, 0x05, "bgei ">; +} + +let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1, + isBarrier = 1 in { + def BR : Branch<0x26, 0x00, 0x000, "br ">; + def BRA : Branch<0x26, 0x08, 0x000, "bra ">; } let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { - def BR : Branch<0x26, 0x00, 0x000, "br ">; - def BRA : Branch<0x26, 0x08, 0x000, "bra ">; - def BEQ : BranchC<0x27, 0x00, 0x000, "beq ", seteq>; - def BNE : BranchC<0x27, 0x01, 0x000, "bne ", setne>; - def BLT : BranchC<0x27, 0x02, 0x000, "blt ", setlt>; - def BLE : BranchC<0x27, 0x03, 0x000, "ble ", setle>; - def BGT : BranchC<0x27, 0x04, 0x000, "bgt ", setgt>; - def BGE : BranchC<0x27, 0x05, 0x000, "bge ", setge>; + def BEQ : BranchC<0x27, 0x00, 0x000, "beq ">; + def BNE : BranchC<0x27, 0x01, 0x000, "bne ">; + def BLT : BranchC<0x27, 0x02, 0x000, "blt ">; + def BLE : BranchC<0x27, 0x03, 0x000, "ble ">; + def BGT : BranchC<0x27, 0x04, 0x000, "bgt ">; + def BGE : BranchC<0x27, 0x05, 0x000, "bge ">; +} + +let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1, + isBarrier = 1 in { + def BRID : BranchI<0x2E, 0x10, "brid ">; + def BRAID : BranchI<0x2E, 0x18, "braid ">; } let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in { - def BRID : BranchI<0x2E, 0x10, "brid ">; - def BRAID : BranchI<0x2E, 0x18, "braid ">; - def BEQID : BranchCI<0x2F, 0x10, "beqid ", seteq>; - def BNEID : BranchCI<0x2F, 0x11, "bneid ", setne>; - def BLTID : BranchCI<0x2F, 0x12, "bltid ", setlt>; - def BLEID : BranchCI<0x2F, 0x13, "bleid ", setle>; - def BGTID : BranchCI<0x2F, 0x14, "bgtid ", setgt>; - def BGEID : BranchCI<0x2F, 0x15, "bgeid ", setge>; + def BEQID : BranchCI<0x2F, 0x10, "beqid ">; + def BNEID : BranchCI<0x2F, 0x11, "bneid ">; + def BLTID : BranchCI<0x2F, 0x12, "bltid ">; + def BLEID : BranchCI<0x2F, 0x13, "bleid ">; + def BGTID : BranchCI<0x2F, 0x14, "bgtid ">; + def BGEID : BranchCI<0x2F, 0x15, "bgeid ">; +} + +let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, + hasDelaySlot = 1, hasCtrlDep = 1, isBarrier = 1 in { + def BRD : Branch<0x26, 0x10, 0x000, "brd ">; + def BRAD : Branch<0x26, 0x18, 0x000, "brad ">; } let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in { - def BRD : Branch<0x26, 0x10, 0x000, "brd ">; - def BRAD : Branch<0x26, 0x18, 0x000, "brad ">; - def BEQD : BranchC<0x27, 0x10, 0x000, "beqd ", seteq>; - def 
BNED : BranchC<0x27, 0x11, 0x000, "bned ", setne>; - def BLTD : BranchC<0x27, 0x12, 0x000, "bltd ", setlt>; - def BLED : BranchC<0x27, 0x13, 0x000, "bled ", setle>; - def BGTD : BranchC<0x27, 0x14, 0x000, "bgtd ", setgt>; - def BGED : BranchC<0x27, 0x15, 0x000, "bged ", setge>; + def BEQD : BranchC<0x27, 0x10, 0x000, "beqd ">; + def BNED : BranchC<0x27, 0x11, 0x000, "bned ">; + def BLTD : BranchC<0x27, 0x12, 0x000, "bltd ">; + def BLED : BranchC<0x27, 0x13, 0x000, "bled ">; + def BGTD : BranchC<0x27, 0x14, 0x000, "bgtd ">; + def BGED : BranchC<0x27, 0x15, 0x000, "bged ">; +} + +let isCall =1, hasDelaySlot = 1, + Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY], + Uses = [R1] in { + def BRLID : BranchLI<0x2E, 0x14, "brlid ">; + def BRALID : BranchLI<0x2E, 0x1C, "bralid ">; +} + +let isCall = 1, hasDelaySlot = 1, + Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,CARRY], + Uses = [R1] in { + def BRLD : BranchL<0x26, 0x14, 0x000, "brld ">; + def BRALD : BranchL<0x26, 0x1C, 0x000, "brald ">; } -let isCall = 1, hasCtrlDep = 1, isIndirectBranch = 1, - Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12], - Uses = [R1,R5,R6,R7,R8,R9,R10] in { - def BRL : BranchL<0x26, 0x04, 0x000, "brl ">; - def BRAL : BranchL<0x26, 0x0C, 0x000, "bral ">; +let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, + rd=0x10, Form=FCRI in { + def RTSD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm), + "rtsd $target, $imm", + [], + IIBranch>; } -let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, - Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12], - Uses = [R1,R5,R6,R7,R8,R9,R10] in { - def BRLID : BranchLI<0x2E, 0x14, "brlid ">; - def BRALID : BranchLI<0x2E, 0x1C, "bralid ">; +let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, + rd=0x11, Form=FCRI in { + def RTID : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm), + "rtid $target, $imm", + [], + IIBranch>; } -let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, isIndirectBranch = 1, - Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12], - Uses = [R1,R5,R6,R7,R8,R9,R10] in { - def BRLD : BranchL<0x26, 0x14, 0x000, "brld ">; - def BRALD : BranchL<0x26, 0x1C, 0x000, "brald ">; +let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, + rd=0x12, Form=FCRI in { + def RTBD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm), + "rtbd $target, $imm", + [], + IIBranch>; } -let isReturn=1, isTerminator=1, hasDelaySlot=1, - isBarrier=1, hasCtrlDep=1, imm16=0x8 in { - def RTSD : TRET<0x2D, (outs), (ins CPURegs:$target), - "rtsd $target, 8", - [(MBlazeRet CPURegs:$target)], - IIBranch>; +let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, + rd=0x14, Form=FCRI in { + def RTED : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm), + "rted $target, $imm", + [], + IIBranch>; } //===----------------------------------------------------------------------===// // MBlaze misc instructions //===----------------------------------------------------------------------===// -let addr = 0 in { - def NOP : TADDR<0x00, (outs), (ins), "nop ", [], IIAlu>; +let neverHasSideEffects = 1 in { + def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIAlu>; } let usesCustomInserter = 1 in { - //class PseudoSelCC<RegisterClass RC, string asmstr>: - // MBlazePseudo<(outs RC:$D), (ins RC:$T, RC:$F, CPURegs:$CMP), asmstr, - // [(set RC:$D, (MBlazeSelectCC RC:$T, RC:$F, CPURegs:$CMP))]>; - //def Select_CC : PseudoSelCC<CPURegs, "# MBlazeSelect_CC">; - - def Select_CC : MBlazePseudo<(outs CPURegs:$dst), - (ins CPURegs:$T, CPURegs:$F, CPURegs:$CMP, i32imm:$CC), + def Select_CC : MBlazePseudo<(outs GPR:$dst), + (ins GPR:$T, 
GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed "; SELECT_CC PSEUDO!", []>; - def ShiftL : MBlazePseudo<(outs CPURegs:$dst), - (ins CPURegs:$L, CPURegs:$R), + def ShiftL : MBlazePseudo<(outs GPR:$dst), + (ins GPR:$L, GPR:$R), "; ShiftL PSEUDO!", []>; - def ShiftRA : MBlazePseudo<(outs CPURegs:$dst), - (ins CPURegs:$L, CPURegs:$R), + def ShiftRA : MBlazePseudo<(outs GPR:$dst), + (ins GPR:$L, GPR:$R), "; ShiftRA PSEUDO!", []>; - def ShiftRL : MBlazePseudo<(outs CPURegs:$dst), - (ins CPURegs:$L, CPURegs:$R), + def ShiftRL : MBlazePseudo<(outs GPR:$dst), + (ins GPR:$L, GPR:$R), "; ShiftRL PSEUDO!", []>; } - let rb = 0 in { - def SEXT16 : TA<0x24, 0x061, (outs CPURegs:$dst), (ins CPURegs:$src), - "sext16 $dst, $src", [], IIAlu>; - def SEXT8 : TA<0x24, 0x060, (outs CPURegs:$dst), (ins CPURegs:$src), - "sext8 $dst, $src", [], IIAlu>; - def SRL : TA<0x24, 0x041, (outs CPURegs:$dst), (ins CPURegs:$src), - "srl $dst, $src", [], IIAlu>; - def SRA : TA<0x24, 0x001, (outs CPURegs:$dst), (ins CPURegs:$src), - "sra $dst, $src", [], IIAlu>; - def SRC : TA<0x24, 0x021, (outs CPURegs:$dst), (ins CPURegs:$src), - "src $dst, $src", [], IIAlu>; + def SEXT16 : TA<0x24, 0x061, (outs GPR:$dst), (ins GPR:$src), + "sext16 $dst, $src", [], IIAlu>; + def SEXT8 : TA<0x24, 0x060, (outs GPR:$dst), (ins GPR:$src), + "sext8 $dst, $src", [], IIAlu>; + let Defs = [CARRY] in { + def SRL : TA<0x24, 0x041, (outs GPR:$dst), (ins GPR:$src), + "srl $dst, $src", [], IIAlu>; + def SRA : TA<0x24, 0x001, (outs GPR:$dst), (ins GPR:$src), + "sra $dst, $src", [], IIAlu>; + let Uses = [CARRY] in { + def SRC : TA<0x24, 0x021, (outs GPR:$dst), (ins GPR:$src), + "src $dst, $src", [], IIAlu>; + } + } +} + +let isCodeGenOnly=1 in { + def ADDIK32 : ArithI32<0x08, "addik ", simm16, immSExt16>; + def ORI32 : LogicI32<0x28, "ori ">; + def BRLID32 : BranchLI<0x2E, 0x14, "brlid ">; +} + +//===----------------------------------------------------------------------===// +// Misc. 
instructions +//===----------------------------------------------------------------------===// +let Form=FRCS in { + def MFS : SPC<0x25, 0x2, (outs GPR:$dst), (ins SPR:$src), + "mfs $dst, $src", [], IIAlu>; +} + +let Form=FCRCS in { + def MTS : SPC<0x25, 0x3, (outs SPR:$dst), (ins GPR:$src), + "mts $dst, $src", [], IIAlu>; +} + +def MSRSET : MSR<0x25, 0x20, (outs GPR:$dst), (ins uimm15:$set), + "msrset $dst, $set", [], IIAlu>; + +def MSRCLR : MSR<0x25, 0x22, (outs GPR:$dst), (ins uimm15:$clr), + "msrclr $dst, $clr", [], IIAlu>; + +let rd=0x0, Form=FCRR in { + def WDC : TA<0x24, 0x64, (outs), (ins GPR:$a, GPR:$b), + "wdc $a, $b", [], IIAlu>; + def WDCF : TA<0x24, 0x74, (outs), (ins GPR:$a, GPR:$b), + "wdc.flush $a, $b", [], IIAlu>; + def WDCC : TA<0x24, 0x66, (outs), (ins GPR:$a, GPR:$b), + "wdc.clear $a, $b", [], IIAlu>; + def WIC : TA<0x24, 0x68, (outs), (ins GPR:$a, GPR:$b), + "wic $a, $b", [], IIAlu>; } -def LEA_ADDI : EffectiveAddress<"addi $dst, ${addr:stackloc}">; +def BRK : BranchL<0x26, 0x0C, 0x000, "brk ">; +def BRKI : BranchLI<0x2E, 0x0C, "brki ">; + +def IMM : MBlazeInst<0x2C, FCCI, (outs), (ins simm16:$imm), + "imm $imm", [], IIAlu>; + +//===----------------------------------------------------------------------===// +// Pseudo instructions for atomic operations +//===----------------------------------------------------------------------===// +let usesCustomInserter=1 in { + def CAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$cmp, GPR:$swp), + "# atomic compare and swap", + [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$cmp, GPR:$swp))]>; + + def SWP32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$swp), + "# atomic swap", + [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$swp))]>; + + def LAA32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), + "# atomic load and add", + [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$val))]>; + + def LAS32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), + "# atomic load and sub", + [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$val))]>; + + def LAD32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), + "# atomic load and and", + [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$val))]>; + + def LAO32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), + "# atomic load and or", + [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$val))]>; + + def LAX32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), + "# atomic load and xor", + [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$val))]>; + + def LAN32 : MBlazePseudo<(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), + "# atomic load and nand", + [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$val))]>; + + def MEMBARRIER : MBlazePseudo<(outs), (ins), + "# memory barrier", + [(membarrier (i32 imm), (i32 imm), (i32 imm), (i32 imm), (i32 imm))]>; +} //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// // Small immediates -def : Pat<(i32 0), (ADD R0, R0)>; -def : Pat<(i32 immSExt16:$imm), (ADDI R0, imm:$imm)>; -def : Pat<(i32 immZExt16:$imm), (ORI R0, imm:$imm)>; +def : Pat<(i32 0), (ADDK (i32 R0), (i32 R0))>; +def : Pat<(i32 immSExt16:$imm), (ADDIK (i32 R0), imm:$imm)>; +def : Pat<(i32 immZExt16:$imm), (ORI (i32 R0), imm:$imm)>; // Arbitrary immediates -def : Pat<(i32 imm:$imm), (ADDI R0, imm:$imm)>; +def : Pat<(i32 imm:$imm), (ADDIK (i32 R0), imm:$imm)>; // In register 
sign extension -def : Pat<(sext_inreg CPURegs:$src, i16), (SEXT16 CPURegs:$src)>; -def : Pat<(sext_inreg CPURegs:$src, i8), (SEXT8 CPURegs:$src)>; +def : Pat<(sext_inreg GPR:$src, i16), (SEXT16 GPR:$src)>; +def : Pat<(sext_inreg GPR:$src, i8), (SEXT8 GPR:$src)>; // Call -def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)), (BRLID tglobaladdr:$dst)>; -def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),(BRLID texternalsym:$dst)>; -def : Pat<(MBlazeJmpLink CPURegs:$dst), (BRLD CPURegs:$dst)>; +def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)), + (BRLID (i32 R15), tglobaladdr:$dst)>; + +def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)), + (BRLID (i32 R15), texternalsym:$dst)>; + +def : Pat<(MBlazeJmpLink GPR:$dst), + (BRALD (i32 R15), GPR:$dst)>; // Shift Instructions -def : Pat<(shl CPURegs:$L, CPURegs:$R), (ShiftL CPURegs:$L, CPURegs:$R)>; -def : Pat<(sra CPURegs:$L, CPURegs:$R), (ShiftRA CPURegs:$L, CPURegs:$R)>; -def : Pat<(srl CPURegs:$L, CPURegs:$R), (ShiftRL CPURegs:$L, CPURegs:$R)>; +def : Pat<(shl GPR:$L, GPR:$R), (ShiftL GPR:$L, GPR:$R)>; +def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>; +def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>; // SET_CC operations -def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETEQ), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (CMP CPURegs:$L, CPURegs:$R), 1)>; -def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETNE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (CMP CPURegs:$L, CPURegs:$R), 2)>; -def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGT), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (CMP CPURegs:$L, CPURegs:$R), 3)>; -def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLT), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (CMP CPURegs:$L, CPURegs:$R), 4)>; -def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (CMP CPURegs:$L, CPURegs:$R), 5)>; -def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (CMP CPURegs:$L, CPURegs:$R), 6)>; -def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGT), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (CMPU CPURegs:$L, CPURegs:$R), 3)>; -def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULT), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (CMPU CPURegs:$L, CPURegs:$R), 4)>; -def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (CMPU CPURegs:$L, CPURegs:$R), 5)>; -def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULE), - (Select_CC (ADDI R0, 1), (ADDI R0, 0), - (CMPU CPURegs:$L, CPURegs:$R), 6)>; +def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMP GPR:$R, GPR:$L), 1)>; +def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMP GPR:$R, GPR:$L), 2)>; +def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMP GPR:$R, GPR:$L), 3)>; +def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMP GPR:$R, GPR:$L), 4)>; +def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETGE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMP GPR:$R, GPR:$L), 5)>; +def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETLE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMP GPR:$R, GPR:$L), 6)>; +def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU GPR:$R, GPR:$L), 3)>; +def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 
R0), 0), + (CMPU GPR:$R, GPR:$L), 4)>; +def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU GPR:$R, GPR:$L), 5)>; +def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETULE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU GPR:$R, GPR:$L), 6)>; // SELECT operations -def : Pat<(select CPURegs:$C, CPURegs:$T, CPURegs:$F), - (Select_CC CPURegs:$T, CPURegs:$F, CPURegs:$C, 2)>; - -// SELECT_CC -def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETEQ), - (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 1)>; -def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETNE), - (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 2)>; -def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGT), - (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 3)>; -def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLT), - (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 4)>; -def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGE), - (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 5)>; -def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLE), - (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 6)>; -def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGT), - (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 3)>; -def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULT), - (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 4)>; -def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGE), - (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 5)>; -def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULE), - (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 6)>; +def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)), + (Select_CC GPR:$T, GPR:$F, GPR:$C, 2)>; + +// SELECT_CC +def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETEQ), + (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETNE), + (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 2)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETGT), + (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 3)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETLT), + (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 4)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETGE), + (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 5)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETLE), + (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 6)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETUGT), + (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 3)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETULT), + (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 4)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETUGE), + (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 5)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETULE), + (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, GPR:$L), 6)>; + +// Ret instructions +def : 
Pat<(MBlazeRet GPR:$target), (RTSD GPR:$target, 0x8)>; +def : Pat<(MBlazeIRet GPR:$target), (RTID GPR:$target, 0x0)>; + +// BR instructions +def : Pat<(br bb:$T), (BRID bb:$T)>; +def : Pat<(brind GPR:$T), (BRAD GPR:$T)>; // BRCOND instructions -def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETEQ), bb:$T), - (BEQID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; -def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETNE), bb:$T), - (BNEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; -def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGT), bb:$T), - (BGTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; -def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLT), bb:$T), - (BLTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; -def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGE), bb:$T), - (BGEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; -def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLE), bb:$T), - (BLEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; -def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGT), bb:$T), - (BGTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; -def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULT), bb:$T), - (BLTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; -def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGE), bb:$T), - (BGEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; -def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULE), bb:$T), - (BLEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; -def : Pat<(brcond CPURegs:$C, bb:$T), - (BNEID CPURegs:$C, bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T), + (BEQID (CMP GPR:$R, GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T), + (BNEID (CMP GPR:$R, GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGT), bb:$T), + (BGTID (CMP GPR:$R, GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLT), bb:$T), + (BLTID (CMP GPR:$R, GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETGE), bb:$T), + (BGEID (CMP GPR:$R, GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETLE), bb:$T), + (BLEID (CMP GPR:$R, GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGT), bb:$T), + (BGTID (CMPU GPR:$R, GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULT), bb:$T), + (BLTID (CMPU GPR:$R, GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETUGE), bb:$T), + (BGEID (CMPU GPR:$R, GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETULE), bb:$T), + (BLEID (CMPU GPR:$R, GPR:$L), bb:$T)>; +def : Pat<(brcond (i32 GPR:$C), bb:$T), + (BNEID GPR:$C, bb:$T)>; // Jump tables, global addresses, and constant pools -def : Pat<(MBWrapper tglobaladdr:$in), (ORI R0, tglobaladdr:$in)>; -def : Pat<(MBWrapper tjumptable:$in), (ORI R0, tjumptable:$in)>; -def : Pat<(MBWrapper tconstpool:$in), (ORI R0, tconstpool:$in)>; +def : Pat<(MBWrapper tglobaladdr:$in), (ORI (i32 R0), tglobaladdr:$in)>; +def : Pat<(MBWrapper tjumptable:$in), (ORI (i32 R0), tjumptable:$in)>; +def : Pat<(MBWrapper tconstpool:$in), (ORI (i32 R0), tconstpool:$in)>; // Misc instructions -def : Pat<(and CPURegs:$lh, (not CPURegs:$rh)),(ANDN CPURegs:$lh, CPURegs:$rh)>; +def : Pat<(and (i32 GPR:$lh), (not (i32 GPR:$rh))),(ANDN GPR:$lh, GPR:$rh)>; // Arithmetic with immediates -def : Pat<(add CPURegs:$in, imm:$imm),(ADDI CPURegs:$in, imm:$imm)>; -def : Pat<(or CPURegs:$in, imm:$imm),(ORI CPURegs:$in, imm:$imm)>; -def : Pat<(xor CPURegs:$in, imm:$imm),(XORI CPURegs:$in, imm:$imm)>; - -// extended load and 
stores -def : Pat<(extloadi1 iaddr:$src), (LBUI iaddr:$src)>; -def : Pat<(extloadi8 iaddr:$src), (LBUI iaddr:$src)>; -def : Pat<(extloadi16 iaddr:$src), (LHUI iaddr:$src)>; -def : Pat<(extloadi1 xaddr:$src), (LBU xaddr:$src)>; -def : Pat<(extloadi8 xaddr:$src), (LBU xaddr:$src)>; -def : Pat<(extloadi16 xaddr:$src), (LHU xaddr:$src)>; - -def : Pat<(sextloadi1 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>; -def : Pat<(sextloadi8 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>; -def : Pat<(sextloadi16 iaddr:$src), (SEXT16 (LHUI iaddr:$src))>; -def : Pat<(sextloadi1 xaddr:$src), (SEXT8 (LBU xaddr:$src))>; -def : Pat<(sextloadi8 xaddr:$src), (SEXT8 (LBU xaddr:$src))>; -def : Pat<(sextloadi16 xaddr:$src), (SEXT16 (LHU xaddr:$src))>; - -// peepholes -def : Pat<(store (i32 0), iaddr:$dst), (SWI R0, iaddr:$dst)>; +def : Pat<(add (i32 GPR:$in), imm:$imm),(ADDIK GPR:$in, imm:$imm)>; +def : Pat<(or (i32 GPR:$in), imm:$imm),(ORI GPR:$in, imm:$imm)>; +def : Pat<(xor (i32 GPR:$in), imm:$imm),(XORI GPR:$in, imm:$imm)>; + +// Convert any extend loads into zero extend loads +def : Pat<(extloadi8 iaddr:$src), (i32 (LBUI iaddr:$src))>; +def : Pat<(extloadi16 iaddr:$src), (i32 (LHUI iaddr:$src))>; +def : Pat<(extloadi8 xaddr:$src), (i32 (LBU xaddr:$src))>; +def : Pat<(extloadi16 xaddr:$src), (i32 (LHU xaddr:$src))>; + +// 32-bit load and store +def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>; +def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>; + +// 16-bit load and store +def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$addr), (SH GPR:$dst, xaddr:$addr)>; +def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>; + +// 8-bit load and store +def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$addr), (SB GPR:$dst, xaddr:$addr)>; +def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>; + +// Peepholes +def : Pat<(store (i32 0), iaddr:$dst), (SWI (i32 R0), iaddr:$dst)>; //===----------------------------------------------------------------------===// // Floating Point Support diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp index 4931860..7e4a2f5 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -48,7 +48,7 @@ std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded"); if (IntrID < Intrinsic::num_intrinsics) return 0; - assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics && + assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics && "Invalid intrinsic ID"); std::string Result(names[IntrID - Intrinsic::num_intrinsics]); @@ -94,12 +94,12 @@ static const FunctionType *getType(LLVMContext &Context, unsigned id) { const Type *ResultTy = NULL; std::vector<const Type*> ArgTys; bool IsVarArg = false; - + #define GET_INTRINSIC_GENERATOR #include "MBlazeGenIntrinsics.inc" #undef GET_INTRINSIC_GENERATOR - return FunctionType::get(ResultTy, ArgTys, IsVarArg); + return FunctionType::get(ResultTy, ArgTys, IsVarArg); } Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td index a27cb5b..278afbe 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsics.td +++ b/lib/Target/MBlaze/MBlazeIntrinsics.td @@ -1,10 +1,10 @@ //===- IntrinsicsMBlaze.td - Defines MBlaze intrinsics -----*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // 
License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines all of the MicroBlaze-specific intrinsics. @@ -16,7 +16,7 @@ // // MBlaze intrinsic classes. -let TargetPrefix = "mblaze", isTarget = 1 in { +let TargetPrefix = "mblaze", isTarget = 1 in { class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>; class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp index 4abeb2e..1467141 100644 --- a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp +++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp @@ -14,14 +14,9 @@ #include "MBlazeMCAsmInfo.h" using namespace llvm; -MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, StringRef TT) { +MBlazeMCAsmInfo::MBlazeMCAsmInfo() { + SupportsDebugInformation = true; AlignmentIsInBytes = false; - Data16bitsDirective = "\t.half\t"; - Data32bitsDirective = "\t.word\t"; - Data64bitsDirective = 0; PrivateGlobalPrefix = "$"; - CommentString = "#"; - ZeroDirective = "\t.space\t"; GPRel32Directive = "\t.gpword\t"; - HasSetDirective = false; } diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MBlazeMCAsmInfo.h index 9d6ff3a..e68dd58 100644 --- a/lib/Target/MBlaze/MBlazeMCAsmInfo.h +++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.h @@ -19,10 +19,10 @@ namespace llvm { class Target; - + class MBlazeMCAsmInfo : public MCAsmInfo { public: - explicit MBlazeMCAsmInfo(const Target &T, StringRef TT); + explicit MBlazeMCAsmInfo(); }; } // namespace llvm diff --git a/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp new file mode 100644 index 0000000..3ece1a8 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp @@ -0,0 +1,223 @@ +//===-- MBlazeMCCodeEmitter.cpp - Convert MBlaze code to machine code -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MBlazeMCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mccodeemitter" +#include "MBlaze.h" +#include "MBlazeInstrInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); + +namespace { +class MBlazeMCCodeEmitter : public MCCodeEmitter { + MBlazeMCCodeEmitter(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT + const TargetMachine &TM; + const TargetInstrInfo &TII; + MCContext &Ctx; + +public: + MBlazeMCCodeEmitter(TargetMachine &tm, MCContext &ctx) + : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { + } + + ~MBlazeMCCodeEmitter() {} + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. + unsigned getBinaryCodeForInstr(const MCInst &MI) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. 
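// MicroBlaze object code is emitted with the bits of each byte reversed,
// which EmitByte just below implements with a multiply/mask/multiply trick
// from Sean Eron Anderson's "Bit Twiddling Hacks" page. A standalone
// cross-check of that trick against a naive loop, assuming only standard C++:

#include <cassert>
#include <cstdint>

namespace sketch {

uint8_t reverseByMagic(uint8_t C) {
  return (uint8_t)((((C * 0x80200802ULL) & 0x0884422110ULL) *
                    0x0101010101ULL) >> 32);
}

uint8_t reverseByLoop(uint8_t C) {
  uint8_t R = 0;
  for (int i = 0; i < 8; ++i)
    R = (uint8_t)((R << 1) | ((C >> i) & 1)); // bit i moves to bit 7-i
  return R;
}

} // namespace sketch

int main() {
  for (unsigned V = 0; V < 256; ++V)
    assert(sketch::reverseByMagic((uint8_t)V) ==
           sketch::reverseByLoop((uint8_t)V));
  return 0;
}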
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO) const; + unsigned getMachineOpValue(const MCInst &MI, unsigned OpIdx) const { + return getMachineOpValue(MI, MI.getOperand(OpIdx)); + } + + static unsigned GetMBlazeRegNum(const MCOperand &MO) { + // FIXME: getMBlazeRegisterNumbering() is sufficient? + assert(0 && "MBlazeMCCodeEmitter::GetMBlazeRegNum() not yet implemented."); + return 0; + } + + void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const { + // The MicroBlaze uses a bit reversed format so we need to reverse the + // order of the bits. Taken from: + // http://graphics.stanford.edu/~seander/bithacks.html + C = ((C * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32; + + OS << (char)C; + ++CurByte; + } + + void EmitRawByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const { + OS << (char)C; + ++CurByte; + } + + void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte, + raw_ostream &OS) const { + assert(Size <= 8 && "size too big in emit constant"); + + for (unsigned i = 0; i != Size; ++i) { + EmitByte(Val & 255, CurByte, OS); + Val >>= 8; + } + } + + void EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const; + void EmitIMM(const MCInst &MI, unsigned &CurByte, raw_ostream &OS) const; + + void EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; +}; + +} // end anonymous namespace + + +MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const Target &, + TargetMachine &TM, + MCContext &Ctx) { + return new MBlazeMCCodeEmitter(TM, Ctx); +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. +unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI, + const MCOperand &MO) const { + if (MO.isReg()) + return MBlazeRegisterInfo::getRegisterNumbering(MO.getReg()); + else if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + else if (MO.isExpr()) + return 0; // The relocation has already been recorded at this point. + else { +#ifndef NDEBUG + errs() << MO; +#endif + llvm_unreachable(0); + } + return 0; +} + +void MBlazeMCCodeEmitter:: +EmitIMM(const MCOperand &imm, unsigned &CurByte, raw_ostream &OS) const { + int32_t val = (int32_t)imm.getImm(); + if (val > 32767 || val < -32768) { + EmitByte(0x0D, CurByte, OS); + EmitByte(0x00, CurByte, OS); + EmitRawByte((val >> 24) & 0xFF, CurByte, OS); + EmitRawByte((val >> 16) & 0xFF, CurByte, OS); + } +} + +void MBlazeMCCodeEmitter:: +EmitIMM(const MCInst &MI, unsigned &CurByte,raw_ostream &OS) const { + switch (MI.getOpcode()) { + default: break; + + case MBlaze::ADDIK32: + case MBlaze::ORI32: + case MBlaze::BRLID32: + EmitByte(0x0D, CurByte, OS); + EmitByte(0x00, CurByte, OS); + EmitRawByte(0, CurByte, OS); + EmitRawByte(0, CurByte, OS); + } +} + +void MBlazeMCCodeEmitter:: +EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel, unsigned &CurByte, + raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { + assert(MI.getNumOperands()>opNo && "Not enought operands for instruction"); + + MCOperand oper = MI.getOperand(opNo); + + if (oper.isImm()) { + EmitIMM(oper, CurByte, OS); + } else if (oper.isExpr()) { + MCFixupKind FixupKind; + switch (MI.getOpcode()) { + default: + FixupKind = pcrel ? 
+void MBlazeMCCodeEmitter::
+EmitIMM(const MCInst &MI, unsigned &CurByte, raw_ostream &OS) const {
+  switch (MI.getOpcode()) {
+  default: break;
+
+  case MBlaze::ADDIK32:
+  case MBlaze::ORI32:
+  case MBlaze::BRLID32:
+    EmitByte(0x0D, CurByte, OS);
+    EmitByte(0x00, CurByte, OS);
+    EmitRawByte(0, CurByte, OS);
+    EmitRawByte(0, CurByte, OS);
+  }
+}
+
+void MBlazeMCCodeEmitter::
+EmitImmediate(const MCInst &MI, unsigned opNo, bool pcrel, unsigned &CurByte,
+              raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const {
+  assert(MI.getNumOperands() > opNo && "Not enough operands for instruction");
+
+  MCOperand oper = MI.getOperand(opNo);
+
+  if (oper.isImm()) {
+    EmitIMM(oper, CurByte, OS);
+  } else if (oper.isExpr()) {
+    MCFixupKind FixupKind;
+    switch (MI.getOpcode()) {
+    default:
+      FixupKind = pcrel ? FK_PCRel_2 : FK_Data_2;
+      Fixups.push_back(MCFixup::Create(0, oper.getExpr(), FixupKind));
+      break;
+    case MBlaze::ORI32:
+    case MBlaze::ADDIK32:
+    case MBlaze::BRLID32:
+      FixupKind = pcrel ? FK_PCRel_4 : FK_Data_4;
+      Fixups.push_back(MCFixup::Create(0, oper.getExpr(), FixupKind));
+      break;
+    }
+  }
+}
+
+void MBlazeMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDesc &Desc = TII.get(Opcode);
+  uint64_t TSFlags = Desc.TSFlags;
+  // Keep track of the current byte being emitted.
+  unsigned CurByte = 0;
+
+  // Emit an IMM prefix instruction if the instruction being encoded needs one.
+  EmitIMM(MI, CurByte, OS);
+
+  switch ((TSFlags & MBlazeII::FormMask)) {
+  default: break;
+  case MBlazeII::FPseudo:
+    // Pseudo instructions don't get encoded.
+    return;
+  case MBlazeII::FRRI:
+    EmitImmediate(MI, 2, false, CurByte, OS, Fixups);
+    break;
+  case MBlazeII::FRIR:
+    EmitImmediate(MI, 1, false, CurByte, OS, Fixups);
+    break;
+  case MBlazeII::FCRI:
+    EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+    break;
+  case MBlazeII::FRCI:
+    EmitImmediate(MI, 1, true, CurByte, OS, Fixups);
+    break;
+  case MBlazeII::FCCI:
+    EmitImmediate(MI, 0, true, CurByte, OS, Fixups);
+    break;
+  }
+
+  ++MCNumEmitted;  // Keep track of the # of MIs emitted.
+  unsigned Value = getBinaryCodeForInstr(MI);
+  EmitConstant(Value, 4, CurByte, OS);
+}
+
+// FIXME: These #defines shouldn't be necessary. Instead, tblgen should
+// be able to generate code emitter helpers for either variant, like it
+// does for the AsmWriter.
+#define MBlazeCodeEmitter MBlazeMCCodeEmitter
+#define MachineInstr MCInst
+#include "MBlazeGenCodeEmitter.inc"
+#undef MBlazeCodeEmitter
+#undef MachineInstr
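EmitByte in the code emitter above depends on the four-operation byte-reversal identity from the Bit Twiddling Hacks page cited in its comment. A self-contained check of that identity against a naive loop, illustrative only and not part of the import:

    #include <cassert>
    #include <cstdint>

    // Reverse the bits of one byte via the 64-bit multiply trick used
    // by MBlazeMCCodeEmitter::EmitByte.
    static uint8_t reverseByte(uint8_t c) {
      return (uint8_t)((((uint64_t)c * 0x80200802ULL) & 0x0884422110ULL)
                       * 0x0101010101ULL >> 32);
    }

    int main() {
      assert(reverseByte(0x01) == 0x80);
      assert(reverseByte(0x0D) == 0xB0); // 00001101 -> 10110000
      for (unsigned v = 0; v != 256; ++v) {
        uint8_t r = 0;
        for (int b = 0; b != 8; ++b)     // naive reference reversal
          r |= (uint8_t)(((v >> b) & 1) << (7 - b));
        assert(reverseByte((uint8_t)v) == r);
      }
      return 0;
    }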
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
new file mode 100644
index 0000000..a7e400b
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -0,0 +1,166 @@
+//===-- MBlazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower MBlaze MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCInstLower.h"
+#include "MBlazeInstrInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+MCSymbol *MBlazeMCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on GV operand");
+  case 0:  break;
+  }
+
+  return Printer.Mang->getSymbol(MO.getGlobal());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on external symbol operand");
+  case 0:  break;
+  }
+
+  return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetJumpTableSymbol(const MachineOperand &MO) const {
+  SmallString<256> Name;
+  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
+                            << Printer.getFunctionNumber() << '_'
+                            << MO.getIndex();
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on jump table operand");
+  case 0:  break;
+  }
+
+  // Create a symbol for the name.
+  return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+  SmallString<256> Name;
+  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
+                            << Printer.getFunctionNumber() << '_'
+                            << MO.getIndex();
+
+  switch (MO.getTargetFlags()) {
+  default:
+    llvm_unreachable("Unknown target flag on constant pool operand");
+
+  case 0: break;
+  }
+
+  // Create a symbol for the name.
+  return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MBlazeMCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+  switch (MO.getTargetFlags()) {
+  default:
+    llvm_unreachable("Unknown target flag on block address operand");
+
+  case 0: break;
+  }
+
+  return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
+MCOperand MBlazeMCInstLower::
+LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
+  // FIXME: We would like an efficient form for this, so we don't have to do a
+  // lot of extra uniquing.
+  const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+  switch (MO.getTargetFlags()) {
+  default:
+    llvm_unreachable("Unknown target flag on symbol operand");
+
+  case 0: break;
+  }
+
+  if (!MO.isJTI() && MO.getOffset())
+    Expr = MCBinaryExpr::CreateAdd(Expr,
+                                   MCConstantExpr::Create(MO.getOffset(), Ctx),
+                                   Ctx);
+  return MCOperand::CreateExpr(Expr);
+}
+
+void MBlazeMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+  OutMI.setOpcode(MI->getOpcode());
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+
+    MCOperand MCOp;
+    switch (MO.getType()) {
+    default: llvm_unreachable("unknown operand type");
+    case MachineOperand::MO_Register:
+      // Ignore all implicit register operands.
+      if (MO.isImplicit()) continue;
+      MCOp = MCOperand::CreateReg(MO.getReg());
+      break;
+    case MachineOperand::MO_Immediate:
+      MCOp = MCOperand::CreateImm(MO.getImm());
+      break;
+    case MachineOperand::MO_MachineBasicBlock:
+      MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+                         MO.getMBB()->getSymbol(), Ctx));
+      break;
+    case MachineOperand::MO_GlobalAddress:
+      MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+      break;
+    case MachineOperand::MO_ExternalSymbol:
+      MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+      break;
+    case MachineOperand::MO_JumpTableIndex:
+      MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+      break;
+    case MachineOperand::MO_ConstantPoolIndex:
+      MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+      break;
+    case MachineOperand::MO_BlockAddress:
+      MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+      break;
+    case MachineOperand::MO_FPImmediate: {
+      // Scope the locals so the declarations are legal inside a case label.
+      bool ignored;
+      APFloat FVal = MO.getFPImm()->getValueAPF();
+      FVal.convert(APFloat::IEEEsingle, APFloat::rmTowardZero, &ignored);
+
+      APInt IVal = FVal.bitcastToAPInt();
+      uint64_t Val = *IVal.getRawData();
+      MCOp = MCOperand::CreateImm(Val);
+      break;
+    }
+    }
+
+    OutMI.addOperand(MCOp);
+  }
+}
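For context, this lowering is typically driven from an AsmPrinter. A sketch of the usual call site, assuming the public AsmPrinter members of this LLVM revision (OutContext, Mang, OutStreamer); the MSP430 printer removed later in this patch follows exactly this pattern:

    #include "MBlazeMCInstLower.h"
    #include "llvm/CodeGen/AsmPrinter.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCStreamer.h"
    using namespace llvm;

    // Illustrative helper: lower one MachineInstr and hand the resulting
    // MCInst to the MC streamer.
    static void emitLoweredInstruction(AsmPrinter &Printer,
                                       const MachineInstr *MI) {
      MBlazeMCInstLower Lowering(Printer.OutContext, *Printer.Mang, Printer);

      MCInst TmpInst;
      Lowering.Lower(MI, TmpInst);
      Printer.OutStreamer.EmitInstruction(TmpInst);
    }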
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.h b/lib/Target/MBlaze/MBlazeMCInstLower.h
new file mode 100644
index 0000000..92196f2
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.h
@@ -0,0 +1,50 @@
+//===-- MBlazeMCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_MCINSTLOWER_H
+#define MBLAZE_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+  class AsmPrinter;
+  class MCAsmInfo;
+  class MCContext;
+  class MCInst;
+  class MCOperand;
+  class MCSymbol;
+  class MachineInstr;
+  class MachineModuleInfoMachO;
+  class MachineOperand;
+  class Mangler;
+
+  /// MBlazeMCInstLower - This class is used to lower a MachineInstr
+  /// into an MCInst.
+class LLVM_LIBRARY_VISIBILITY MBlazeMCInstLower {
+  MCContext &Ctx;
+  Mangler &Mang;
+
+  AsmPrinter &Printer;
+public:
+  MBlazeMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
+    : Ctx(ctx), Mang(mang), Printer(printer) {}
+  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+  MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h
index 1f956c1..df39509 100644
--- a/lib/Target/MBlaze/MBlazeMachineFunction.h
+++ b/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -14,6 +14,7 @@
 #ifndef MBLAZE_MACHINE_FUNCTION_INFO_H
 #define MBLAZE_MACHINE_FUNCTION_INFO_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/VectorExtras.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -26,20 +27,14 @@ namespace llvm {
 class MBlazeFunctionInfo : public MachineFunctionInfo {
 
 private:
-  /// Holds for each function where on the stack the Frame Pointer must be 
+  /// Holds for each function where on the stack the Frame Pointer must be
   /// saved. This is used on Prologue and Epilogue to emit FP save/restore
   int FPStackOffset;
 
-  /// Holds for each function where on the stack the Return Address must be 
+  /// Holds for each function where on the stack the Return Address must be
   /// saved. This is used on Prologue and Epilogue to emit RA save/restore
   int RAStackOffset;
 
-  /// At each function entry a special bitmask directive must be emitted
-  /// to help in debugging CPU callee saved registers. It needs a negative
-  /// offset from the final stack size and its higher register location on
-  /// the stack.
-  int CPUTopSavedRegOff;
-
   /// MBlazeFIHolder - Holds a FrameIndex and its Stack Pointer Offset
   struct MBlazeFIHolder {
@@ -50,25 +45,30 @@ private:
     : FI(FrameIndex), SPOffset(StackPointerOffset) {}
   };
 
-  /// When PIC is used the GP must be saved on the stack on the function 
-  /// prologue and must be reloaded from this stack location after every 
-  /// call. A reference to its stack location and frame index must be kept 
+  /// When PIC is used the GP must be saved on the stack on the function
+  /// prologue and must be reloaded from this stack location after every
+  /// call. A reference to its stack location and frame index must be kept
   /// to be used on emitPrologue and processFunctionBeforeFrameFinalized.
   MBlazeFIHolder GPHolder;
 
   /// On LowerFormalArguments the stack size is unknown, so the Stack
-  /// Pointer Offset calculation of "not in register arguments" must be 
-  /// postponed to emitPrologue. 
+  /// Pointer Offset calculation of "not in register arguments" must be
+  /// postponed to emitPrologue.
  SmallVector<MBlazeFIHolder, 16> FnLoadArgs;
   bool HasLoadArgs;
 
-  // When VarArgs, we must write registers back to caller stack, preserving 
-  // on register arguments. Since the stack size is unknown on 
+  // When VarArgs is used, the argument registers must be written back to
+  // the caller's stack, preserving the register arguments. Since the
+  // stack size is unknown in
   // LowerFormalArguments, the Stack Pointer Offset calculation must be
-  // postponed to emitPrologue. 
+  // postponed to emitPrologue.
  SmallVector<MBlazeFIHolder, 4> FnStoreVarArgs;
   bool HasStoreVarArgs;
 
+  // When determining the final stack layout some of the frame indexes may
+  // be replaced by new frame indexes that reside in the caller's stack
+  // frame. The replacements are recorded in this structure.
+  DenseMap<int, int> FIReplacements;
+
   /// SRetReturnReg - Some subtargets require that sret lowering includes
   /// returning the value of the returned struct in a register. This field
   /// holds the virtual register into which the sret argument is passed.
@@ -82,11 +82,15 @@ private:
   // VarArgsFrameIndex - FrameIndex for start of varargs area.
   int VarArgsFrameIndex;
 
+  /// LiveInFI - keeps track of the frame indexes in a caller's stack
+  /// frame that are live into a function.
+  SmallVector<int, 16> LiveInFI;
+
 public:
-  MBlazeFunctionInfo(MachineFunction& MF) 
-  : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0),
-    GPHolder(-1,-1), HasLoadArgs(false), HasStoreVarArgs(false),
-    SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0)
+  MBlazeFunctionInfo(MachineFunction& MF)
+  : FPStackOffset(0), RAStackOffset(0), GPHolder(-1,-1), HasLoadArgs(false),
+    HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0),
+    VarArgsFrameIndex(0), LiveInFI()
   {}
 
   int getFPStackOffset() const { return FPStackOffset; }
@@ -95,9 +99,6 @@ public:
   int getRAStackOffset() const { return RAStackOffset; }
   void setRAStackOffset(int Off) { RAStackOffset = Off; }
 
-  int getCPUTopSavedRegOff() const { return CPUTopSavedRegOff; }
-  void setCPUTopSavedRegOff(int Off) { CPUTopSavedRegOff = Off; }
-
   int getGPStackOffset() const { return GPHolder.SPOffset; }
   int getGPFI() const { return GPHolder.FI; }
   void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
@@ -105,12 +106,38 @@ public:
   bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
 
   bool hasLoadArgs() const { return HasLoadArgs; }
-  bool hasStoreVarArgs() const { return HasStoreVarArgs; } 
+  bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+
+  void recordLiveIn(int FI) {
+    LiveInFI.push_back(FI);
+  }
+
+  bool isLiveIn(int FI) {
+    for (unsigned i = 0, e = LiveInFI.size(); i < e; ++i)
+      if (FI == LiveInFI[i]) return true;
+
+    return false;
+  }
+
+  const SmallVector<int, 16>& getLiveIn() const { return LiveInFI; }
+
+  void recordReplacement(int OFI, int NFI) {
+    FIReplacements.insert(std::make_pair(OFI, NFI));
+  }
+
+  bool hasReplacement(int OFI) const {
+    return FIReplacements.find(OFI) != FIReplacements.end();
+  }
+
+  int getReplacement(int OFI) const {
+    return FIReplacements.lookup(OFI);
+  }
 
   void recordLoadArgsFI(int FI, int SPOffset) {
     if (!HasLoadArgs) HasLoadArgs=true;
     FnLoadArgs.push_back(MBlazeFIHolder(FI, SPOffset));
   }
+
   void recordStoreVarArgsFI(int FI, int SPOffset) {
     if (!HasStoreVarArgs) HasStoreVarArgs=true;
     FnStoreVarArgs.push_back(MBlazeFIHolder(FI, SPOffset));
@@ -118,13 +145,14 @@ public:
   }
 
   void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
     if (!hasLoadArgs()) return;
-    for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i) 
-      MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset );
+    for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
+      MFI->setObjectOffset(FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset);
   }
+
   void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
-    if (!hasStoreVarArgs()) return; 
-    for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i) 
+    if (!hasStoreVarArgs()) return;
+    for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
+
MFI->setObjectOffset(FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset); } unsigned getSRetReturnReg() const { return SRetReturnReg; } diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 22b6a30..fa9140d 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mblaze-reg-info" +#define DEBUG_TYPE "mblaze-frame-info" #include "MBlaze.h" #include "MBlazeSubtarget.h" @@ -26,7 +26,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLocation.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetInstrInfo.h" @@ -48,38 +48,62 @@ MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii) /// MBlaze::R0, return the number that it corresponds to (e.g. 0). unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) { switch (RegEnum) { - case MBlaze::R0 : case MBlaze::F0 : return 0; - case MBlaze::R1 : case MBlaze::F1 : return 1; - case MBlaze::R2 : case MBlaze::F2 : return 2; - case MBlaze::R3 : case MBlaze::F3 : return 3; - case MBlaze::R4 : case MBlaze::F4 : return 4; - case MBlaze::R5 : case MBlaze::F5 : return 5; - case MBlaze::R6 : case MBlaze::F6 : return 6; - case MBlaze::R7 : case MBlaze::F7 : return 7; - case MBlaze::R8 : case MBlaze::F8 : return 8; - case MBlaze::R9 : case MBlaze::F9 : return 9; - case MBlaze::R10 : case MBlaze::F10 : return 10; - case MBlaze::R11 : case MBlaze::F11 : return 11; - case MBlaze::R12 : case MBlaze::F12 : return 12; - case MBlaze::R13 : case MBlaze::F13 : return 13; - case MBlaze::R14 : case MBlaze::F14 : return 14; - case MBlaze::R15 : case MBlaze::F15 : return 15; - case MBlaze::R16 : case MBlaze::F16 : return 16; - case MBlaze::R17 : case MBlaze::F17 : return 17; - case MBlaze::R18 : case MBlaze::F18 : return 18; - case MBlaze::R19 : case MBlaze::F19 : return 19; - case MBlaze::R20 : case MBlaze::F20 : return 20; - case MBlaze::R21 : case MBlaze::F21 : return 21; - case MBlaze::R22 : case MBlaze::F22 : return 22; - case MBlaze::R23 : case MBlaze::F23 : return 23; - case MBlaze::R24 : case MBlaze::F24 : return 24; - case MBlaze::R25 : case MBlaze::F25 : return 25; - case MBlaze::R26 : case MBlaze::F26 : return 26; - case MBlaze::R27 : case MBlaze::F27 : return 27; - case MBlaze::R28 : case MBlaze::F28 : return 28; - case MBlaze::R29 : case MBlaze::F29 : return 29; - case MBlaze::R30 : case MBlaze::F30 : return 30; - case MBlaze::R31 : case MBlaze::F31 : return 31; + case MBlaze::R0 : return 0; + case MBlaze::R1 : return 1; + case MBlaze::R2 : return 2; + case MBlaze::R3 : return 3; + case MBlaze::R4 : return 4; + case MBlaze::R5 : return 5; + case MBlaze::R6 : return 6; + case MBlaze::R7 : return 7; + case MBlaze::R8 : return 8; + case MBlaze::R9 : return 9; + case MBlaze::R10 : return 10; + case MBlaze::R11 : return 11; + case MBlaze::R12 : return 12; + case MBlaze::R13 : return 13; + case MBlaze::R14 : return 14; + case MBlaze::R15 : return 15; + case MBlaze::R16 : return 16; + case MBlaze::R17 : return 17; + case MBlaze::R18 : return 18; + case MBlaze::R19 : return 19; + case MBlaze::R20 : return 20; + case MBlaze::R21 : return 21; + case MBlaze::R22 : return 22; + case MBlaze::R23 : return 23; + case MBlaze::R24 
: return 24; + case MBlaze::R25 : return 25; + case MBlaze::R26 : return 26; + case MBlaze::R27 : return 27; + case MBlaze::R28 : return 28; + case MBlaze::R29 : return 29; + case MBlaze::R30 : return 30; + case MBlaze::R31 : return 31; + case MBlaze::RPC : return 0x0000; + case MBlaze::RMSR : return 0x0001; + case MBlaze::REAR : return 0x0003; + case MBlaze::RESR : return 0x0005; + case MBlaze::RFSR : return 0x0007; + case MBlaze::RBTR : return 0x000B; + case MBlaze::REDR : return 0x000D; + case MBlaze::RPID : return 0x1000; + case MBlaze::RZPR : return 0x1001; + case MBlaze::RTLBX : return 0x1002; + case MBlaze::RTLBLO : return 0x1003; + case MBlaze::RTLBHI : return 0x1004; + case MBlaze::RPVR0 : return 0x2000; + case MBlaze::RPVR1 : return 0x2001; + case MBlaze::RPVR2 : return 0x2002; + case MBlaze::RPVR3 : return 0x2003; + case MBlaze::RPVR4 : return 0x2004; + case MBlaze::RPVR5 : return 0x2005; + case MBlaze::RPVR6 : return 0x2006; + case MBlaze::RPVR7 : return 0x2007; + case MBlaze::RPVR8 : return 0x2008; + case MBlaze::RPVR9 : return 0x2009; + case MBlaze::RPVR10 : return 0x200A; + case MBlaze::RPVR11 : return 0x200B; default: llvm_unreachable("Unknown register number!"); } return 0; // Not reached @@ -126,6 +150,37 @@ unsigned MBlazeRegisterInfo::getRegisterFromNumbering(unsigned Reg) { return 0; // Not reached } +unsigned MBlazeRegisterInfo::getSpecialRegisterFromNumbering(unsigned Reg) { + switch (Reg) { + case 0x0000 : return MBlaze::RPC; + case 0x0001 : return MBlaze::RMSR; + case 0x0003 : return MBlaze::REAR; + case 0x0005 : return MBlaze::RESR; + case 0x0007 : return MBlaze::RFSR; + case 0x000B : return MBlaze::RBTR; + case 0x000D : return MBlaze::REDR; + case 0x1000 : return MBlaze::RPID; + case 0x1001 : return MBlaze::RZPR; + case 0x1002 : return MBlaze::RTLBX; + case 0x1003 : return MBlaze::RTLBLO; + case 0x1004 : return MBlaze::RTLBHI; + case 0x2000 : return MBlaze::RPVR0; + case 0x2001 : return MBlaze::RPVR1; + case 0x2002 : return MBlaze::RPVR2; + case 0x2003 : return MBlaze::RPVR3; + case 0x2004 : return MBlaze::RPVR4; + case 0x2005 : return MBlaze::RPVR5; + case 0x2006 : return MBlaze::RPVR6; + case 0x2007 : return MBlaze::RPVR7; + case 0x2008 : return MBlaze::RPVR8; + case 0x2009 : return MBlaze::RPVR9; + case 0x200A : return MBlaze::RPVR10; + case 0x200B : return MBlaze::RPVR11; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} + unsigned MBlazeRegisterInfo::getPICCallReg() { return MBlaze::R20; } @@ -164,77 +219,40 @@ getReservedRegs(const MachineFunction &MF) const { return Reserved; } -//===----------------------------------------------------------------------===// -// -// Stack Frame Processing methods -// +----------------------------+ -// -// The stack is allocated decrementing the stack pointer on -// the first instruction of a function prologue. Once decremented, -// all stack references are are done through a positive offset -// from the stack/frame pointer, so the stack is considered -// to grow up. -// -//===----------------------------------------------------------------------===// - -void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); - - // See the description at MicroBlazeMachineFunction.h - int TopCPUSavedRegOff = -1; - - // Adjust CPU Callee Saved Registers Area. Registers RA and FP must - // be saved in this CPU Area there is the need. 
This whole Area must
-  // be aligned to the default Stack Alignment requirements.
-  unsigned StackOffset = MFI->getStackSize();
-  unsigned RegSize = 4;
-
-  // Replace the dummy '0' SPOffset by the negative offsets, as explained on
-  // LowerFORMAL_ARGUMENTS. Leaving '0' for while is necessary to avoid
-  // the approach done by calculateFrameObjectOffsets to the stack frame.
-  MBlazeFI->adjustLoadArgsFI(MFI);
-  MBlazeFI->adjustStoreVarArgsFI(MFI);
-
-  if (hasFP(MF)) {
-    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
-                         StackOffset);
-    MBlazeFI->setFPStackOffset(StackOffset);
-    TopCPUSavedRegOff = StackOffset;
-    StackOffset += RegSize;
-  }
-
-  if (MFI->adjustsStack()) {
-    MBlazeFI->setRAStackOffset(0);
-    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
-                         StackOffset);
-    TopCPUSavedRegOff = StackOffset;
-    StackOffset += RegSize;
-  }
-
-  // Update frame info
-  MFI->setStackSize(StackOffset);
-
-  // Recalculate the final tops offset. The final values must be '0'
-  // if there isn't a callee saved register for CPU or FPU, otherwise
-  // a negative offset is needed.
-  if (TopCPUSavedRegOff >= 0)
-    MBlazeFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);
-}
-
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-bool MBlazeRegisterInfo::hasFP(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
-}
-
-// This function eliminate ADJCALLSTACKDOWN,
-// ADJCALLSTACKUP pseudo instructions
+// This function eliminates ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudo instructions.
 void MBlazeRegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  if (!TFI->hasReservedCallFrame(MF)) {
+    // When the call frame is not reserved, turn the adjcallstackdown
+    // instruction into 'addi r1, r1, -<amt>' and the adjcallstackup
+    // instruction into 'addi r1, r1, <amt>'.
+    MachineInstr *Old = I;
+    int Amount = Old->getOperand(0).getImm() + 4;
+    if (Amount != 0) {
+      // We need to keep the stack aligned properly. To do this, we round the
+      // amount of space needed for the outgoing arguments up to the next
+      // alignment boundary.
+      unsigned Align = TFI->getStackAlignment();
+      Amount = (Amount+Align-1)/Align*Align;
+
+      MachineInstr *New;
+      if (Old->getOpcode() == MBlaze::ADJCALLSTACKDOWN) {
+        New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
+                .addReg(MBlaze::R1).addImm(-Amount);
+      } else {
+        assert(Old->getOpcode() == MBlaze::ADJCALLSTACKUP);
+        New = BuildMI(MF,Old->getDebugLoc(),TII.get(MBlaze::ADDIK),MBlaze::R1)
+                .addReg(MBlaze::R1).addImm(Amount);
+      }
+
+      // Replace the pseudo instruction with a new instruction...
+      MBB.insert(I, New);
+    }
+  }
+
   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
   MBB.erase(I);
 }
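The expression (Amount+Align-1)/Align*Align above is the usual integer round-up-to-a-multiple idiom. A standalone sketch, illustrative only (the helper name is made up):

    #include <cassert>

    // Round Amount up to the next multiple of Align; integer division
    // truncates, so adding Align-1 first rounds up instead of down.
    static unsigned roundUpToAlignment(unsigned Amount, unsigned Align) {
      return (Amount + Align - 1) / Align * Align;
    }

    int main() {
      assert(roundUpToAlignment(9, 4)  == 12); // 9 bytes of outgoing args
      assert(roundUpToAlignment(12, 4) == 12); // already aligned
      assert(roundUpToAlignment(13, 8) == 16);
      return 0;
    }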
@@ -247,6 +265,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
                     RegScavenger *RS) const {
   MachineInstr &MI = *II;
   MachineFunction &MF = *MI.getParent()->getParent();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
 
   unsigned i = 0;
   while (!MI.getOperand(i).isFI()) {
@@ -257,117 +276,34 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
 
   unsigned oi = i == 2 ? 1 : 2;
 
-  DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
-        errs() << "<--------->\n" << MI);
+  DEBUG(dbgs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+        dbgs() << "<--------->\n" << MI);
 
   int FrameIndex = MI.getOperand(i).getIndex();
-  int stackSize  = MF.getFrameInfo()->getStackSize();
-  int spOffset   = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+  int stackSize  = MFI->getStackSize();
+  int spOffset   = MFI->getObjectOffset(FrameIndex);
 
-  DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
+  DEBUG(MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+        dbgs() << "FrameIndex : " << FrameIndex << "\n"
              << "spOffset   : " << spOffset << "\n"
-             << "stackSize  : " << stackSize << "\n");
+             << "stackSize  : " << stackSize << "\n"
+             << "isFixed    : " << MFI->isFixedObjectIndex(FrameIndex) << "\n"
+             << "isLiveIn   : " << MBlazeFI->isLiveIn(FrameIndex) << "\n"
+             << "isSpill    : " << MFI->isSpillSlotObjectIndex(FrameIndex)
+             << "\n" );
 
   // As explained in LowerFormalArguments, detect negative offsets
   // and adjust SPOffsets considering the final stack size.
-  int Offset = (spOffset < 0) ? (stackSize - spOffset) : (spOffset + 4);
-  Offset    += MI.getOperand(oi).getImm();
+  int Offset = (spOffset < 0) ? (stackSize - spOffset) : spOffset;
+  Offset    += MI.getOperand(oi).getImm();
 
-  DEBUG(errs() << "Offset     : " << Offset << "\n" << "<--------->\n");
+  DEBUG(dbgs() << "Offset     : " << Offset << "\n" << "<--------->\n");
 
   MI.getOperand(oi).ChangeToImmediate(Offset);
   MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
 }
 
 void MBlazeRegisterInfo::
-emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
-  // Get the right frame order for MBlaze.
-  adjustMBlazeStackFrame(MF);
-
-  // Get the number of bytes to allocate from the FrameInfo.
-  unsigned StackSize = MFI->getStackSize();
-
-  // No need to allocate space on the stack.
-  if (StackSize == 0 && !MFI->adjustsStack()) return;
-  if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28;
-
-  int FPOffset = MBlazeFI->getFPStackOffset();
-  int RAOffset = MBlazeFI->getRAStackOffset();
-
-  // Adjust stack : addi R1, R1, -imm
-  BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADDI), MBlaze::R1)
-      .addReg(MBlaze::R1).addImm(-StackSize);
-
-  // Save the return address only if the function isnt a leaf one.
-  // swi  R15, R1, stack_loc
-  if (MFI->adjustsStack()) {
-    BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
-        .addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1);
-  }
-
-  // if framepointer enabled, save it and set it
-  // to point to the stack pointer
-  if (hasFP(MF)) {
-    // swi  R19, R1, stack_loc
-    BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
-        .addReg(MBlaze::R19).addImm(FPOffset).addReg(MBlaze::R1);
-
-    // add R19, R1, R0
-    BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADD), MBlaze::R19)
-        .addReg(MBlaze::R1).addReg(MBlaze::R0);
-  }
-}
-
-void MBlazeRegisterInfo::
-emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
-  DebugLoc dl = MBBI->getDebugLoc();
-
-  // Get the FI's where RA and FP are saved.
- int FPOffset = MBlazeFI->getFPStackOffset(); - int RAOffset = MBlazeFI->getRAStackOffset(); - - // if framepointer enabled, restore it and restore the - // stack pointer - if (hasFP(MF)) { - // add R1, R19, R0 - BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1) - .addReg(MBlaze::R19).addReg(MBlaze::R0); - - // lwi R19, R1, stack_loc - BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19) - .addImm(FPOffset).addReg(MBlaze::R1); - } - - // Restore the return address only if the function isnt a leaf one. - // lwi R15, R1, stack_loc - if (MFI->adjustsStack()) { - BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15) - .addImm(RAOffset).addReg(MBlaze::R1); - } - - // Get the number of bytes from FrameInfo - int StackSize = (int) MFI->getStackSize(); - if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28; - - // adjust stack. - // addi R1, R1, imm - if (StackSize) { - BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1) - .addReg(MBlaze::R1).addImm(StackSize); - } -} - - -void MBlazeRegisterInfo:: processFunctionBeforeFrameFinalized(MachineFunction &MF) const { // Set the stack offset where GP must be saved/loaded from. MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -381,7 +317,9 @@ unsigned MBlazeRegisterInfo::getRARegister() const { } unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - return hasFP(MF) ? MBlaze::R19 : MBlaze::R1; + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + return TFI->hasFP(MF) ? MBlaze::R19 : MBlaze::R1; } unsigned MBlazeRegisterInfo::getEHExceptionRegister() const { @@ -394,9 +332,8 @@ unsigned MBlazeRegisterInfo::getEHHandlerRegister() const { return 0; } -int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - llvm_unreachable("What is the dwarf register number"); - return -1; +int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const { + return MBlazeGenRegisterInfo::getDwarfRegNumFull(RegNo,0); } #include "MBlazeGenRegisterInfo.inc" diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 1e1fde1..839536d 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -25,8 +25,8 @@ class TargetInstrInfo; class Type; namespace MBlaze { - /// SubregIndex - The index of various sized subregister classes. Note that - /// these indices must be kept in sync with the class indices in the + /// SubregIndex - The index of various sized subregister classes. Note that + /// these indices must be kept in sync with the class indices in the /// MBlazeRegisterInfo.td file. enum SubregIndex { SUBREG_FPEVEN = 1, SUBREG_FPODD = 2 @@ -36,7 +36,7 @@ namespace MBlaze { struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { const MBlazeSubtarget &Subtarget; const TargetInstrInfo &TII; - + MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget, const TargetInstrInfo &tii); @@ -44,20 +44,16 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { /// MBlaze::RA, return the number that it corresponds to (e.g. 31). static unsigned getRegisterNumbering(unsigned RegEnum); static unsigned getRegisterFromNumbering(unsigned RegEnum); + static unsigned getSpecialRegisterFromNumbering(unsigned RegEnum); /// Get PIC indirect call register static unsigned getPICCallReg(); - /// Adjust the MBlaze stack frame. - void adjustMBlazeStackFrame(MachineFunction &MF) const; - /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; BitVector getReservedRegs(const MachineFunction &MF) const; - bool hasFP(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; @@ -68,9 +64,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - /// Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; @@ -79,11 +72,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; - /// targetHandlesStackFrameRounding - Returns true if the target is - /// responsible for rounding up the stack frame (probably at emitPrologue - /// time). - bool targetHandlesStackFrameRounding() const { return true; } - int getDwarfRegNum(unsigned RegNum, bool isEH) const; }; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td index 5e93510..fbefb22 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.td +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td @@ -17,15 +17,20 @@ class MBlazeReg<string n> : Register<n> { let Namespace = "MBlaze"; } -// MBlaze CPU Registers +// Special purpose registers have 15-bit values +class MBlazeSReg<string n> : Register<n> { + field bits<15> Num; + let Namespace = "MBlaze"; +} + +// MBlaze general purpose registers class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> { let Num = num; } -// MBlaze 32-bit (aliased) FPU Registers -class FPR<bits<5> num, string n, list<Register> aliases> : MBlazeReg<n> { +// MBlaze special purpose registers +class MBlazeSPRReg<bits<15> num, string n> : MBlazeSReg<n> { let Num = num; - let Aliases = aliases; } //===----------------------------------------------------------------------===// @@ -33,7 +38,6 @@ class FPR<bits<5> num, string n, list<Register> aliases> : MBlazeReg<n> { //===----------------------------------------------------------------------===// let Namespace = "MBlaze" in { - // General Purpose Registers def R0 : MBlazeGPRReg< 0, "r0">, DwarfRegNum<[0]>; def R1 : MBlazeGPRReg< 1, "r1">, DwarfRegNum<[1]>; @@ -68,46 +72,43 @@ let Namespace = "MBlaze" in { def R30 : MBlazeGPRReg< 30, "r30">, DwarfRegNum<[30]>; def R31 : MBlazeGPRReg< 31, "r31">, DwarfRegNum<[31]>; - /// MBlaze Single point precision FPU Registers - def F0 : FPR< 0, "r0", [R0]>, DwarfRegNum<[32]>; - def F1 : FPR< 1, "r1", [R1]>, DwarfRegNum<[33]>; - def F2 : FPR< 2, "r2", [R2]>, DwarfRegNum<[34]>; - def F3 : FPR< 3, "r3", [R3]>, DwarfRegNum<[35]>; - def F4 : FPR< 4, "r4", [R4]>, DwarfRegNum<[36]>; - def F5 : FPR< 5, "r5", [R5]>, DwarfRegNum<[37]>; - def F6 : FPR< 6, "r6", [R6]>, DwarfRegNum<[38]>; - def F7 : FPR< 7, "r7", [R7]>, DwarfRegNum<[39]>; - def F8 : FPR< 8, "r8", [R8]>, DwarfRegNum<[40]>; - def F9 : FPR< 9, "r9", [R9]>, DwarfRegNum<[41]>; - def F10 : FPR<10, "r10", [R10]>, DwarfRegNum<[42]>; - def F11 : FPR<11, "r11", [R11]>, DwarfRegNum<[43]>; - def F12 : FPR<12, "r12", [R12]>, DwarfRegNum<[44]>; - def F13 : FPR<13, "r13", [R13]>, DwarfRegNum<[45]>; - def F14 : FPR<14, "r14", [R14]>, DwarfRegNum<[46]>; - def F15 : FPR<15, "r15", [R15]>, DwarfRegNum<[47]>; - def F16 : FPR<16, "r16", [R16]>, DwarfRegNum<[48]>; - def F17 : FPR<17, "r17", [R17]>, DwarfRegNum<[49]>; - 
def F18 : FPR<18, "r18", [R18]>, DwarfRegNum<[50]>;
-  def F19 : FPR<19, "r19", [R19]>, DwarfRegNum<[51]>;
-  def F20 : FPR<20, "r20", [R20]>, DwarfRegNum<[52]>;
-  def F21 : FPR<21, "r21", [R21]>, DwarfRegNum<[53]>;
-  def F22 : FPR<22, "r22", [R22]>, DwarfRegNum<[54]>;
-  def F23 : FPR<23, "r23", [R23]>, DwarfRegNum<[55]>;
-  def F24 : FPR<24, "r24", [R24]>, DwarfRegNum<[56]>;
-  def F25 : FPR<25, "r25", [R25]>, DwarfRegNum<[57]>;
-  def F26 : FPR<26, "r26", [R26]>, DwarfRegNum<[58]>;
-  def F27 : FPR<27, "r27", [R27]>, DwarfRegNum<[59]>;
-  def F28 : FPR<28, "r28", [R28]>, DwarfRegNum<[60]>;
-  def F29 : FPR<29, "r29", [R29]>, DwarfRegNum<[61]>;
-  def F30 : FPR<30, "r30", [R30]>, DwarfRegNum<[62]>;
-  def F31 : FPR<31, "r31", [R31]>, DwarfRegNum<[63]>;
+  // Special Purpose Registers
+  def RPC    : MBlazeSPRReg<0x0000, "rpc">,    DwarfRegNum<[32]>;
+  def RMSR   : MBlazeSPRReg<0x0001, "rmsr">,   DwarfRegNum<[33]>;
+  def REAR   : MBlazeSPRReg<0x0003, "rear">,   DwarfRegNum<[34]>;
+  def RESR   : MBlazeSPRReg<0x0005, "resr">,   DwarfRegNum<[35]>;
+  def RFSR   : MBlazeSPRReg<0x0007, "rfsr">,   DwarfRegNum<[36]>;
+  def RBTR   : MBlazeSPRReg<0x000B, "rbtr">,   DwarfRegNum<[37]>;
+  def REDR   : MBlazeSPRReg<0x000D, "redr">,   DwarfRegNum<[38]>;
+  def RPID   : MBlazeSPRReg<0x1000, "rpid">,   DwarfRegNum<[39]>;
+  def RZPR   : MBlazeSPRReg<0x1001, "rzpr">,   DwarfRegNum<[40]>;
+  def RTLBX  : MBlazeSPRReg<0x1002, "rtlbx">,  DwarfRegNum<[41]>;
+  def RTLBLO : MBlazeSPRReg<0x1003, "rtlblo">, DwarfRegNum<[42]>;
+  def RTLBHI : MBlazeSPRReg<0x1004, "rtlbhi">, DwarfRegNum<[43]>;
+  def RPVR0  : MBlazeSPRReg<0x2000, "rpvr0">,  DwarfRegNum<[44]>;
+  def RPVR1  : MBlazeSPRReg<0x2001, "rpvr1">,  DwarfRegNum<[45]>;
+  def RPVR2  : MBlazeSPRReg<0x2002, "rpvr2">,  DwarfRegNum<[46]>;
+  def RPVR3  : MBlazeSPRReg<0x2003, "rpvr3">,  DwarfRegNum<[47]>;
+  def RPVR4  : MBlazeSPRReg<0x2004, "rpvr4">,  DwarfRegNum<[48]>;
+  def RPVR5  : MBlazeSPRReg<0x2005, "rpvr5">,  DwarfRegNum<[49]>;
+  def RPVR6  : MBlazeSPRReg<0x2006, "rpvr6">,  DwarfRegNum<[50]>;
+  def RPVR7  : MBlazeSPRReg<0x2007, "rpvr7">,  DwarfRegNum<[51]>;
+  def RPVR8  : MBlazeSPRReg<0x2008, "rpvr8">,  DwarfRegNum<[52]>;
+  def RPVR9  : MBlazeSPRReg<0x2009, "rpvr9">,  DwarfRegNum<[53]>;
+  def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[54]>;
+  def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[55]>;
+
+  // The carry bit. On the MicroBlaze this is really bit 29 of the
+  // MSR register, but it is the only bit of that register that we
+  // are interested in modeling.
+ def CARRY : MBlazeSPRReg<0x0000, "rmsr[c]">, DwarfRegNum<[33]>; } //===----------------------------------------------------------------------===// // Register Classes //===----------------------------------------------------------------------===// -def CPURegs : RegisterClass<"MBlaze", [i32], 32, +def GPR : RegisterClass<"MBlaze", [i32,f32], 32, [ // Return Values and Arguments R3, R4, R5, R6, R7, R8, R9, R10, @@ -135,46 +136,55 @@ def CPURegs : RegisterClass<"MBlaze", [i32], 32, iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - CPURegsClass::iterator - CPURegsClass::allocation_order_end(const MachineFunction &MF) const { + GPRClass::iterator + GPRClass::allocation_order_end(const MachineFunction &MF) const { // The last 10 registers on the list above are reserved return end()-10; } }]; } -def FGR32 : RegisterClass<"MBlaze", [f32], 32, +def SPR : RegisterClass<"MBlaze", [i32], 32, [ - // Return Values and Arguments - F3, F4, F5, F6, F7, F8, F9, F10, - - // Not preserved across procedure calls - F11, F12, - - // Callee save - F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31, - // Reserved - F0, // Always zero - F1, // The stack pointer - F2, // Read-only small data area anchor - F13, // Read-write small data area anchor - F14, // Return address for interrupts - F15, // Return address for sub-routines - F16, // Return address for trap - F17, // Return address for exceptions - F18, // Reserved for assembler - F19 // The frame pointer + RPC, + RMSR, + REAR, + RESR, + RFSR, + RBTR, + REDR, + RPID, + RZPR, + RTLBX, + RTLBLO, + RTLBHI, + RPVR0, + RPVR1, + RPVR2, + RPVR3, + RPVR4, + RPVR5, + RPVR6, + RPVR7, + RPVR8, + RPVR9, + RPVR10, + RPVR11 ]> { let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - FGR32Class::iterator - FGR32Class::allocation_order_end(const MachineFunction &MF) const { - // The last 10 registers on the list above are reserved - return end()-10; + SPRClass::iterator + SPRClass::allocation_order_end(const MachineFunction &MF) const { + // None of the special purpose registers are allocatable. + return end()-24; } }]; } + +def CRC : RegisterClass<"MBlaze", [i32], 32, [CARRY]> { + let CopyCost = -1; +} diff --git a/lib/Target/MBlaze/MBlazeRelocations.h b/lib/Target/MBlaze/MBlazeRelocations.h new file mode 100644 index 0000000..c298eda --- /dev/null +++ b/lib/Target/MBlaze/MBlazeRelocations.h @@ -0,0 +1,47 @@ +//===- MBlazeRelocations.h - MBlaze Code Relocations ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MBlaze target-specific relocation types. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZERELOCATIONS_H +#define MBLAZERELOCATIONS_H + +#include "llvm/CodeGen/MachineRelocation.h" + +namespace llvm { + namespace MBlaze { + enum RelocationType { + /// reloc_pcrel_word - PC relative relocation, add the relocated value to + /// the value already in memory, after we adjust it for where the PC is. + reloc_pcrel_word = 0, + + /// reloc_picrel_word - PIC base relative relocation, add the relocated + /// value to the value already in memory, after we adjust it for where the + /// PIC base is. 
+ reloc_picrel_word = 1, + + /// reloc_absolute_word - absolute relocation, just add the relocated + /// value to the value already in memory. + reloc_absolute_word = 2, + + /// reloc_absolute_word_sext - absolute relocation, just add the relocated + /// value to the value already in memory. In object files, it represents a + /// value which must be sign-extended when resolving the relocation. + reloc_absolute_word_sext = 3, + + /// reloc_absolute_dword - absolute relocation, just add the relocated + /// value to the value already in memory. + reloc_absolute_dword = 4 + }; + } +} + +#endif diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td index 4a65542..ac4d98c 100644 --- a/lib/Target/MBlaze/MBlazeSchedule.td +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -14,7 +14,7 @@ def ALU : FuncUnit; def IMULDIV : FuncUnit; //===----------------------------------------------------------------------===// -// Instruction Itinerary classes used for MBlaze +// Instruction Itinerary classes used for MBlaze //===----------------------------------------------------------------------===// def IIAlu : InstrItinClass; def IILoad : InstrItinClass; @@ -41,7 +41,7 @@ def IIPseudo : InstrItinClass; // MBlaze Generic instruction itineraries. //===----------------------------------------------------------------------===// def MBlazeGenericItineraries : ProcessorItineraries< - [ALU, IMULDIV], [ + [ALU, IMULDIV], [], [ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>, InstrItinData<IILoad , [InstrStage<3, [ALU]>]>, InstrItinData<IIStore , [InstrStage<1, [ALU]>]>, diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 4252953..cd949e1 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -15,13 +15,62 @@ #include "MBlazeMCAsmInfo.h" #include "MBlazeTargetMachine.h" #include "llvm/PassManager.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" using namespace llvm; +static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { + Triple TheTriple(TT); + switch (TheTriple.getOS()) { + default: + return new MBlazeMCAsmInfo(); + } +} + +static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, + MCContext &Ctx, TargetAsmBackend &TAB, + raw_ostream &_OS, + MCCodeEmitter *_Emitter, + bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + switch (TheTriple.getOS()) { + case Triple::Darwin: + llvm_unreachable("MBlaze does not support Darwin MACH-O format"); + return NULL; + case Triple::MinGW32: + case Triple::Cygwin: + case Triple::Win32: + llvm_unreachable("MBlaze does not support Windows COFF format"); + return NULL; + default: + return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, + NoExecStack); + } +} + + extern "C" void LLVMInitializeMBlazeTarget() { // Register the target. RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget); - RegisterAsmInfo<MBlazeMCAsmInfo> A(TheMBlazeTarget); + + // Register the target asm info. 
+ RegisterAsmInfoFn A(TheMBlazeTarget, createMCAsmInfo); + + // Register the MC code emitter + TargetRegistry::RegisterCodeEmitter(TheMBlazeTarget, + llvm::createMBlazeMCCodeEmitter); + + // Register the asm backend + TargetRegistry::RegisterAsmBackend(TheMBlazeTarget, + createMBlazeAsmBackend); + + // Register the object streamer + TargetRegistry::RegisterObjectStreamer(TheMBlazeTarget, + createMCStreamer); + } // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment @@ -35,11 +84,10 @@ MBlazeTargetMachine(const Target &T, const std::string &TT, const std::string &FS): LLVMTargetMachine(T, TT), Subtarget(TT, FS), - DataLayout("E-p:32:32-i8:8:8-i16:16:16-i64:32:32-" - "f64:32:32-v64:32:32-v128:32:32-n32"), + DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"), InstrInfo(*this), - FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), - TLInfo(*this), TSInfo(*this) { + FrameLowering(Subtarget), + TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this) { if (getRelocationModel() == Reloc::Default) { setRelocationModel(Reloc::Static); } @@ -50,8 +98,8 @@ MBlazeTargetMachine(const Target &T, const std::string &TT, // Install an instruction selector pass using // the ISelDag to gen MBlaze code. -bool MBlazeTargetMachine:: -addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { +bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { PM.add(createMBlazeISelDag(*this)); return false; } @@ -59,8 +107,8 @@ addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // Implemented by targets that want to run passes immediately before // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. -bool MBlazeTargetMachine:: -addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { +bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { PM.add(createMBlazeDelaySlotFillerPass(*this)); return true; } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 6a57e58..45ad078 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -19,21 +19,25 @@ #include "MBlazeISelLowering.h" #include "MBlazeSelectionDAGInfo.h" #include "MBlazeIntrinsicInfo.h" +#include "MBlazeFrameLowering.h" +#include "MBlazeELFWriterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" namespace llvm { class formatted_raw_ostream; class MBlazeTargetMachine : public LLVMTargetMachine { - MBlazeSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment - MBlazeInstrInfo InstrInfo; - TargetFrameInfo FrameInfo; - MBlazeTargetLowering TLInfo; + MBlazeSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + MBlazeInstrInfo InstrInfo; + MBlazeFrameLowering FrameLowering; + MBlazeTargetLowering TLInfo; MBlazeSelectionDAGInfo TSInfo; - MBlazeIntrinsicInfo IntrinsicInfo; + MBlazeIntrinsicInfo IntrinsicInfo; + MBlazeELFWriterInfo ELFWriterInfo; public: MBlazeTargetMachine(const Target &T, const std::string &TT, const std::string &FS); @@ -41,8 +45,8 @@ namespace llvm { virtual const MBlazeInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetFrameInfo *getFrameInfo() const - { return &FrameInfo; } + virtual const TargetFrameLowering *getFrameLowering() const + { return &FrameLowering; } 
virtual const MBlazeSubtarget *getSubtargetImpl() const { return &Subtarget; } @@ -62,12 +66,13 @@ namespace llvm { const TargetIntrinsicInfo *getIntrinsicInfo() const { return &IntrinsicInfo; } - // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); + virtual const MBlazeELFWriterInfo *getELFWriterInfo() const { + return &ELFWriterInfo; + } - virtual bool addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level Opt); + virtual bool addPreEmitPass(PassManagerBase &PM,CodeGenOpt::Level Opt); }; } // End llvm namespace diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp index 05c01ef..abd1b0b 100644 --- a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp +++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp @@ -16,6 +16,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ELF.h" using namespace llvm; void MBlazeTargetObjectFile:: @@ -23,13 +24,13 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); SmallDataSection = - getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".sdata", ELF::SHT_PROGBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, SectionKind::getDataRel()); SmallBSSSection = - getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".sbss", ELF::SHT_NOBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, SectionKind::getBSS()); } diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.h b/lib/Target/MBlaze/MBlazeTargetObjectFile.h index 20e7702..c313722 100644 --- a/lib/Target/MBlaze/MBlazeTargetObjectFile.h +++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.h @@ -18,10 +18,9 @@ namespace llvm { const MCSection *SmallDataSection; const MCSection *SmallBSSSection; public: - + void Initialize(MCContext &Ctx, const TargetMachine &TM); - /// IsGlobalInSmallSection - Return true if this global address should be /// placed into small data/bss section. bool IsGlobalInSmallSection(const GlobalValue *GV, @@ -29,8 +28,8 @@ namespace llvm { SectionKind Kind) const; bool IsGlobalInSmallSection(const GlobalValue *GV, - const TargetMachine &TM) const; - + const TargetMachine &TM) const; + const MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile index 19e508c..e01c60b 100644 --- a/lib/Target/MBlaze/Makefile +++ b/lib/Target/MBlaze/Makefile @@ -12,12 +12,14 @@ TARGET = MBlaze # Make sure that tblgen is run, first thing. 
BUILT_SOURCES = MBlazeGenRegisterInfo.h.inc MBlazeGenRegisterNames.inc \
-                MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \
-                MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \
-                MBlazeGenDAGISel.inc MBlazeGenCallingConv.inc \
-                MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc
+                MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \
+                MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \
+                MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \
+                MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \
+                MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc \
+                MBlazeGenEDInfo.inc
 
-DIRS = AsmPrinter TargetInfo
+DIRS = InstPrinter AsmParser Disassembler TargetInfo
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MBlaze/TODO b/lib/Target/MBlaze/TODO
new file mode 100644
index 0000000..2e613eb
--- /dev/null
+++ b/lib/Target/MBlaze/TODO
@@ -0,0 +1,26 @@
+* Writing out ELF files is close to working but the following needs to
+  be examined more closely:
+  - Relocations use 2-byte / 4-byte terminology in reference to
+    the size of the immediate value being changed. The Xilinx
+    terminology seems to be (???) 4-byte / 8-byte in reference
+    to the number of bytes of instructions that are being changed.
+
+* Code generation seems to work relatively well now but the following
+  needs to be examined more closely:
+  - The stack layout needs to be examined to make sure it meets
+    the standard, especially in regards to var arg functions.
+  - The processor itineraries are copied from a different backend
+    and need to be updated to model the MicroBlaze correctly.
+  - Look at the MBlazeGenFastISel.inc stuff and make use of it
+    if appropriate.
+
+* A basic assembly parser is present now and seems to parse most things.
+  There are a few things that need to be looked at:
+  - There are some instructions that are not generated by the backend
+    and have not been tested as far as the parser is concerned.
+  - The assembly parser does not use any MicroBlaze-specific directives.
+    I should investigate whether there are MicroBlaze-specific directives
+    and, if there are, add them.
+  - The instructions MFS and MTS use special names for some of the
+    special registers that can be accessed. These special register
+    names should be parsed by the assembly parser.
diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
index 5afb14d..40696f6 100644
--- a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
@@ -1,4 +1,5 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+                     ${CMAKE_CURRENT_SOURCE_DIR}/.. )
 
 add_llvm_library(LLVMMBlazeInfo
   MBlazeTargetInfo.cpp
diff --git a/lib/Target/MSP430/AsmPrinter/CMakeLists.txt b/lib/Target/MSP430/AsmPrinter/CMakeLists.txt
deleted file mode 100644
index 4b1f4e6..0000000
--- a/lib/Target/MSP430/AsmPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/..
) - -add_llvm_library(LLVMMSP430AsmPrinter - MSP430AsmPrinter.cpp - MSP430InstPrinter.cpp - MSP430MCInstLower.cpp - ) -add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen) diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp deleted file mode 100644 index 56f72bb..0000000 --- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp +++ /dev/null @@ -1,179 +0,0 @@ -//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to the MSP430 assembly language. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "MSP430.h" -#include "MSP430InstrInfo.h" -#include "MSP430InstPrinter.h" -#include "MSP430MCAsmInfo.h" -#include "MSP430MCInstLower.h" -#include "MSP430TargetMachine.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - class MSP430AsmPrinter : public AsmPrinter { - public: - MSP430AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) {} - - virtual const char *getPassName() const { - return "MSP430 Assembly Printer"; - } - - void printOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O, const char* Modifier = 0); - void printSrcMemOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O); - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - bool PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O); - void EmitInstruction(const MachineInstr *MI); - }; -} // end of anonymous namespace - - -void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O, const char *Modifier) { - const MachineOperand &MO = MI->getOperand(OpNum); - switch (MO.getType()) { - default: assert(0 && "Not implemented yet!"); - case MachineOperand::MO_Register: - O << MSP430InstPrinter::getRegisterName(MO.getReg()); - return; - case MachineOperand::MO_Immediate: - if (!Modifier || strcmp(Modifier, "nohash")) - O << '#'; - O << MO.getImm(); - return; - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - case MachineOperand::MO_GlobalAddress: { - bool isMemOp = Modifier && !strcmp(Modifier, "mem"); - uint64_t Offset = MO.getOffset(); - - // If the global address expression is a part of displacement field with a - // register base, we should not emit any prefix symbol here, e.g. 
-
-void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
-                                          raw_ostream &O) {
-  const MachineOperand &Base = MI->getOperand(OpNum);
-  const MachineOperand &Disp = MI->getOperand(OpNum+1);
-
-  // Print displacement first
-
-  // Imm here is in fact global address - print extra modifier.
-  if (Disp.isImm() && !Base.getReg())
-    O << '&';
-  printOperand(MI, OpNum+1, O, "nohash");
-
-  // Print register base field
-  if (Base.getReg()) {
-    O << '(';
-    printOperand(MI, OpNum, O);
-    O << ')';
-  }
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool MSP430AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
-                                       unsigned AsmVariant,
-                                       const char *ExtraCode, raw_ostream &O) {
-  // Does this asm operand have a single letter operand modifier?
-  if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
-
-  printOperand(MI, OpNo, O);
-  return false;
-}
-
-bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                             unsigned OpNo, unsigned AsmVariant,
-                                             const char *ExtraCode,
-                                             raw_ostream &O) {
-  if (ExtraCode && ExtraCode[0]) {
-    return true; // Unknown modifier.
-  }
-  printSrcMemOperand(MI, OpNo, O);
-  return false;
-}
-
-//===----------------------------------------------------------------------===//
-void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
-  MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this);
-
-  MCInst TmpInst;
-  MCInstLowering.Lower(MI, TmpInst);
-  OutStreamer.EmitInstruction(TmpInst);
-}
-
-static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
-                                                unsigned SyntaxVariant,
-                                                const MCAsmInfo &MAI) {
-  if (SyntaxVariant == 0)
-    return new MSP430InstPrinter(MAI);
-  return 0;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeMSP430AsmPrinter() {
-  RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
-  TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
-                                        createMSP430MCInstPrinter);
-}
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
deleted file mode 100644
index c15d408..0000000
--- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-//===-- MSP430InstPrinter.cpp - Convert MSP430 MCInst to assembly syntax --===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an MSP430 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "MSP430.h"
-#include "MSP430InstrInfo.h"
-#include "MSP430InstPrinter.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-using namespace llvm;
-
-
-// Include the auto-generated portion of the assembly writer.
-#define MachineInstr MCInst
-#include "MSP430GenAsmWriter.inc"
-#undef MachineInstr
-
-void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
-  printInstruction(MI, O);
-}
-
-void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
-                                             raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isImm())
-    O << Op.getImm();
-  else {
-    assert(Op.isExpr() && "unknown pcrel immediate operand");
-    O << *Op.getExpr();
-  }
-}
-
-void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                     raw_ostream &O, const char *Modifier) {
-  assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    O << getRegisterName(Op.getReg());
-  } else if (Op.isImm()) {
-    O << '#' << Op.getImm();
-  } else {
-    assert(Op.isExpr() && "unknown operand kind in printOperand");
-    O << '#' << *Op.getExpr();
-  }
-}
-
-void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
-                                           raw_ostream &O,
-                                           const char *Modifier) {
-  const MCOperand &Base = MI->getOperand(OpNo);
-  const MCOperand &Disp = MI->getOperand(OpNo+1);
-
-  // Print displacement first
-
-  // If the global address expression is a part of displacement field with a
-  // register base, we should not emit any prefix symbol here, e.g.
-  //   mov.w &foo, r1
-  // vs
-  //   mov.w glb(r1), r2
-  // Otherwise (!) msp430-as will silently miscompile the output :(
-  if (!Base.getReg())
-    O << '&';
-
-  if (Disp.isExpr())
-    O << *Disp.getExpr();
-  else {
-    assert(Disp.isImm() && "Expected immediate in displacement field");
-    O << Disp.getImm();
-  }
-
-  // Print register base field
-  if (Base.getReg())
-    O << '(' << getRegisterName(Base.getReg()) << ')';
-}
-
-void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
-                                       raw_ostream &O) {
-  unsigned CC = MI->getOperand(OpNo).getImm();
-
-  switch (CC) {
-  default:
-    llvm_unreachable("Unsupported CC code");
-    break;
-  case MSP430CC::COND_E:
-    O << "eq";
-    break;
-  case MSP430CC::COND_NE:
-    O << "ne";
-    break;
-  case MSP430CC::COND_HS:
-    O << "hs";
-    break;
-  case MSP430CC::COND_LO:
-    O << "lo";
-    break;
-  case MSP430CC::COND_GE:
-    O << "ge";
-    break;
-  case MSP430CC::COND_L:
-    O << 'l';
-    break;
-  }
-}
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h
deleted file mode 100644
index f0e1ce2..0000000
--- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax ----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints a MSP430 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MSP430INSTPRINTER_H
-#define MSP430INSTPRINTER_H
-
-#include "llvm/MC/MCInstPrinter.h"
-
-namespace llvm {
-  class MCOperand;
-
-  class MSP430InstPrinter : public MCInstPrinter {
-  public:
-    MSP430InstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {
-    }
-
-    virtual void printInst(const MCInst *MI, raw_ostream &O);
-
-    // Autogenerated by tblgen.
-    void printInstruction(const MCInst *MI, raw_ostream &O);
-    static const char *getRegisterName(unsigned RegNo);
-
-    void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
-                      const char *Modifier = 0);
-    void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-    void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
-                            const char *Modifier = 0);
-    void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
-  };
-}
-
-#endif
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
deleted file mode 100644
index d1d9a11..0000000
--- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst---===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains code to lower MSP430 MachineInstrs to their corresponding
-// MCInst records.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MSP430MCInstLower.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/SmallString.h"
-using namespace llvm;
-
-MCSymbol *MSP430MCInstLower::
-GetGlobalAddressSymbol(const MachineOperand &MO) const {
-  switch (MO.getTargetFlags()) {
-  default: llvm_unreachable("Unknown target flag on GV operand");
-  case 0: break;
-  }
-
-  return Printer.Mang->getSymbol(MO.getGlobal());
-}
-
-MCSymbol *MSP430MCInstLower::
-GetExternalSymbolSymbol(const MachineOperand &MO) const {
-  switch (MO.getTargetFlags()) {
-  default: assert(0 && "Unknown target flag on GV operand");
-  case 0: break;
-  }
-
-  return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
-}
-
-MCSymbol *MSP430MCInstLower::
-GetJumpTableSymbol(const MachineOperand &MO) const {
-  SmallString<256> Name;
-  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
-                            << Printer.getFunctionNumber() << '_'
-                            << MO.getIndex();
-
-  switch (MO.getTargetFlags()) {
-  default: llvm_unreachable("Unknown target flag on GV operand");
-  case 0: break;
-  }
-
-  // Create a symbol for the name.
-  return Ctx.GetOrCreateSymbol(Name.str());
-}
-
-MCSymbol *MSP430MCInstLower::
-GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
-  SmallString<256> Name;
-  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
-                            << Printer.getFunctionNumber() << '_'
-                            << MO.getIndex();
-
-  switch (MO.getTargetFlags()) {
-  default: llvm_unreachable("Unknown target flag on GV operand");
-  case 0: break;
-  }
-
-  // Create a symbol for the name.
-  return Ctx.GetOrCreateSymbol(Name.str());
-}
-
-MCSymbol *MSP430MCInstLower::
-GetBlockAddressSymbol(const MachineOperand &MO) const {
-  switch (MO.getTargetFlags()) {
-  default: assert(0 && "Unknown target flag on GV operand");
-  case 0: break;
-  }
-
-  return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
-}
-
-MCOperand MSP430MCInstLower::
-LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
-  // FIXME: We would like an efficient form for this, so we don't have to do a
-  // lot of extra uniquing.
-  const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
-
-  switch (MO.getTargetFlags()) {
-  default: llvm_unreachable("Unknown target flag on GV operand");
-  case 0: break;
-  }
-
-  if (!MO.isJTI() && MO.getOffset())
-    Expr = MCBinaryExpr::CreateAdd(Expr,
-                                   MCConstantExpr::Create(MO.getOffset(), Ctx),
-                                   Ctx);
-  return MCOperand::CreateExpr(Expr);
-}
-
-void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
-  OutMI.setOpcode(MI->getOpcode());
-
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-
-    MCOperand MCOp;
-    switch (MO.getType()) {
-    default:
-      MI->dump();
-      assert(0 && "unknown operand type");
-    case MachineOperand::MO_Register:
-      // Ignore all implicit register operands.
-      if (MO.isImplicit()) continue;
-      MCOp = MCOperand::CreateReg(MO.getReg());
-      break;
-    case MachineOperand::MO_Immediate:
-      MCOp = MCOperand::CreateImm(MO.getImm());
-      break;
-    case MachineOperand::MO_MachineBasicBlock:
-      MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
-                         MO.getMBB()->getSymbol(), Ctx));
-      break;
-    case MachineOperand::MO_GlobalAddress:
-      MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
-      break;
-    case MachineOperand::MO_ExternalSymbol:
-      MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
-      break;
-    case MachineOperand::MO_JumpTableIndex:
-      MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
-      break;
-    case MachineOperand::MO_ConstantPoolIndex:
-      MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
-      break;
-    case MachineOperand::MO_BlockAddress:
-      MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
-    }
-
-    OutMI.addOperand(MCOp);
-  }
-}
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
deleted file mode 100644
index e937696..0000000
--- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
+++ /dev/null
@@ -1,50 +0,0 @@
-//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MSP430_MCINSTLOWER_H
-#define MSP430_MCINSTLOWER_H
-
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-  class AsmPrinter;
-  class MCAsmInfo;
-  class MCContext;
-  class MCInst;
-  class MCOperand;
-  class MCSymbol;
-  class MachineInstr;
-  class MachineModuleInfoMachO;
-  class MachineOperand;
-  class Mangler;
-
-  /// MSP430MCInstLower - This class is used to lower an MachineInstr
-  /// into an MCInst.
-class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower {
-  MCContext &Ctx;
-  Mangler &Mang;
-
-  AsmPrinter &Printer;
-public:
-  MSP430MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
-    : Ctx(ctx), Mang(mang), Printer(printer) {}
-  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
-
-  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
-
-  MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
-  MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
-  MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
-  MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
-  MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
-};
-
-}
-
-#endif
diff --git a/lib/Target/MSP430/AsmPrinter/Makefile b/lib/Target/MSP430/AsmPrinter/Makefile
deleted file mode 100644
index a5293ab..0000000
--- a/lib/Target/MSP430/AsmPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/MSP430/AsmPrinter/Makefile ---------------*- Makefile -*-===##
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMSP430AsmPrinter
-
-# Hack: we need to include 'main' MSP430 target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index a3f60d2..2c7cbb6 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -15,11 +15,15 @@ add_llvm_target(MSP430CodeGen
   MSP430ISelDAGToDAG.cpp
   MSP430ISelLowering.cpp
   MSP430InstrInfo.cpp
+  MSP430FrameLowering.cpp
   MSP430MCAsmInfo.cpp
   MSP430RegisterInfo.cpp
   MSP430Subtarget.cpp
   MSP430TargetMachine.cpp
   MSP430SelectionDAGInfo.cpp
+  MSP430AsmPrinter.cpp
+  MSP430MCInstLower.cpp
   )
 
-target_link_libraries (LLVMMSP430CodeGen LLVMSelectionDAG)
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..f5458d5
--- /dev/null
+++ b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMSP430AsmPrinter
+  MSP430InstPrinter.cpp
+  )
+add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen)
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
new file mode 100644
index 0000000..e10d4fe
--- /dev/null
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -0,0 +1,113 @@
+//===-- MSP430InstPrinter.cpp - Convert MSP430 MCInst to assembly syntax --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "MSP430InstPrinter.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+
+// Include the auto-generated portion of the assembly writer.
+#include "MSP430GenAsmWriter.inc"
+
+void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+  printInstruction(MI, O);
+}
+
+void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
+                                             raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isImm())
+    O << Op.getImm();
+  else {
+    assert(Op.isExpr() && "unknown pcrel immediate operand");
+    O << *Op.getExpr();
+  }
+}
+
+void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                     raw_ostream &O, const char *Modifier) {
+  assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    O << getRegisterName(Op.getReg());
+  } else if (Op.isImm()) {
+    O << '#' << Op.getImm();
+  } else {
+    assert(Op.isExpr() && "unknown operand kind in printOperand");
+    O << '#' << *Op.getExpr();
+  }
+}
+
+void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O,
+                                           const char *Modifier) {
+  const MCOperand &Base = MI->getOperand(OpNo);
+  const MCOperand &Disp = MI->getOperand(OpNo+1);
+
+  // Print displacement first
+
+  // If the global address expression is a part of displacement field with a
+  // register base, we should not emit any prefix symbol here, e.g.
+  //   mov.w &foo, r1
+  // vs
+  //   mov.w glb(r1), r2
+  // Otherwise (!) msp430-as will silently miscompile the output :(
+  if (!Base.getReg())
+    O << '&';
+
+  if (Disp.isExpr())
+    O << *Disp.getExpr();
+  else {
+    assert(Disp.isImm() && "Expected immediate in displacement field");
+    O << Disp.getImm();
+  }
+
+  // Print register base field
+  if (Base.getReg())
+    O << '(' << getRegisterName(Base.getReg()) << ')';
+}
+
+void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned CC = MI->getOperand(OpNo).getImm();
+
+  switch (CC) {
+  default:
+    llvm_unreachable("Unsupported CC code");
+    break;
+  case MSP430CC::COND_E:
+    O << "eq";
+    break;
+  case MSP430CC::COND_NE:
+    O << "ne";
+    break;
+  case MSP430CC::COND_HS:
+    O << "hs";
+    break;
+  case MSP430CC::COND_LO:
+    O << "lo";
+    break;
+  case MSP430CC::COND_GE:
+    O << "ge";
+    break;
+  case MSP430CC::COND_L:
+    O << 'l';
+    break;
+  }
+}
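// Editor's note: a sketch of how the MCInst-based printer above is driven by
// a client. MCInst construction mirrors the Lower() code later in this patch;
// MSP430::MOV16rr and MSP430::R10W/R11W are assumed names for illustration
// (only FPW, SPW and R5W-R7W appear verbatim in this patch).
void printOneInst(MSP430InstPrinter &Printer, raw_ostream &OS) {
  MCInst Inst;
  Inst.setOpcode(MSP430::MOV16rr);                    // hypothetical opcode
  Inst.addOperand(MCOperand::CreateReg(MSP430::R10W)); // destination register
  Inst.addOperand(MCOperand::CreateReg(MSP430::R11W)); // source register
  Printer.printInst(&Inst, OS);  // dispatches to the tblgen'd printInstruction()
}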
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
new file mode 100644
index 0000000..f0e1ce2
--- /dev/null
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -0,0 +1,43 @@
+//===-- MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a MSP430 MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430INSTPRINTER_H
+#define MSP430INSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+  class MCOperand;
+
+  class MSP430InstPrinter : public MCInstPrinter {
+  public:
+    MSP430InstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {
+    }
+
+    virtual void printInst(const MCInst *MI, raw_ostream &O);
+
+    // Autogenerated by tblgen.
+    void printInstruction(const MCInst *MI, raw_ostream &O);
+    static const char *getRegisterName(unsigned RegNo);
+
+    void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                      const char *Modifier = 0);
+    void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+    void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                            const char *Modifier = 0);
+    void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  };
+}
+
+#endif
diff --git a/lib/Target/MSP430/InstPrinter/Makefile b/lib/Target/MSP430/InstPrinter/Makefile
new file mode 100644
index 0000000..a5293ab
--- /dev/null
+++ b/lib/Target/MSP430/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MSP430/InstPrinter/Makefile --------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMSP430AsmPrinter
+
+# Hack: we need to include 'main' MSP430 target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td
index 0f08e3d..5cc5e6e 100644
--- a/lib/Target/MSP430/MSP430.td
+++ b/lib/Target/MSP430/MSP430.td
@@ -52,6 +52,7 @@ def MSP430InstrInfo : InstrInfo;
 
 def MSP430InstPrinter : AsmWriter {
   string AsmWriterClassName  = "InstPrinter";
+  bit isMCAsmWriter = 1;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
new file mode 100644
index 0000000..a1a7f44
--- /dev/null
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -0,0 +1,179 @@
+//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the MSP430 assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MSP430.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MCAsmInfo.h"
+#include "MSP430MCInstLower.h"
+#include "MSP430TargetMachine.h"
+#include "InstPrinter/MSP430InstPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  class MSP430AsmPrinter : public AsmPrinter {
+  public:
+    MSP430AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : AsmPrinter(TM, Streamer) {}
+
+    virtual const char *getPassName() const {
+      return "MSP430 Assembly Printer";
+    }
+
+    void printOperand(const MachineInstr *MI, int OpNum,
+                      raw_ostream &O, const char* Modifier = 0);
+    void printSrcMemOperand(const MachineInstr *MI, int OpNum,
+                            raw_ostream &O);
+    bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                         unsigned AsmVariant, const char *ExtraCode,
+                         raw_ostream &O);
+    bool PrintAsmMemoryOperand(const MachineInstr *MI,
+                               unsigned OpNo, unsigned AsmVariant,
+                               const char *ExtraCode, raw_ostream &O);
+    void EmitInstruction(const MachineInstr *MI);
+  };
+} // end of anonymous namespace
+
+
+void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
+                                    raw_ostream &O, const char *Modifier) {
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  switch (MO.getType()) {
+  default: assert(0 && "Not implemented yet!");
+  case MachineOperand::MO_Register:
+    O << MSP430InstPrinter::getRegisterName(MO.getReg());
+    return;
+  case MachineOperand::MO_Immediate:
+    if (!Modifier || strcmp(Modifier, "nohash"))
+      O << '#';
+    O << MO.getImm();
+    return;
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    return;
+  case MachineOperand::MO_GlobalAddress: {
+    bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+    uint64_t Offset = MO.getOffset();
+
+    // If the global address expression is a part of displacement field with a
+    // register base, we should not emit any prefix symbol here, e.g.
+    //   mov.w &foo, r1
+    // vs
+    //   mov.w glb(r1), r2
+    // Otherwise (!) msp430-as will silently miscompile the output :(
+    if (!Modifier || strcmp(Modifier, "nohash"))
+      O << (isMemOp ? '&' : '#');
+    if (Offset)
+      O << '(' << Offset << '+';
+
+    O << *Mang->getSymbol(MO.getGlobal());
+
+    if (Offset)
+      O << ')';
+
+    return;
+  }
+  case MachineOperand::MO_ExternalSymbol: {
+    bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+    O << (isMemOp ? '&' : '#');
+    O << MAI->getGlobalPrefix() << MO.getSymbolName();
+    return;
+  }
+  }
+}
+
+void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
+                                          raw_ostream &O) {
+  const MachineOperand &Base = MI->getOperand(OpNum);
+  const MachineOperand &Disp = MI->getOperand(OpNum+1);
+
+  // Print displacement first
+
+  // Imm here is in fact global address - print extra modifier.
+  if (Disp.isImm() && !Base.getReg())
+    O << '&';
+  printOperand(MI, OpNum+1, O, "nohash");
+
+  // Print register base field
+  if (Base.getReg()) {
+    O << '(';
+    printOperand(MI, OpNum, O);
+    O << ')';
+  }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool MSP430AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                       unsigned AsmVariant,
+                                       const char *ExtraCode, raw_ostream &O) {
+  // Does this asm operand have a single letter operand modifier?
+  if (ExtraCode && ExtraCode[0])
+    return true; // Unknown modifier.
+
+  printOperand(MI, OpNo, O);
+  return false;
+}
+
+bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                             unsigned OpNo, unsigned AsmVariant,
+                                             const char *ExtraCode,
+                                             raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0]) {
+    return true; // Unknown modifier.
+  }
+  printSrcMemOperand(MI, OpNo, O);
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this);
+
+  MCInst TmpInst;
+  MCInstLowering.Lower(MI, TmpInst);
+  OutStreamer.EmitInstruction(TmpInst);
+}
+
+static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+                                                unsigned SyntaxVariant,
+                                                const MCAsmInfo &MAI) {
+  if (SyntaxVariant == 0)
+    return new MSP430InstPrinter(MAI);
+  return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMSP430AsmPrinter() {
+  RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
+  TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
+                                        createMSP430MCInstPrinter);
+}
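// Editor's note: sketch of how a tool picks up the printer registered just
// above. RegisterMCInstPrinter pairs TheMSP430Target with the factory; a
// consumer then asks the Target for a printer. The wrapper function and the
// exact createMCInstPrinter signature are assumptions about the 2011-era
// TargetRegistry API, inferred from the factory signature in this patch.
MCInstPrinter *getMSP430Printer(const MCAsmInfo &MAI) {
  const Target &T = TheMSP430Target;  // normally found via TargetRegistry lookup
  return T.createMCInstPrinter(/*SyntaxVariant=*/0, MAI);
}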
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
new file mode 100644
index 0000000..c99f4ab
--- /dev/null
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -0,0 +1,223 @@
+//======-- MSP430FrameLowering.cpp - MSP430 Frame Information -------=========//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MSP430 implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430FrameLowering.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430MachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  return (DisableFramePointerElim(MF) ||
+          MF.getFrameInfo()->hasVarSizedObjects() ||
+          MFI->isFrameAddressTaken());
+}
+
+bool MSP430FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void MSP430FrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+  const MSP430InstrInfo &TII =
+    *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Get the number of bytes to allocate from the FrameInfo.
+  uint64_t StackSize = MFI->getStackSize();
+
+  uint64_t NumBytes = 0;
+  if (hasFP(MF)) {
+    // Calculate required stack adjustment
+    uint64_t FrameSize = StackSize - 2;
+    NumBytes = FrameSize - MSP430FI->getCalleeSavedFrameSize();
+
+    // Get the offset of the stack slot for the EBP register... which is
+    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
+    // Update the frame offset adjustment.
+    MFI->setOffsetAdjustment(-NumBytes);
+
+    // Save FPW into the appropriate stack slot...
+    BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
+      .addReg(MSP430::FPW, RegState::Kill);
+
+    // Update FPW with the new base value...
+    BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW)
+      .addReg(MSP430::SPW);
+
+    // Mark the FramePtr as live-in in every block except the entry.
+    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
+         I != E; ++I)
+      I->addLiveIn(MSP430::FPW);
+
+  } else
+    NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
+
+  // Skip the callee-saved push instructions.
+  while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r))
+    ++MBBI;
+
+  if (MBBI != MBB.end())
+    DL = MBBI->getDebugLoc();
+
+  if (NumBytes) { // adjust stack pointer: SPW -= numbytes
+    // If there is an SUB16ri of SPW immediately before this instruction, merge
+    // the two.
+    //NumBytes -= mergeSPUpdates(MBB, MBBI, true);
+    // If there is an ADD16ri or SUB16ri of SPW immediately after this
+    // instruction, merge the two instructions.
+    // mergeSPUpdatesDown(MBB, MBBI, &NumBytes);
+
+    if (NumBytes) {
+      MachineInstr *MI =
+        BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW)
+        .addReg(MSP430::SPW).addImm(NumBytes);
+      // The SRW implicit def is dead.
+      MI->getOperand(3).setIsDead();
+    }
+  }
+}
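// Editor's note: worked example of the prologue arithmetic above, as a
// standalone sketch. With a frame pointer, two bytes of the frame hold the
// saved FPW, so for StackSize = 12 and one 2-byte callee-saved push:
//   FrameSize = 12 - 2 = 10, NumBytes = 10 - 2 = 8  ->  sub #8 from SPW.
// The concrete numbers are illustrative; only the formulas come from the code.
#include <cstdint>

uint64_t prologueStackAdjust(uint64_t StackSize, uint64_t CalleeSavedBytes,
                             bool HasFP) {
  uint64_t FrameSize = HasFP ? StackSize - 2 : StackSize; // minus saved FPW
  return FrameSize - CalleeSavedBytes;                    // SPW -= NumBytes
}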
+
+void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
+                                       MachineBasicBlock &MBB) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
+  const MSP430InstrInfo &TII =
+    *static_cast<const MSP430InstrInfo*>(MF.getTarget().getInstrInfo());
+
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  unsigned RetOpcode = MBBI->getOpcode();
+  DebugLoc DL = MBBI->getDebugLoc();
+
+  switch (RetOpcode) {
+  case MSP430::RET:
+  case MSP430::RETI: break;  // These are ok
+  default:
+    llvm_unreachable("Can only insert epilog into returning blocks");
+  }
+
+  // Get the number of bytes to allocate from the FrameInfo
+  uint64_t StackSize = MFI->getStackSize();
+  unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
+  uint64_t NumBytes = 0;
+
+  if (hasFP(MF)) {
+    // Calculate required stack adjustment
+    uint64_t FrameSize = StackSize - 2;
+    NumBytes = FrameSize - CSSize;
+
+    // pop FPW.
+    BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW);
+  } else
+    NumBytes = StackSize - CSSize;
+
+  // Skip the callee-saved pop instructions.
+  while (MBBI != MBB.begin()) {
+    MachineBasicBlock::iterator PI = prior(MBBI);
+    unsigned Opc = PI->getOpcode();
+    if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
+      break;
+    --MBBI;
+  }
+
+  DL = MBBI->getDebugLoc();
+
+  // If there is an ADD16ri or SUB16ri of SPW immediately before this
+  // instruction, merge the two instructions.
+  //if (NumBytes || MFI->hasVarSizedObjects())
+  //  mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+  if (MFI->hasVarSizedObjects()) {
+    BuildMI(MBB, MBBI, DL,
+            TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW);
+    if (CSSize) {
+      MachineInstr *MI =
+        BuildMI(MBB, MBBI, DL,
+                TII.get(MSP430::SUB16ri), MSP430::SPW)
+        .addReg(MSP430::SPW).addImm(CSSize);
+      // The SRW implicit def is dead.
+      MI->getOperand(3).setIsDead();
+    }
+  } else {
+    // adjust stack pointer back: SPW += numbytes
+    if (NumBytes) {
+      MachineInstr *MI =
+        BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW)
+        .addReg(MSP430::SPW).addImm(NumBytes);
+      // The SRW implicit def is dead.
+      MI->getOperand(3).setIsDead();
+    }
+  }
+}
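// Editor's note: the callee-saved bookkeeping used by the spill/restore code
// below, restated as a sketch. Every callee-saved register is 16 bits wide,
// so the spill area is exactly two bytes per entry (the
// setCalleeSavedFrameSize(CSI.size() * 2) call); pushes run in reverse order
// so the matching pops can run forward.
#include <cstddef>

inline std::size_t calleeSavedBytes(std::size_t NumCalleeSavedRegs) {
  return NumCalleeSavedRegs * 2;  // two bytes per 16-bit register
}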
+
+// FIXME: Can we eliminate these in favour of generic code?
+bool
+MSP430FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                               MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL;
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
+  MFI->setCalleeSavedFrameSize(CSI.size() * 2);
+
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    // Add the callee-saved register as live-in. It's killed at the spill.
+    MBB.addLiveIn(Reg);
+    BuildMI(MBB, MI, DL, TII.get(MSP430::PUSH16r))
+      .addReg(Reg, RegState::Kill);
+  }
+  return true;
+}
+
+bool
+MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                                 MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL;
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+    BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), CSI[i].getReg());
+
+  return true;
+}
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
new file mode 100644
index 0000000..b636827
--- /dev/null
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -0,0 +1,53 @@
+//==- MSP430FrameLowering.h - Define frame lowering for MSP430 --*- C++ -*--==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_FRAMEINFO_H
+#define MSP430_FRAMEINFO_H
+
+#include "MSP430.h"
+#include "MSP430Subtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class MSP430Subtarget;
+
+class MSP430FrameLowering : public TargetFrameLowering {
+protected:
+  const MSP430Subtarget &STI;
+
+public:
+  explicit MSP430FrameLowering(const MSP430Subtarget &sti)
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) {
+  }
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const;
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+  bool hasReservedCallFrame(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
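// Editor's note: the constructor above passes (StackGrowsDown, 2, -2) to
// TargetFrameLowering: the MSP430 stack grows down, keeps 2-byte alignment,
// and the local area begins at offset -2, just below the saved return
// address. A sketch of reading these back; the wrapper function is an
// illustrative assumption, while the two getters are standard
// TargetFrameLowering accessors.
void describeMSP430Frame(const MSP430FrameLowering &TFL) {
  unsigned Align    = TFL.getStackAlignment();    // 2
  int      LocalArea = TFL.getOffsetOfLocalArea(); // -2
  (void)Align; (void)LocalArea;
}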
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 3395e9f..5430d43 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -60,15 +60,6 @@ namespace {
       return GV != 0 || CP != 0 || ES != 0 || JT != -1;
     }
 
-    bool hasBaseReg() const {
-      return Base.Reg.getNode() != 0;
-    }
-
-    void setBaseReg(SDValue Reg) {
-      BaseType = RegBase;
-      Base.Reg = Reg;
-    }
-
     void dump() {
       errs() << "MSP430ISelAddressMode " << this << '\n';
       if (BaseType == RegBase && Base.Reg.getNode() != 0) {
@@ -129,7 +120,7 @@ namespace {
     SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
                                unsigned Opc8, unsigned Opc16);
 
-    bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp);
+    bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Disp);
   };
 }  // end anonymous namespace
@@ -254,7 +245,7 @@ bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
 /// SelectAddr - returns true if it is able to pattern match an addressing mode.
 /// It returns the operands which make up the maximal addressing mode it can
 /// match by reference.
-bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N,
+bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
                                     SDValue &Base, SDValue &Disp) {
   MSP430ISelAddressMode AM;
@@ -272,7 +263,7 @@ bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N,
                AM.Base.Reg;
 
   if (AM.GV)
-    Disp = CurDAG->getTargetGlobalAddress(AM.GV, Op->getDebugLoc(),
+    Disp = CurDAG->getTargetGlobalAddress(AM.GV, N->getDebugLoc(),
                                           MVT::i16, AM.Disp,
                                           0/*AM.SymbolFlags*/);
   else if (AM.CP)
@@ -298,7 +289,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
   switch (ConstraintCode) {
   default: return true;
   case 'm':   // memory
-    if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
+    if (!SelectAddr(Op, Op0, Op1))
      return true;
     break;
   }
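// Editor's note: a condensed standalone restatement of the addressing-mode
// record that MatchAddress/SelectAddr above fill in: an MSP430 address is
// either reg+disp or frame-index+disp, optionally with a symbolic
// displacement (GV/CP/ES/JT). This simplified struct is a sketch, not the
// anonymous-namespace struct from the file.
struct AddrModeSketch {
  enum BaseKind { RegBase, FrameIndexBase } BaseType;
  int  FrameIndex;   // valid when BaseType == FrameIndexBase
  bool HasBaseReg;   // valid when BaseType == RegBase
  long Disp;         // constant displacement
  bool SymbolicDisp; // a GV/CP/ES/JT displacement is present
};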
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index a1703a3..30ef4f5 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -366,7 +366,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
       unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
       if (ObjSize > 2) {
         errs() << "LowerFormalArguments Unhandled argument type: "
-             << VA.getLocVT().getSimpleVT().SimpleTy
+             << EVT(VA.getLocVT()).getEVTString()
              << "\n";
       }
       // Create the frame index object for this incoming parameter...
@@ -376,7 +376,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
       //from this parameter
       SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
       InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
-                                   PseudoSourceValue::getFixedStack(FI), 0,
+                                   MachinePointerInfo::getFixedStack(FI),
                                    false, false, 0));
     }
   }
@@ -507,8 +507,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
 
       MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
-                                         PseudoSourceValue::getStack(),
-                                         VA.getLocMemOffset(), false, false, 0));
+                                         MachinePointerInfo(), false, false, 0));
     }
   }
@@ -537,7 +536,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
     Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
 
   // Returns a chain & a flag for retval copy to use.
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SmallVector<SDValue, 8> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Callee);
@@ -748,7 +747,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
   }
 
   TargetCC = DAG.getConstant(TCC, MVT::i8);
-  return DAG.getNode(MSP430ISD::CMP, dl, MVT::Flag, LHS, RHS);
+  return DAG.getNode(MSP430ISD::CMP, dl, MVT::Glue, LHS, RHS);
 }
 
 
@@ -837,7 +836,7 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
     return SR;
   } else {
     SDValue Zero = DAG.getConstant(0, VT);
-    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
     SmallVector<SDValue, 4> Ops;
     Ops.push_back(One);
     Ops.push_back(Zero);
@@ -859,7 +858,7 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op,
   SDValue TargetCC;
   SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
 
-  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
   SmallVector<SDValue, 4> Ops;
   Ops.push_back(TrueV);
   Ops.push_back(FalseV);
@@ -914,13 +913,13 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
     return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                        DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                    FrameAddr, Offset),
-                       NULL, 0, false, false, 0);
+                       MachinePointerInfo(), false, false, 0);
   }
 
   // Just load the return address.
   SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
   return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     RetAddrFI, NULL, 0, false, false, 0);
+                     RetAddrFI, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -934,7 +933,8 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                                          MSP430::FPW, VT);
   while (Depth--)
-    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+                            MachinePointerInfo(),
                             false, false, 0);
   return FrameAddr;
 }
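// Editor's note on the MVT::Flag -> MVT::Glue renames in the hunks above:
// "glue" is the value type that keeps two nodes adjacent through scheduling,
// e.g. the CMP that must stay glued to the conditional branch or select that
// consumes its flags. The idiom itself is unchanged; both lines below appear
// verbatim in the new code:
//   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
//   return DAG.getNode(MSP430ISD::CMP, dl, MVT::Glue, LHS, RHS);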
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index bfab844..424df13 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -40,8 +40,9 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   MachineFrameInfo &MFI = *MF.getFrameInfo();
 
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
-                            MachineMemOperand::MOStore, 0,
+    MF.getMachineMemOperand(
+            MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+            MachineMemOperand::MOStore,
                             MFI.getObjectSize(FrameIdx),
                             MFI.getObjectAlignment(FrameIdx));
 
@@ -68,8 +69,9 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   MachineFrameInfo &MFI = *MF.getFrameInfo();
 
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
-                            MachineMemOperand::MOLoad, 0,
+    MF.getMachineMemOperand(
+            MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+            MachineMemOperand::MOLoad,
                             MFI.getObjectSize(FrameIdx),
                             MFI.getObjectAlignment(FrameIdx));
 
@@ -99,48 +101,6 @@ void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     .addReg(SrcReg, getKillRegState(KillSrc));
 }
 
-bool
-MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                        const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
-  MachineFunction &MF = *MBB.getParent();
-  MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
-  MFI->setCalleeSavedFrameSize(CSI.size() * 2);
-
-  for (unsigned i = CSI.size(); i != 0; --i) {
-    unsigned Reg = CSI[i-1].getReg();
-    // Add the callee-saved register as live-in. It's killed at the spill.
-    MBB.addLiveIn(Reg);
-    BuildMI(MBB, MI, DL, get(MSP430::PUSH16r))
-      .addReg(Reg, RegState::Kill);
-  }
-  return true;
-}
-
-bool
-MSP430InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                             MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                        const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
-    BuildMI(MBB, MI, DL, get(MSP430::POP16r), CSI[i].getReg());
-
-  return true;
-}
-
 unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator I = MBB.end();
   unsigned Count = 0;
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 49ccc03..e885cd3 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -66,15 +66,6 @@ public:
                             const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const;
 
-  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                         const TargetRegisterInfo *TRI) const;
-  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                           const TargetRegisterInfo *TRI) const;
-
   unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
 
   // Branch folding goodness
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 8792b22..59cb598 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -40,28 +40,28 @@ def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
 // MSP430 Specific Node Definitions.
 //===----------------------------------------------------------------------===//
 def MSP430retflag  : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
-                     [SDNPHasChain, SDNPOptInFlag]>;
+                     [SDNPHasChain, SDNPOptInGlue]>;
 def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
-                     [SDNPHasChain, SDNPOptInFlag]>;
+                     [SDNPHasChain, SDNPOptInGlue]>;
 
 def MSP430rra     : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
 def MSP430rla     : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
 def MSP430rrc     : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
 
 def MSP430call    : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
-                     [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>;
+                     [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
 def MSP430callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart,
-                     [SDNPHasChain, SDNPOutFlag]>;
+                     [SDNPHasChain, SDNPOutGlue]>;
 def MSP430callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_MSP430CallSeqEnd,
-                     [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                     [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
-def MSP430cmp     : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>;
+def MSP430cmp     : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutGlue]>;
 def MSP430brcc    : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
-                     [SDNPHasChain, SDNPInFlag]>;
+                     [SDNPHasChain, SDNPInGlue]>;
 def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
-                     [SDNPInFlag]>;
+                     [SDNPInGlue]>;
 def MSP430shl     : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
 def MSP430sra     : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
 def MSP430srl     : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
new file mode 100644
index 0000000..d1d9a11
--- /dev/null
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -0,0 +1,150 @@
+//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower MSP430 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCInstLower.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+MCSymbol *MSP430MCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on GV operand");
+  case 0: break;
+  }
+
+  return Printer.Mang->getSymbol(MO.getGlobal());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetExternalSymbolSymbol(const MachineOperand &MO) const {
+  switch (MO.getTargetFlags()) {
+  default: assert(0 && "Unknown target flag on GV operand");
+  case 0: break;
+  }
+
+  return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetJumpTableSymbol(const MachineOperand &MO) const {
+  SmallString<256> Name;
+  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
+                            << Printer.getFunctionNumber() << '_'
+                            << MO.getIndex();
+
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on GV operand");
+  case 0: break;
+  }
+
+  // Create a symbol for the name.
+  return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
+  SmallString<256> Name;
+  raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
+                            << Printer.getFunctionNumber() << '_'
+                            << MO.getIndex();
+
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on GV operand");
+  case 0: break;
+  }
+
+  // Create a symbol for the name.
+  return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+MCSymbol *MSP430MCInstLower::
+GetBlockAddressSymbol(const MachineOperand &MO) const {
+  switch (MO.getTargetFlags()) {
+  default: assert(0 && "Unknown target flag on GV operand");
+  case 0: break;
+  }
+
+  return Printer.GetBlockAddressSymbol(MO.getBlockAddress());
+}
+
+MCOperand MSP430MCInstLower::
+LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
+  // FIXME: We would like an efficient form for this, so we don't have to do a
+  // lot of extra uniquing.
+  const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on GV operand");
+  case 0: break;
+  }
+
+  if (!MO.isJTI() && MO.getOffset())
+    Expr = MCBinaryExpr::CreateAdd(Expr,
+                                   MCConstantExpr::Create(MO.getOffset(), Ctx),
+                                   Ctx);
+  return MCOperand::CreateExpr(Expr);
+}
+
+void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+  OutMI.setOpcode(MI->getOpcode());
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+
+    MCOperand MCOp;
+    switch (MO.getType()) {
+    default:
+      MI->dump();
+      assert(0 && "unknown operand type");
+    case MachineOperand::MO_Register:
+      // Ignore all implicit register operands.
+      if (MO.isImplicit()) continue;
+      MCOp = MCOperand::CreateReg(MO.getReg());
+      break;
+    case MachineOperand::MO_Immediate:
+      MCOp = MCOperand::CreateImm(MO.getImm());
+      break;
+    case MachineOperand::MO_MachineBasicBlock:
+      MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+                         MO.getMBB()->getSymbol(), Ctx));
+      break;
+    case MachineOperand::MO_GlobalAddress:
+      MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+      break;
+    case MachineOperand::MO_ExternalSymbol:
+      MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+      break;
+    case MachineOperand::MO_JumpTableIndex:
+      MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
+      break;
+    case MachineOperand::MO_ConstantPoolIndex:
+      MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
+      break;
+    case MachineOperand::MO_BlockAddress:
+      MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+    }
+
+    OutMI.addOperand(MCOp);
+  }
+}
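// Editor's note: a sketch of Lower()'s effect on a single instruction. For a
// hypothetical "mov.w #42, r10" MachineInstr, the loop above would produce an
// MCInst with the same opcode and two operands:
//   operand 0: MCOperand::CreateReg(MSP430::R10W)  // register kept as-is
//   operand 1: MCOperand::CreateImm(42)            // immediate kept as-is
// while implicit register operands (e.g. an implicit status-register def) are
// skipped entirely. R10W and the instruction choice are assumptions for
// illustration.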
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
new file mode 100644
index 0000000..e937696
--- /dev/null
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -0,0 +1,50 @@
+//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430_MCINSTLOWER_H
+#define MSP430_MCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+  class AsmPrinter;
+  class MCAsmInfo;
+  class MCContext;
+  class MCInst;
+  class MCOperand;
+  class MCSymbol;
+  class MachineInstr;
+  class MachineModuleInfoMachO;
+  class MachineOperand;
+  class Mangler;
+
+  /// MSP430MCInstLower - This class is used to lower a MachineInstr
+  /// into an MCInst.
+class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower {
+  MCContext &Ctx;
+  Mangler &Mang;
+
+  AsmPrinter &Printer;
+public:
+  MSP430MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
+    : Ctx(ctx), Mang(mang), Printer(printer) {}
+  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+  MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 3c3fa73..1da6d8d 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -33,11 +33,12 @@ MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
                                        const TargetInstrInfo &tii)
   : MSP430GenRegisterInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
     TM(tm), TII(tii) {
-  StackAlign = TM.getFrameInfo()->getStackAlignment();
+  StackAlign = TM.getFrameLowering()->getStackAlignment();
 }
 
 const unsigned*
 MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
   const Function* F = MF->getFunction();
   static const unsigned CalleeSavedRegs[] = {
     MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
@@ -62,7 +63,7 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     0
   };
 
-  if (hasFP(*MF))
+  if (TFI->hasFP(*MF))
     return (F->getCallingConv() == CallingConv::MSP430_INTR ?
             CalleeSavedRegsIntrFP : CalleeSavedRegsFP);
   else
@@ -73,6 +74,7 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
 
 BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
 
   // Mark 4 special registers as reserved.
   Reserved.set(MSP430::PCW);
@@ -81,7 +83,7 @@ BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(MSP430::CGW);
 
   // Mark frame pointer as reserved if needed.
-  if (hasFP(MF))
+  if (TFI->hasFP(MF))
     Reserved.set(MSP430::FPW);
 
   return Reserved;
 }
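// Editor's note: standalone restatement of the reserved-set logic above.
// PCW/SPW/SRW/CGW are always reserved; FPW joins them only when a frame
// pointer is needed, now queried through TargetFrameLowering::hasFP instead
// of the register info. The plain std::bitset and the index enum are
// illustrative assumptions, not LLVM's register numbering.
#include <bitset>

std::bitset<16> reservedRegsSketch(bool NeedsFP) {
  enum { PCW, SPW, SRW, CGW, FPW };  // illustrative indices
  std::bitset<16> R;
  R.set(PCW); R.set(SPW); R.set(SRW); R.set(CGW);
  if (NeedsFP)
    R.set(FPW);
  return R;
}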
@@ -92,23 +94,12 @@ MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const {
   return &MSP430::GR16RegClass;
 }
 
-
-bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-  return (DisableFramePointerElim(MF) ||
-          MF.getFrameInfo()->hasVarSizedObjects() ||
-          MFI->isFrameAddressTaken());
-}
-
-bool MSP430RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
-  return !MF.getFrameInfo()->hasVarSizedObjects();
-}
-
 void MSP430RegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
-  if (!hasReservedCallFrame(MF)) {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  if (!TFI->hasReservedCallFrame(MF)) {
     // If the stack pointer can be changed after prologue, turn the
     // adjcallstackup instruction into a 'sub SPW, <amt>' and the
     // adjcallstackdown instruction into 'add SPW, <amt>'
@@ -172,6 +163,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineInstr &MI = *II;
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
   DebugLoc dl = MI.getDebugLoc();
   while (!MI.getOperand(i).isFI()) {
     ++i;
@@ -180,13 +172,13 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   int FrameIndex = MI.getOperand(i).getIndex();
 
-  unsigned BasePtr = (hasFP(MF) ? MSP430::FPW : MSP430::SPW);
+  unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW);
   int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
 
   // Skip the saved PC
   Offset += 2;
 
-  if (!hasFP(MF))
+  if (!TFI->hasFP(MF))
     Offset += MF.getFrameInfo()->getStackSize();
   else
     Offset += 2; // Skip the saved FPW
@@ -224,8 +216,10 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
 void
 MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
                                                                          const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
   // Create a frame entry for the FPW register that must be saved.
-  if (hasFP(MF)) {
+  if (TFI->hasFP(MF)) {
     int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
     (void)FrameIdx;
     assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
@@ -233,144 +227,14 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
   }
 }
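// Editor's note: worked example of the offset fixup in eliminateFrameIndex
// above. Every frame access skips the 2-byte saved PC; SPW-relative accesses
// (no FP) additionally skip the whole static stack, while FPW-relative ones
// skip the 2-byte saved FPW. Only the formulas come from the code; the
// wrapper function is a sketch.
int frameIndexOffset(int ObjectOffset, bool HasFP, int StackSize) {
  int Offset = ObjectOffset + 2;  // skip the saved PC
  if (!HasFP)
    Offset += StackSize;          // SPW-relative addressing
  else
    Offset += 2;                  // skip the saved FPW
  return Offset;
}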
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r)) - .addReg(MSP430::FPW, RegState::Kill); - - // Update FPW with the new base value... - BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::FPW) - .addReg(MSP430::SPW); - - // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); - I != E; ++I) - I->addLiveIn(MSP430::FPW); - - } else - NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize(); - - // Skip the callee-saved push instructions. - while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r)) - ++MBBI; - - if (MBBI != MBB.end()) - DL = MBBI->getDebugLoc(); - - if (NumBytes) { // adjust stack pointer: SPW -= numbytes - // If there is an SUB16ri of SPW immediately before this instruction, merge - // the two. - //NumBytes -= mergeSPUpdates(MBB, MBBI, true); - // If there is an ADD16ri or SUB16ri of SPW immediately after this - // instruction, merge the two instructions. - // mergeSPUpdatesDown(MBB, MBBI, &NumBytes); - - if (NumBytes) { - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SPW) - .addReg(MSP430::SPW).addImm(NumBytes); - // The SRW implicit def is dead. - MI->getOperand(3).setIsDead(); - } - } -} - -void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>(); - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - unsigned RetOpcode = MBBI->getOpcode(); - DebugLoc DL = MBBI->getDebugLoc(); - - switch (RetOpcode) { - case MSP430::RET: - case MSP430::RETI: break; // These are ok - default: - llvm_unreachable("Can only insert epilog into returning blocks"); - } - - // Get the number of bytes to allocate from the FrameInfo - uint64_t StackSize = MFI->getStackSize(); - unsigned CSSize = MSP430FI->getCalleeSavedFrameSize(); - uint64_t NumBytes = 0; - - if (hasFP(MF)) { - // Calculate required stack adjustment - uint64_t FrameSize = StackSize - 2; - NumBytes = FrameSize - CSSize; - - // pop FPW. - BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::FPW); - } else - NumBytes = StackSize - CSSize; - - // Skip the callee-saved pop instructions. - while (MBBI != MBB.begin()) { - MachineBasicBlock::iterator PI = prior(MBBI); - unsigned Opc = PI->getOpcode(); - if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator()) - break; - --MBBI; - } - - DL = MBBI->getDebugLoc(); - - // If there is an ADD16ri or SUB16ri of SPW immediately before this - // instruction, merge the two instructions. - //if (NumBytes || MFI->hasVarSizedObjects()) - // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); - - if (MFI->hasVarSizedObjects()) { - BuildMI(MBB, MBBI, DL, - TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW); - if (CSSize) { - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, - TII.get(MSP430::SUB16ri), MSP430::SPW) - .addReg(MSP430::SPW).addImm(CSSize); - // The SRW implicit def is dead. - MI->getOperand(3).setIsDead(); - } - } else { - // adjust stack pointer back: SPW += numbytes - if (NumBytes) { - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SPW) - .addReg(MSP430::SPW).addImm(NumBytes); - // The SRW implicit def is dead. - MI->getOperand(3).setIsDead(); - } - } -} - unsigned MSP430RegisterInfo::getRARegister() const { return MSP430::PCW; } unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - return hasFP(MF) ? 
MSP430::FPW : MSP430::SPW; + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + return TFI->hasFP(MF) ? MSP430::FPW : MSP430::SPW; } int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index 4d2795b..56744fa 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -39,9 +39,6 @@ public: BitVector getReservedRegs(const MachineFunction &MF) const; const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const; - bool hasFP(const MachineFunction &MF) const; - bool hasReservedCallFrame(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; @@ -49,9 +46,6 @@ public: void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; // Debug information queries. diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td index f8aec66..ab7b59b 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.td +++ b/lib/Target/MSP430/MSP430RegisterInfo.td @@ -79,10 +79,10 @@ def GR8 : RegisterClass<"MSP430", [i8], 8, GR8Class::iterator GR8Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); // Depending on whether the function uses frame pointer or not, last 5 or 4 // registers on the list above are reserved - if (RI->hasFP(MF)) + if (TFI->hasFP(MF)) return end()-5; else return end()-4; @@ -106,10 +106,10 @@ def GR16 : RegisterClass<"MSP430", [i16], 16, GR16Class::iterator GR16Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); // Depending on whether the function uses frame pointer or not, last 5 or 4 // registers on the list above are reserved - if (RI->hasFP(MF)) + if (TFI->hasFP(MF)) return end()-5; else return end()-4; diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 99877c8..fba9536 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -28,13 +28,13 @@ extern "C" void LLVMInitializeMSP430Target() { MSP430TargetMachine::MSP430TargetMachine(const Target &T, const std::string &TT, - const std::string &FS) : - LLVMTargetMachine(T, TT), - Subtarget(TT, FS), - // FIXME: Check TargetData string. - DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { } + const std::string &FS) + : LLVMTargetMachine(T, TT), + Subtarget(TT, FS), + // FIXME: Check TargetData string. 
+ DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), + FrameLowering(Subtarget) { } bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index b93edfd..cee3b04 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -17,11 +17,12 @@ #include "MSP430InstrInfo.h" #include "MSP430ISelLowering.h" +#include "MSP430FrameLowering.h" #include "MSP430SelectionDAGInfo.h" #include "MSP430RegisterInfo.h" #include "MSP430Subtarget.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -34,16 +35,15 @@ class MSP430TargetMachine : public LLVMTargetMachine { MSP430InstrInfo InstrInfo; MSP430TargetLowering TLInfo; MSP430SelectionDAGInfo TSInfo; - - // MSP430 does not have any call stack frame, therefore not having - // any MSP430 specific FrameInfo class. - TargetFrameInfo FrameInfo; + MSP430FrameLowering FrameLowering; public: MSP430TargetMachine(const Target &T, const std::string &TT, const std::string &FS); - virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual const TargetFrameLowering *getFrameLowering() const { + return &FrameLowering; + } virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetData *getTargetData() const { return &DataLayout;} virtual const MSP430Subtarget *getSubtargetImpl() const { return &Subtarget; } diff --git a/lib/Target/MSP430/Makefile b/lib/Target/MSP430/Makefile index b1f33d6..fa4e80b 100644 --- a/lib/Target/MSP430/Makefile +++ b/lib/Target/MSP430/Makefile @@ -18,7 +18,7 @@ BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \ MSP430GenDAGISel.inc MSP430GenCallingConv.inc \ MSP430GenSubtarget.inc -DIRS = AsmPrinter TargetInfo +DIRS = InstPrinter TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/lib/Target/MSP430/TargetInfo/CMakeLists.txt index 1d408d0..2d1aa9d 100644 --- a/lib/Target/MSP430/TargetInfo/CMakeLists.txt +++ b/lib/Target/MSP430/TargetInfo/CMakeLists.txt @@ -4,4 +4,4 @@ add_llvm_library(LLVMMSP430Info MSP430TargetInfo.cpp ) -add_dependencies(LLVMMSP430Info MSP430Table_gen) +add_dependencies(LLVMMSP430Info MSP430CodeGenTable_gen) diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 49efe75..46c687b 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -224,16 +224,6 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, } } -/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix -/// and the specified global variable's name. If the global variable doesn't -/// have a name, this fills in a unique name for the global. -std::string Mangler::getNameWithPrefix(const GlobalValue *GV, - bool isImplicitlyPrivate) { - SmallString<64> Buf; - getNameWithPrefix(Buf, GV, isImplicitlyPrivate); - return std::string(Buf.begin(), Buf.end()); -} - /// getSymbol - Return the MCSymbol for the specified global value. This /// symbol is the main label that is the address of the global. 
MCSymbol *Mangler::getSymbol(const GlobalValue *GV) { diff --git a/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/lib/Target/Mips/AsmPrinter/CMakeLists.txt deleted file mode 100644 index d3099d2..0000000 --- a/lib/Target/Mips/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -include_directories( - ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. - ) - -add_llvm_library(LLVMMipsAsmPrinter - MipsAsmPrinter.cpp - ) -add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen) diff --git a/lib/Target/Mips/AsmPrinter/Makefile b/lib/Target/Mips/AsmPrinter/Makefile deleted file mode 100644 index b1efe9b..0000000 --- a/lib/Target/Mips/AsmPrinter/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -##===- lib/Target/Mips/AsmPrinter/Makefile -----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMMipsAsmPrinter - -# Hack: we need to include 'main' Mips target directory to grab -# private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp deleted file mode 100644 index 6660f6b..0000000 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ /dev/null @@ -1,386 +0,0 @@ -//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to GAS-format MIPS assembly language. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "mips-asm-printer" -#include "Mips.h" -#include "MipsSubtarget.h" -#include "MipsInstrInfo.h" -#include "MipsTargetMachine.h" -#include "MipsMachineFunction.h" -#include "llvm/BasicBlock.h" -#include "llvm/Instructions.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - class MipsAsmPrinter : public AsmPrinter { - const MipsSubtarget *Subtarget; - public: - explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) { - Subtarget = &TM.getSubtarget<MipsSubtarget>(); - } - - virtual const char *getPassName() const { - return "Mips Assembly Printer"; - } - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O); - void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier = 0); - void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier = 0); - void printSavedRegsBitmask(raw_ostream &O); - void printHex32(unsigned int Value, raw_ostream &O); - - const char *getCurrentABIString() const; - void emitFrameDirective(); - - void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd. - void EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); - } - virtual void EmitFunctionBodyStart(); - virtual void EmitFunctionBodyEnd(); - virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; - static const char *getRegisterName(unsigned RegNo); - - virtual void EmitFunctionEntryLabel(); - void EmitStartOfAsmFile(Module &M); - }; -} // end of anonymous namespace - -#include "MipsGenAsmWriter.inc" - -//===----------------------------------------------------------------------===// -// -// Mips Asm Directives -// -// -- Frame directive "frame Stackpointer, Stacksize, RARegister" -// Describe the stack frame. -// -// -- Mask directives "(f)mask bitmask, offset" -// Tells the assembler which registers are saved and where. -// bitmask - contain a little endian bitset indicating which registers are -// saved on function prologue (e.g. with a 0x80000000 mask, the -// assembler knows the register 31 (RA) is saved at prologue. -// offset - the position before stack pointer subtraction indicating where -// the first saved register on prologue is located. (e.g. 
with a -// -// Consider the following function prologue: -// -// .frame $fp,48,$ra -// .mask 0xc0000000,-8 -// addiu $sp, $sp, -48 -// sw $ra, 40($sp) -// sw $fp, 36($sp) -// -// With a 0xc0000000 mask, the assembler knows the register 31 (RA) and -// 30 (FP) are saved at prologue. As the save order on prologue is from -// left to right, RA is saved first. A -8 offset means that after the -// stack pointer subtration, the first register in the mask (RA) will be -// saved at address 48-8=40. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Mask directives -//===----------------------------------------------------------------------===// - -// Create a bitmask with all callee saved registers for CPU or Floating Point -// registers. For CPU registers consider RA, GP and FP for saving if necessary. -void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { - const TargetRegisterInfo &RI = *TM.getRegisterInfo(); - const MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); - - // CPU and FPU Saved Registers Bitmasks - unsigned int CPUBitmask = 0; - unsigned int FPUBitmask = 0; - - // Set the CPU and FPU Bitmasks - const MachineFrameInfo *MFI = MF->getFrameInfo(); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg); - if (Mips::CPURegsRegisterClass->contains(Reg)) - CPUBitmask |= (1 << RegNum); - else - FPUBitmask |= (1 << RegNum); - } - - // Return Address and Frame registers must also be set in CPUBitmask. - if (RI.hasFP(*MF)) - CPUBitmask |= (1 << MipsRegisterInfo:: - getRegisterNumbering(RI.getFrameRegister(*MF))); - - if (MFI->adjustsStack()) - CPUBitmask |= (1 << MipsRegisterInfo:: - getRegisterNumbering(RI.getRARegister())); - - // Print CPUBitmask - O << "\t.mask \t"; printHex32(CPUBitmask, O); - O << ',' << MipsFI->getCPUTopSavedRegOff() << '\n'; - - // Print FPUBitmask - O << "\t.fmask\t"; printHex32(FPUBitmask, O); O << "," - << MipsFI->getFPUTopSavedRegOff() << '\n'; -} - -// Print a 32 bit hex number with all numbers. -void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) { - O << "0x"; - for (int i = 7; i >= 0; i--) - O << utohexstr((Value & (0xF << (i*4))) >> (i*4)); -} - -//===----------------------------------------------------------------------===// -// Frame and Set directives -//===----------------------------------------------------------------------===// - -/// Frame Directive -void MipsAsmPrinter::emitFrameDirective() { - const TargetRegisterInfo &RI = *TM.getRegisterInfo(); - - unsigned stackReg = RI.getFrameRegister(*MF); - unsigned returnReg = RI.getRARegister(); - unsigned stackSize = MF->getFrameInfo()->getStackSize(); - - OutStreamer.EmitRawText("\t.frame\t$" + - Twine(LowercaseString(getRegisterName(stackReg))) + - "," + Twine(stackSize) + ",$" + - Twine(LowercaseString(getRegisterName(returnReg)))); -} - -/// Emit Set directives. 
-const char *MipsAsmPrinter::getCurrentABIString() const { - switch (Subtarget->getTargetABI()) { - case MipsSubtarget::O32: return "abi32"; - case MipsSubtarget::O64: return "abiO64"; - case MipsSubtarget::N32: return "abiN32"; - case MipsSubtarget::N64: return "abi64"; - case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64 - default: break; - } - - llvm_unreachable("Unknown Mips ABI"); - return NULL; -} - -void MipsAsmPrinter::EmitFunctionEntryLabel() { - OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName())); - OutStreamer.EmitLabel(CurrentFnSym); -} - -/// EmitFunctionBodyStart - Targets can override this to emit stuff before -/// the first basic block in the function. -void MipsAsmPrinter::EmitFunctionBodyStart() { - emitFrameDirective(); - - SmallString<128> Str; - raw_svector_ostream OS(Str); - printSavedRegsBitmask(OS); - OutStreamer.EmitRawText(OS.str()); -} - -/// EmitFunctionBodyEnd - Targets can override this to emit stuff after -/// the last basic block in the function. -void MipsAsmPrinter::EmitFunctionBodyEnd() { - // There are instruction for this macros, but they must - // always be at the function end, and we can't emit and - // break with BB logic. - OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); - OutStreamer.EmitRawText(StringRef("\t.set\treorder")); - OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); -} - - -/// isBlockOnlyReachableByFallthough - Return true if the basic block has -/// exactly one predecessor and the control transfer mechanism between -/// the predecessor and this block is a fall-through. -bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) - const { - // The predecessor has to be immediately before this block. - const MachineBasicBlock *Pred = *MBB->pred_begin(); - - // If the predecessor is a switch statement, assume a jump table - // implementation, so it is not a fall through. - if (const BasicBlock *bb = Pred->getBasicBlock()) - if (isa<SwitchInst>(bb->getTerminator())) - return false; - - return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); -} - -// Print out an operand for an inline asm expression. -bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant,const char *ExtraCode, - raw_ostream &O) { - // Does this asm operand have a single letter operand modifier? - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. 
- - printOperand(MI, OpNo, O); - return false; -} - -void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(opNum); - bool closeP = false; - - if (MO.getTargetFlags()) - closeP = true; - - switch(MO.getTargetFlags()) { - case MipsII::MO_GPREL: O << "%gp_rel("; break; - case MipsII::MO_GOT_CALL: O << "%call16("; break; - case MipsII::MO_GOT: - if (MI->getOpcode() == Mips::LW) - O << "%got("; - else - O << "%lo("; - break; - case MipsII::MO_ABS_HILO: - if (MI->getOpcode() == Mips::LUi) - O << "%hi("; - else - O << "%lo("; - break; - } - - switch (MO.getType()) { - case MachineOperand::MO_Register: - O << '$' << LowercaseString(getRegisterName(MO.getReg())); - break; - - case MachineOperand::MO_Immediate: - O << (short int)MO.getImm(); - break; - - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - - case MachineOperand::MO_GlobalAddress: - O << *Mang->getSymbol(MO.getGlobal()); - break; - - case MachineOperand::MO_ExternalSymbol: - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - break; - - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - break; - - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" - << getFunctionNumber() << "_" << MO.getIndex(); - if (MO.getOffset()) - O << "+" << MO.getOffset(); - break; - - default: - llvm_unreachable("<unknown operand type>"); - } - - if (closeP) O << ")"; -} - -void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(opNum); - if (MO.isImm()) - O << (unsigned short int)MO.getImm(); - else - printOperand(MI, opNum, O); -} - -void MipsAsmPrinter:: -printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier) { - // when using stack locations for not load/store instructions - // print the same way as all normal 3 operand instructions. - if (Modifier && !strcmp(Modifier, "stackloc")) { - printOperand(MI, opNum+1, O); - O << ", "; - printOperand(MI, opNum, O); - return; - } - - // Load/Store memory operands -- imm($reg) - // If PIC target the target is loaded as the - // pattern lw $25,%call16($28) - printOperand(MI, opNum, O); - O << "("; - printOperand(MI, opNum+1, O); - O << ")"; -} - -void MipsAsmPrinter:: -printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier) { - const MachineOperand& MO = MI->getOperand(opNum); - O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); -} - -void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { - // FIXME: Use SwitchSection. - - // Tell the assembler which ABI we are using - OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString())); - - // TODO: handle O64 ABI - if (Subtarget->isABI_EABI()) { - if (Subtarget->isGP32bit()) - OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32")); - else - OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64")); - } - - // return to previous section - OutStreamer.EmitRawText(StringRef("\t.previous")); -} - -// Force static initialization. 
-extern "C" void LLVMInitializeMipsAsmPrinter() { - RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget); - RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget); -} diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index a77802a..26df1a0 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -11,10 +11,12 @@ tablegen(MipsGenCallingConv.inc -gen-callingconv) tablegen(MipsGenSubtarget.inc -gen-subtarget) add_llvm_target(MipsCodeGen + MipsAsmPrinter.cpp MipsDelaySlotFiller.cpp MipsInstrInfo.cpp MipsISelDAGToDAG.cpp MipsISelLowering.cpp + MipsFrameLowering.cpp MipsMCAsmInfo.cpp MipsRegisterInfo.cpp MipsSubtarget.cpp @@ -23,4 +25,4 @@ add_llvm_target(MipsCodeGen MipsSelectionDAGInfo.cpp ) -target_link_libraries (LLVMMipsCodeGen LLVMSelectionDAG) +add_subdirectory(TargetInfo) diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 2ed8d77..d16b066 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -18,7 +18,7 @@ BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ MipsGenSubtarget.inc -DIRS = AsmPrinter TargetInfo +DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index a51c377..3e6437b 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -36,19 +36,15 @@ def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true", "Support 64-bit FP registers.">; def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat", "true", "Only supports single precision float">; -def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1", - "Mips1 ISA Support">; -def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2", - "Mips2 ISA Support">; def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32", "Enable o32 ABI">; def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI", "Enable eabi ABI">; -def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", +def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", "true", "Enable vector FPU instructions.">; -def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true", +def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true", "Enable 'signext in register' instructions.">; -def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true", +def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true", "Enable 'conditional move' instructions.">; def FeatureMulDivAdd : SubtargetFeature<"muldivadd", "HasMulDivAdd", "true", "Enable 'multiply add/sub' instructions.">; @@ -58,6 +54,16 @@ def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true", "Enable 'byte/half swap' instructions.">; def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true", "Enable 'count leading bits' instructions.">; +def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1", + "Mips1 ISA Support">; +def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2", + "Mips2 ISA Support">; +def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32", + "Mips32 ISA Support", + [FeatureCondMov, FeatureBitCount]>; +def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion", + "Mips32r2", "Mips32r2 ISA Support", + [FeatureMips32, FeatureSEInReg]>; //===----------------------------------------------------------------------===// // Mips processors supported. 
@@ -73,10 +79,12 @@ def : Proc<"r3000", [FeatureMips1]>; def : Proc<"mips2", [FeatureMips2]>; def : Proc<"r6000", [FeatureMips2]>; -// Allegrex is a 32bit subset of r4000, both for interger and fp registers, -// but much more similar to Mips2 than Mips3. It also contains some of -// Mips32/Mips32r2 instructions and a custom vector fpu processor. -def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI, +def : Proc<"4ke", [FeatureMips32r2]>; + +// Allegrex is a 32bit subset of r4000, both for interger and fp registers, +// but much more similar to Mips2 than Mips3. It also contains some of +// Mips32/Mips32r2 instructions and a custom vector fpu processor. +def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI, FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd, FeatureMinMax, FeatureSwap, FeatureBitCount]>; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp new file mode 100644 index 0000000..bd28a9b --- /dev/null +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -0,0 +1,393 @@ +//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format MIPS assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-asm-printer" +#include "Mips.h" +#include "MipsSubtarget.h" +#include "MipsInstrInfo.h" +#include "MipsTargetMachine.h" +#include "MipsMachineFunction.h" +#include "llvm/BasicBlock.h" +#include "llvm/Instructions.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + class MipsAsmPrinter : public AsmPrinter { + const MipsSubtarget *Subtarget; + public: + explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { + Subtarget = &TM.getSubtarget<MipsSubtarget>(); + } + + virtual const char *getPassName() const { + return "Mips Assembly Printer"; + } + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O); + void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, + const char *Modifier = 0); + void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, + const char *Modifier = 0); + void printSavedRegsBitmask(raw_ostream &O); + void printHex32(unsigned int Value, raw_ostream &O); + + const char 
*getCurrentABIString() const; + void emitFrameDirective(); + + void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd. + void EmitInstruction(const MachineInstr *MI) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + printInstruction(MI, OS); + OutStreamer.EmitRawText(OS.str()); + } + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd(); + virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; + static const char *getRegisterName(unsigned RegNo); + + virtual void EmitFunctionEntryLabel(); + void EmitStartOfAsmFile(Module &M); + }; +} // end of anonymous namespace + +#include "MipsGenAsmWriter.inc" + +//===----------------------------------------------------------------------===// +// +// Mips Asm Directives +// +// -- Frame directive "frame Stackpointer, Stacksize, RARegister" +// Describe the stack frame. +// +// -- Mask directives "(f)mask bitmask, offset" +// Tells the assembler which registers are saved and where. +// bitmask - contain a little endian bitset indicating which registers are +// saved on function prologue (e.g. with a 0x80000000 mask, the +// assembler knows the register 31 (RA) is saved at prologue. +// offset - the position before stack pointer subtraction indicating where +// the first saved register on prologue is located. (e.g. with a +// +// Consider the following function prologue: +// +// .frame $fp,48,$ra +// .mask 0xc0000000,-8 +// addiu $sp, $sp, -48 +// sw $ra, 40($sp) +// sw $fp, 36($sp) +// +// With a 0xc0000000 mask, the assembler knows the register 31 (RA) and +// 30 (FP) are saved at prologue. As the save order on prologue is from +// left to right, RA is saved first. A -8 offset means that after the +// stack pointer subtration, the first register in the mask (RA) will be +// saved at address 48-8=40. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Mask directives +//===----------------------------------------------------------------------===// + +// Create a bitmask with all callee saved registers for CPU or Floating Point +// registers. For CPU registers consider RA, GP and FP for saving if necessary. +void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { + const TargetFrameLowering *TFI = TM.getFrameLowering(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>(); + + // CPU and FPU Saved Registers Bitmasks + unsigned int CPUBitmask = 0; + unsigned int FPUBitmask = 0; + + // Set the CPU and FPU Bitmasks + const MachineFrameInfo *MFI = MF->getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg); + if (Mips::CPURegsRegisterClass->contains(Reg)) + CPUBitmask |= (1 << RegNum); + else + FPUBitmask |= (1 << RegNum); + } + + // Return Address and Frame registers must also be set in CPUBitmask. + // FIXME: Do we really need hasFP() call here? When no FP is present SP is + // just returned -- will it be ok? 
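// [Editorial aside, not part of the patch] A worked example of the mask
// math in this function: for a function that both keeps a frame pointer
// and adjusts the stack, the two checks below OR in bit 30 ($fp, from
// getFrameRegister()) and bit 31 ($ra, from getRARegister()), so
//   CPUBitmask == (1 << 30) | (1 << 31) == 0xc0000000
// and the directive printed becomes ".mask 0xc0000000, <offset>" --
// the same value used in the sample prologue documented above.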
+ if (TFI->hasFP(*MF)) + CPUBitmask |= (1 << MipsRegisterInfo:: + getRegisterNumbering(RI->getFrameRegister(*MF))); + + if (MFI->adjustsStack()) + CPUBitmask |= (1 << MipsRegisterInfo:: + getRegisterNumbering(RI->getRARegister())); + + // Print CPUBitmask + O << "\t.mask \t"; printHex32(CPUBitmask, O); + O << ',' << MipsFI->getCPUTopSavedRegOff() << '\n'; + + // Print FPUBitmask + O << "\t.fmask\t"; printHex32(FPUBitmask, O); O << "," + << MipsFI->getFPUTopSavedRegOff() << '\n'; +} + +// Print a 32 bit hex number with all numbers. +void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) { + O << "0x"; + for (int i = 7; i >= 0; i--) + O << utohexstr((Value & (0xF << (i*4))) >> (i*4)); +} + +//===----------------------------------------------------------------------===// +// Frame and Set directives +//===----------------------------------------------------------------------===// + +/// Frame Directive +void MipsAsmPrinter::emitFrameDirective() { + const TargetRegisterInfo &RI = *TM.getRegisterInfo(); + + unsigned stackReg = RI.getFrameRegister(*MF); + unsigned returnReg = RI.getRARegister(); + unsigned stackSize = MF->getFrameInfo()->getStackSize(); + + OutStreamer.EmitRawText("\t.frame\t$" + + Twine(LowercaseString(getRegisterName(stackReg))) + + "," + Twine(stackSize) + ",$" + + Twine(LowercaseString(getRegisterName(returnReg)))); +} + +/// Emit Set directives. +const char *MipsAsmPrinter::getCurrentABIString() const { + switch (Subtarget->getTargetABI()) { + case MipsSubtarget::O32: return "abi32"; + case MipsSubtarget::O64: return "abiO64"; + case MipsSubtarget::N32: return "abiN32"; + case MipsSubtarget::N64: return "abi64"; + case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64 + default: break; + } + + llvm_unreachable("Unknown Mips ABI"); + return NULL; +} + +void MipsAsmPrinter::EmitFunctionEntryLabel() { + OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName())); + OutStreamer.EmitLabel(CurrentFnSym); +} + +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void MipsAsmPrinter::EmitFunctionBodyStart() { + emitFrameDirective(); + + SmallString<128> Str; + raw_svector_ostream OS(Str); + printSavedRegsBitmask(OS); + OutStreamer.EmitRawText(OS.str()); +} + +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void MipsAsmPrinter::EmitFunctionBodyEnd() { + // There are instruction for this macros, but they must + // always be at the function end, and we can't emit and + // break with BB logic. + OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); + OutStreamer.EmitRawText(StringRef("\t.set\treorder")); + OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); +} + + +/// isBlockOnlyReachableByFallthough - Return true if the basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) + const { + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *MBB->pred_begin(); + + // If the predecessor is a switch statement, assume a jump table + // implementation, so it is not a fall through. 
+ if (const BasicBlock *bb = Pred->getBasicBlock()) + if (isa<SwitchInst>(bb->getTerminator())) + return false; + + return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); +} + +// Print out an operand for an inline asm expression. +bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant,const char *ExtraCode, + raw_ostream &O) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + printOperand(MI, OpNo, O); + return false; +} + +void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + bool closeP = false; + + if (MO.getTargetFlags()) + closeP = true; + + switch(MO.getTargetFlags()) { + case MipsII::MO_GPREL: O << "%gp_rel("; break; + case MipsII::MO_GOT_CALL: O << "%call16("; break; + case MipsII::MO_GOT: { + const MachineOperand &LastMO = MI->getOperand(opNum-1); + bool LastMOIsGP = LastMO.getType() == MachineOperand::MO_Register + && LastMO.getReg() == Mips::GP; + if (MI->getOpcode() == Mips::LW || LastMOIsGP) + O << "%got("; + else + O << "%lo("; + break; + } + case MipsII::MO_ABS_HILO: + if (MI->getOpcode() == Mips::LUi) + O << "%hi("; + else + O << "%lo("; + break; + } + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << '$' << LowercaseString(getRegisterName(MO.getReg())); + break; + + case MachineOperand::MO_Immediate: + O << (short int)MO.getImm(); + break; + + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + + case MachineOperand::MO_GlobalAddress: + O << *Mang->getSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_ExternalSymbol: + O << *GetExternalSymbolSymbol(MO.getSymbolName()); + break; + + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" + << getFunctionNumber() << "_" << MO.getIndex(); + if (MO.getOffset()) + O << "+" << MO.getOffset(); + break; + + default: + llvm_unreachable("<unknown operand type>"); + } + + if (closeP) O << ")"; +} + +void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isImm()) + O << (unsigned short int)MO.getImm(); + else + printOperand(MI, opNum, O); +} + +void MipsAsmPrinter:: +printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, + const char *Modifier) { + // when using stack locations for not load/store instructions + // print the same way as all normal 3 operand instructions. + if (Modifier && !strcmp(Modifier, "stackloc")) { + printOperand(MI, opNum+1, O); + O << ", "; + printOperand(MI, opNum, O); + return; + } + + // Load/Store memory operands -- imm($reg) + // If PIC target the target is loaded as the + // pattern lw $25,%call16($28) + printOperand(MI, opNum, O); + O << "("; + printOperand(MI, opNum+1, O); + O << ")"; +} + +void MipsAsmPrinter:: +printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, + const char *Modifier) { + const MachineOperand& MO = MI->getOperand(opNum); + O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); +} + +void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { + // FIXME: Use SwitchSection. + + // Tell the assembler which ABI we are using + OutStreamer.EmitRawText("\t.section .mdebug." 
+ Twine(getCurrentABIString())); + + // TODO: handle O64 ABI + if (Subtarget->isABI_EABI()) { + if (Subtarget->isGP32bit()) + OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32")); + else + OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64")); + } + + // return to previous section + OutStreamer.EmitRawText(StringRef("\t.previous")); +} + +// Force static initialization. +extern "C" void LLVMInitializeMipsAsmPrinter() { + RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget); + RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget); +} diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index 597ea0d..b44a0af 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -31,7 +31,7 @@ namespace { const TargetInstrInfo *TII; static char ID; - Filler(TargetMachine &tm) + Filler(TargetMachine &tm) : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } virtual const char *getPassName() const { @@ -55,17 +55,22 @@ namespace { /// Currently, we fill delay slots with NOPs. We assume there is only one /// delay slot per delayed instruction. bool Filler:: -runOnMachineBasicBlock(MachineBasicBlock &MBB) +runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) - if (I->getDesc().hasDelaySlot()) { + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { + const TargetInstrDesc& Tid = I->getDesc(); + if (Tid.hasDelaySlot() && + (TM.getSubtarget<MipsSubtarget>().isMips1() || + Tid.isCall() || Tid.isBranch() || Tid.isReturn())) { MachineBasicBlock::iterator J = I; ++J; BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP)); ++FilledSlots; Changed = true; } + } + return Changed; } diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp new file mode 100644 index 0000000..87a097a --- /dev/null +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -0,0 +1,314 @@ +//=======- MipsFrameLowering.cpp - Mips Frame Information ------*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "MipsFrameLowering.h" +#include "MipsInstrInfo.h" +#include "MipsMachineFunction.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + + +//===----------------------------------------------------------------------===// +// +// Stack Frame Processing methods +// +----------------------------+ +// +// The stack is allocated decrementing the stack pointer on +// the first instruction of a function prologue. Once decremented, +// all stack references are done thought a positive offset +// from the stack/frame pointer, so the stack is considering +// to grow up! 
Otherwise terrible hacks would have to be made
+// to get this stack ABI compliant :)
+//
+// The stack frame required by the ABI (after call):
+// Offset
+//
+// 0 ----------
+// 4 Args to pass
+// . saved $GP (used in PIC)
+// . Alloca allocations
+// . Local Area
+// . CPU "Callee Saved" Registers
+// . saved FP
+// . saved RA
+// . FPU "Callee Saved" Registers
+// StackSize -----------
+//
+// Offset - offset from sp after stack allocation on function prologue
+//
+// The sp is the stack pointer; the stack size is subtracted from it in the
+// Prologue and added back in the Epilogue.
+//
+// References to the previous stack (to obtain arguments) are done
+// with offsets that exceed the stack size: (stacksize+(4*(num_arg-1)))
+//
+// Examples:
+// - reference to the actual stack frame
+// for any local area var there is something like: FI >= 0, StackOffset: 4
+// sw REGX, 4(SP)
+//
+// - reference to previous stack frame
+// suppose there's a load to the 5th argument: FI < 0, StackOffset: 16.
+// The emitted instruction will be something like:
+// lw REGX, 16+StackSize(SP)
+//
+// Since the total stack size is unknown on LowerFormalArguments, all
+// stack references (ObjectOffset) created to reference the function
+// arguments are negative numbers. This way, on eliminateFrameIndex it's
+// possible to detect those references and the offsets are adjusted to
+// their real location.
+//
+//===----------------------------------------------------------------------===//
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+}
+
+void MipsFrameLowering::adjustMipsStackFrame(MachineFunction &MF) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  unsigned StackAlign = getStackAlignment();
+  unsigned RegSize = STI.isGP32bit() ? 4 : 8;
+  bool HasGP = MipsFI->needGPSaveRestore();
+
+  // Min and Max CSI FrameIndex.
+  int MinCSFI = -1, MaxCSFI = -1;
+
+  // See the description at MipsMachineFunction.h.
+  int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1;
+
+  // Replace the dummy '0' SPOffset by the negative offsets, as explained in
+  // LowerFormalArguments. Leaving '0' for a while is necessary to avoid the
+  // approach done by calculateFrameObjectOffsets to the stack frame.
+  MipsFI->adjustLoadArgsFI(MFI);
+  MipsFI->adjustStoreVarArgsFI(MFI);
+
+  // It happens that the default stack frame allocation order does not directly
+  // map to the convention used for Mips. So we must fix it. We move the callee
+  // save register slots after the local variables area, as described in the
+  // stack frame above.
+  unsigned CalleeSavedAreaSize = 0;
+  if (!CSI.empty()) {
+    MinCSFI = CSI[0].getFrameIdx();
+    MaxCSFI = CSI[CSI.size()-1].getFrameIdx();
+  }
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+    CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx());
+
+  unsigned StackOffset = HasGP ? (MipsFI->getGPStackOffset()+RegSize)
+                               : (STI.isABI_O32() ? 16 : 0);
+
+  // Adjust local variables. They should come on the stack right
+  // after the arguments.
+ int LastOffsetFI = -1; + for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (i >= MinCSFI && i <= MaxCSFI) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + unsigned Offset = + StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize; + if (LastOffsetFI == -1) + LastOffsetFI = i; + if (Offset > MFI->getObjectOffset(LastOffsetFI)) + LastOffsetFI = i; + MFI->setObjectOffset(i, Offset); + } + + // Adjust CPU Callee Saved Registers Area. Registers RA and FP must + // be saved in this CPU Area. This whole area must be aligned to the + // default Stack Alignment requirements. + if (LastOffsetFI >= 0) + StackOffset = MFI->getObjectOffset(LastOffsetFI)+ + MFI->getObjectSize(LastOffsetFI); + StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign); + + for (unsigned i = 0, e = CSI.size(); i != e ; ++i) { + unsigned Reg = CSI[i].getReg(); + if (!Mips::CPURegsRegisterClass->contains(Reg)) + break; + MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset); + TopCPUSavedRegOff = StackOffset; + StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx()); + } + + // Stack locations for FP and RA. If only one of them is used, + // the space must be allocated for both, otherwise no space at all. + if (hasFP(MF) || MFI->adjustsStack()) { + // FP stack location + MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), + StackOffset); + MipsFI->setFPStackOffset(StackOffset); + TopCPUSavedRegOff = StackOffset; + StackOffset += RegSize; + + // SP stack location + MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), + StackOffset); + MipsFI->setRAStackOffset(StackOffset); + StackOffset += RegSize; + + if (MFI->adjustsStack()) + TopCPUSavedRegOff += RegSize; + } + + StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign); + + // Adjust FPU Callee Saved Registers Area. This Area must be + // aligned to the default Stack Alignment requirements. + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + if (Mips::CPURegsRegisterClass->contains(Reg)) + continue; + MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset); + TopFPUSavedRegOff = StackOffset; + StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx()); + } + StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign); + + // Update frame info + MFI->setStackSize(StackOffset); + + // Recalculate the final tops offset. The final values must be '0' + // if there isn't a callee saved register for CPU or FPU, otherwise + // a negative offset is needed. + if (TopCPUSavedRegOff >= 0) + MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset); + + if (TopFPUSavedRegOff >= 0) + MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset); +} + +void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + const MipsRegisterInfo *RegInfo = + static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const MipsInstrInfo &TII = + *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_); + + // Get the right frame order for Mips. + adjustMipsStackFrame(MF); + + // Get the number of bytes to allocate from the FrameInfo. 
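// [Editorial aside, not part of the patch] For a non-leaf function that
// needs a frame pointer, the sequence emitted below comes out roughly as
//   addiu $sp, $sp, -StackSize   ; allocate the frame
//   sw    $ra, RAOffset($sp)     ; only if MFI->adjustsStack()
//   sw    $fp, FPOffset($sp)     ; only if hasFP(MF)
//   addu  $fp, $sp, $zero        ; i.e. move $fp, $sp
// matching the .frame/.mask example documented in MipsAsmPrinter.cpp.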
+ unsigned StackSize = MFI->getStackSize(); + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI->adjustsStack()) return; + + int FPOffset = MipsFI->getFPStackOffset(); + int RAOffset = MipsFI->getRAStackOffset(); + + BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER)); + + // TODO: check need from GP here. + if (isPIC && STI.isABI_O32()) + BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD)) + .addReg(RegInfo->getPICCallReg()); + BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); + + // Adjust stack : addi sp, sp, (-imm) + BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP).addImm(-StackSize); + + // Save the return address only if the function isnt a leaf one. + // sw $ra, stack_loc($sp) + if (MFI->adjustsStack()) { + BuildMI(MBB, MBBI, dl, TII.get(Mips::SW)) + .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP); + } + + // if framepointer enabled, save it and set it + // to point to the stack pointer + if (hasFP(MF)) { + // sw $fp,stack_loc($sp) + BuildMI(MBB, MBBI, dl, TII.get(Mips::SW)) + .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP); + + // move $fp, $sp + BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP) + .addReg(Mips::SP).addReg(Mips::ZERO); + } + + // Restore GP from the saved stack location + if (MipsFI->needGPSaveRestore()) + BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)) + .addImm(MipsFI->getGPStackOffset()); +} + +void MipsFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + const MipsInstrInfo &TII = + *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); + DebugLoc dl = MBBI->getDebugLoc(); + + // Get the number of bytes from FrameInfo + int NumBytes = (int) MFI->getStackSize(); + + // Get the FI's where RA and FP are saved. + int FPOffset = MipsFI->getFPStackOffset(); + int RAOffset = MipsFI->getRAStackOffset(); + + // if framepointer enabled, restore it and restore the + // stack pointer + if (hasFP(MF)) { + // move $sp, $fp + BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::SP) + .addReg(Mips::FP).addReg(Mips::ZERO); + + // lw $fp,stack_loc($sp) + BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::FP) + .addImm(FPOffset).addReg(Mips::SP); + } + + // Restore the return address only if the function isnt a leaf one. + // lw $ra, stack_loc($sp) + if (MFI->adjustsStack()) { + BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA) + .addImm(RAOffset).addReg(Mips::SP); + } + + // adjust stack : insert addi sp, sp, (imm) + if (NumBytes) { + BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP).addImm(NumBytes); + } +} + +void MipsFrameLowering:: +processFunctionBeforeFrameFinalized(MachineFunction &MF) const { + const MipsRegisterInfo *RegInfo = + static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo()); + RegInfo->processFunctionBeforeFrameFinalized(MF); +} diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h new file mode 100644 index 0000000..a8426c1 --- /dev/null +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -0,0 +1,48 @@ +//==--- MipsFrameLowering.h - Define frame lowering for Mips --*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef ALPHA_FRAMEINFO_H +#define ALPHA_FRAMEINFO_H + +#include "Mips.h" +#include "MipsSubtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + class MipsSubtarget; + +class MipsFrameLowering : public TargetFrameLowering { +protected: + const MipsSubtarget &STI; + +public: + explicit MipsFrameLowering(const MipsSubtarget &sti) + // FIXME: Is this correct at all? + : TargetFrameLowering(StackGrowsUp, 8, 0), STI(sti) { + } + + void adjustMipsStackFrame(MachineFunction &MF) const; + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool hasFP(const MachineFunction &MF) const; + + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index a47cf7b..755e04d 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -84,8 +84,7 @@ private: SDNode *Select(SDNode *N); // Complex Pattern. - bool SelectAddr(SDNode *Op, SDValue N, - SDValue &Base, SDValue &Offset); + bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset); SDNode *SelectLoadFp64(SDNode *N); SDNode *SelectStoreFp64(SDNode *N); @@ -110,8 +109,7 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel:: -SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) -{ +SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) { // if Address is FI, get the TargetFrameIndex. if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); @@ -193,7 +191,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { SDValue N1 = N->getOperand(1); SDValue Offset0, Offset1, Base; - if (!SelectAddr(N, N1, Offset0, Base) || + if (!SelectAddr(N1, Offset0, Base) || N1.getValueType() != MVT::i32) return NULL; @@ -257,7 +255,7 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { SDValue N2 = N->getOperand(2); SDValue Offset0, Offset1, Base; - if (!SelectAddr(N, N2, Offset0, Base) || + if (!SelectAddr(N2, Offset0, Base) || N1.getValueType() != MVT::f64 || N2.getValueType() != MVT::i32) return NULL; @@ -327,7 +325,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { case ISD::SUBE: case ISD::ADDE: { SDValue InFlag = Node->getOperand(2), CmpLHS; - unsigned Opc = InFlag.getOpcode(); Opc=Opc; + unsigned Opc = InFlag.getOpcode(); (void)Opc; assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || (Opc == ISD::SUBC || Opc == ISD::SUBE)) && "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); @@ -351,7 +349,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT, SDValue(Carry,0), RHS); - return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Flag, + return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry,0)); } @@ -369,11 +367,11 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { else Op = (Opcode == ISD::UDIVREM ? 
Mips::DIVu : Mips::DIV); - SDNode *MulDiv = CurDAG->getMachineNode(Op, dl, MVT::Flag, Op1, Op2); + SDNode *MulDiv = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2); SDValue InFlag = SDValue(MulDiv, 0); SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, - MVT::Flag, InFlag); + MVT::Glue, InFlag); InFlag = SDValue(Lo,1); SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag); @@ -388,6 +386,8 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { /// Special Muls case ISD::MUL: + if (Subtarget.isMips32()) + break; case ISD::MULHS: case ISD::MULHU: { SDValue MulOp1 = Node->getOperand(0); @@ -395,7 +395,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl, - MVT::Flag, MulOp1, MulOp2); + MVT::Glue, MulOp1, MulOp2); SDValue InFlag = SDValue(MulNode, 0); @@ -421,7 +421,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { Op = (Opcode == ISD::SREM ? Mips::DIV : Mips::DIVu); MOp = Mips::MFHI; } - SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Flag, Op1, Op2); + SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2); SDValue InFlag = SDValue(Node, 0); return CurDAG->getMachineNode(MOp, dl, MVT::i32, InFlag); @@ -474,7 +474,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { SDValue InFlag; // Skip the incomming flag if present - if (Node->getOperand(LastOpNum).getValueType() == MVT::Flag) + if (Node->getOperand(LastOpNum).getValueType() == MVT::Glue) LastOpNum--; if ( (isa<GlobalAddressSDNode>(Callee)) || @@ -496,7 +496,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { Chain = CurDAG->getCopyToReg(Chain, dl, Mips::T9, Callee, InFlag); // Map the JmpLink operands to JALR - SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Flag); + SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(CurDAG->getRegister(Mips::T9, MVT::i32)); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index b0b99ba..1d7a1c0 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -41,12 +41,15 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::Lo : return "MipsISD::Lo"; case MipsISD::GPRel : return "MipsISD::GPRel"; case MipsISD::Ret : return "MipsISD::Ret"; - case MipsISD::CMov : return "MipsISD::CMov"; case MipsISD::SelectCC : return "MipsISD::SelectCC"; case MipsISD::FPSelectCC : return "MipsISD::FPSelectCC"; case MipsISD::FPBrcond : return "MipsISD::FPBrcond"; case MipsISD::FPCmp : return "MipsISD::FPCmp"; case MipsISD::FPRound : return "MipsISD::FPRound"; + case MipsISD::MAdd : return "MipsISD::MAdd"; + case MipsISD::MAddu : return "MipsISD::MAddu"; + case MipsISD::MSub : return "MipsISD::MSub"; + case MipsISD::MSubu : return "MipsISD::MSubu"; default : return NULL; } } @@ -57,7 +60,7 @@ MipsTargetLowering(MipsTargetMachine &TM) Subtarget = &TM.getSubtarget<MipsSubtarget>(); // Mips does not have i1 type, so use i32 for - // setcc operations results (slt, sgt, ...). + // setcc operations results (slt, sgt, ...). 
   setBooleanContents(ZeroOrOneBooleanContent);
 
   // Set up the register classes
@@ -69,7 +72,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
   if (!Subtarget->isFP64bit())
     addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
 
-  // Load extented operations for i1 types must be promoted
+  // Load extended operations for i1 types must be promoted
   setLoadExtAction(ISD::EXTLOAD,  MVT::i1,  Promote);
   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1,  Promote);
   setLoadExtAction(ISD::SEXTLOAD, MVT::i1,  Promote);
@@ -78,9 +81,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 
-  // Used by legalize types to correctly generate the setcc result.
-  // Without this, every float setcc comes with a AND/OR with the result,
-  // we don't want this, since the fpcmp result goes to a flag register,
+  // Used by legalize types to correctly generate the setcc result.
+  // Without this, every float setcc comes with an AND/OR with the result;
+  // we don't want this, since the fpcmp result goes to a flag register,
   // which is used implicitly by brcond and select operations.
   AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
@@ -100,8 +103,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
 
   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
 
-
-  // We custom lower AND/OR to handle the case where the DAG contain 'ands/ors'
-  // with operands comming from setcc fp comparions. This is necessary since
+
+  // We custom lower AND/OR to handle the case where the DAG contains 'ands/ors'
+  // with operands coming from setcc fp comparisons. This is necessary since
   // the result from these setcc are in a flag registers (FCR31).
   setOperationAction(ISD::AND,              MVT::i32,   Custom);
   setOperationAction(ISD::OR,               MVT::i32,   Custom);
@@ -116,7 +119,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::CTPOP,            MVT::i32,   Expand);
   setOperationAction(ISD::CTTZ,             MVT::i32,   Expand);
   setOperationAction(ISD::ROTL,             MVT::i32,   Expand);
-  setOperationAction(ISD::ROTR,             MVT::i32,   Expand);
+
+  if (!Subtarget->isMips32r2())
+    setOperationAction(ISD::ROTR, MVT::i32,   Expand);
+
   setOperationAction(ISD::SHL_PARTS,        MVT::i32,   Expand);
   setOperationAction(ISD::SRA_PARTS,        MVT::i32,   Expand);
   setOperationAction(ISD::SRL_PARTS,        MVT::i32,   Expand);
@@ -152,6 +158,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
   if (!Subtarget->hasSwap())
     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
 
+  setTargetDAGCombine(ISD::ADDE);
+  setTargetDAGCombine(ISD::SUBE);
+
   setStackPointerRegisterToSaveRestore(Mips::SP);
   computeRegisterProperties();
 }
@@ -165,10 +174,198 @@ unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const {
   return 2;
 }
 
+// SelectMadd -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+//   (addc multLo, Lo0), (adde multHi, Hi0),
+// where,
+//   multHi/Lo: product of multiplication
+//   Lo0: initial value of Lo register
+//   Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
+  // ADDENode's second operand must be a flag output of an ADDC node in order
+  // for the matching to be successful.
+  SDNode* ADDCNode = ADDENode->getOperand(2).getNode();
+
+  if (ADDCNode->getOpcode() != ISD::ADDC)
+    return false;
+
+  SDValue MultHi = ADDENode->getOperand(0);
+  SDValue MultLo = ADDCNode->getOperand(0);
+  SDNode* MultNode = MultHi.getNode();
+  unsigned MultOpc = MultHi.getOpcode();
+
+  // MultHi and MultLo must be generated by the same node,
+  if (MultLo.getNode() != MultNode)
+    return false;
+
+  // and it must be a multiplication.
+  if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+    return false;
+
+  // MultLo and MultHi must be the first and second output of MultNode
+  // respectively.
+  if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+    return false;
+
+  // Transform this to a MADD only if ADDENode and ADDCNode are the only users
+  // of the values of MultNode, in which case MultNode will be removed in later
+  // phases.
+  // If there exist users other than ADDENode or ADDCNode, this function returns
+  // here, which will result in MultNode being mapped to a single MULT
+  // instruction node rather than a pair of MULT and MADD instructions being
+  // produced.
+  if (!MultHi.hasOneUse() || !MultLo.hasOneUse())
+    return false;
+
+  SDValue Chain = CurDAG->getEntryNode();
+  DebugLoc dl = ADDENode->getDebugLoc();
+
+  // create MipsMAdd(u) node
+  MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
+
+  SDValue MAdd = CurDAG->getNode(MultOpc, dl,
+                                 MVT::Glue,
+                                 MultNode->getOperand(0),  // Factor 0
+                                 MultNode->getOperand(1),  // Factor 1
+                                 ADDCNode->getOperand(1),  // Lo0
+                                 ADDENode->getOperand(1)); // Hi0
+
+  // create CopyFromReg nodes
+  SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32,
+                                              MAdd);
+  SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl,
+                                              Mips::HI, MVT::i32,
+                                              CopyFromLo.getValue(2));
+
+  // replace uses of adde and addc here
+  if (!SDValue(ADDCNode, 0).use_empty())
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), CopyFromLo);
+
+  if (!SDValue(ADDENode, 0).use_empty())
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), CopyFromHi);
+
+  return true;
+}
+
+// SelectMsub -
+// Transforms a subgraph in CurDAG if the following pattern is found:
+//   (subc Lo0, multLo), (sube Hi0, multHi),
+// where,
+//   multHi/Lo: product of multiplication
+//   Lo0: initial value of Lo register
+//   Hi0: initial value of Hi register
+// Return true if pattern matching was successful.
+static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
+  // SUBENode's second operand must be a flag output of a SUBC node in order
+  // for the matching to be successful.
+  SDNode* SUBCNode = SUBENode->getOperand(2).getNode();
+
+  if (SUBCNode->getOpcode() != ISD::SUBC)
+    return false;
+
+  SDValue MultHi = SUBENode->getOperand(1);
+  SDValue MultLo = SUBCNode->getOperand(1);
+  SDNode* MultNode = MultHi.getNode();
+  unsigned MultOpc = MultHi.getOpcode();
+
+  // MultHi and MultLo must be generated by the same node,
+  if (MultLo.getNode() != MultNode)
+    return false;
+
+  // and it must be a multiplication.
+  if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI)
+    return false;
+
+  // MultLo and MultHi must be the first and second output of MultNode
+  // respectively.
+  if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0)
+    return false;
+
+  // Transform this to a MSUB only if SUBENode and SUBCNode are the only users
+  // of the values of MultNode, in which case MultNode will be removed in later
+  // phases.
+ // If there exist users other than SUBENode or SUBCNode, this function returns + // here, which will result in MultNode being mapped to a single MULT + // instruction node rather than a pair of MULT and MSUB instructions being + // produced. + if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) + return false; + + SDValue Chain = CurDAG->getEntryNode(); + DebugLoc dl = SUBENode->getDebugLoc(); + + // create MipsSub(u) node + MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub; + + SDValue MSub = CurDAG->getNode(MultOpc, dl, + MVT::Glue, + MultNode->getOperand(0),// Factor 0 + MultNode->getOperand(1),// Factor 1 + SUBCNode->getOperand(0),// Lo0 + SUBENode->getOperand(0));// Hi0 + + // create CopyFromReg nodes + SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32, + MSub); + SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl, + Mips::HI, MVT::i32, + CopyFromLo.getValue(2)); + + // replace uses of sube and subc here + if (!SDValue(SUBCNode, 0).use_empty()) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), CopyFromLo); + + if (!SDValue(SUBENode, 0).use_empty()) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), CopyFromHi); + + return true; +} + +static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + if (DCI.isBeforeLegalize()) + return SDValue(); + + if (Subtarget->isMips32() && SelectMadd(N, &DAG)) + return SDValue(N, 0); + + return SDValue(); +} + +static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + if (DCI.isBeforeLegalize()) + return SDValue(); + + if (Subtarget->isMips32() && SelectMsub(N, &DAG)) + return SDValue(N, 0); + + return SDValue(); +} + +SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) + const { + SelectionDAG &DAG = DCI.DAG; + unsigned opc = N->getOpcode(); + + switch (opc) { + default: break; + case ISD::ADDE: + return PerformADDECombine(N, DAG, DCI, Subtarget); + case ISD::SUBE: + return PerformSUBECombine(N, DAG, DCI, Subtarget); + } + + return SDValue(); +} + SDValue MipsTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) + switch (Op.getOpcode()) { case ISD::AND: return LowerANDOR(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); @@ -194,7 +391,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const // MachineFunction as a live in value. It also creates a corresponding // virtual register for it. 
static unsigned -AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC) +AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC) { assert(RC->contains(PReg) && "Not the correct regclass!"); unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); @@ -212,7 +409,7 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) { return Mips::BRANCH_INVALID; } - + static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) { switch(BC) { default: @@ -227,24 +424,24 @@ static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) { static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown fp condition code!"); - case ISD::SETEQ: + case ISD::SETEQ: case ISD::SETOEQ: return Mips::FCOND_EQ; case ISD::SETUNE: return Mips::FCOND_OGL; - case ISD::SETLT: + case ISD::SETLT: case ISD::SETOLT: return Mips::FCOND_OLT; - case ISD::SETGT: + case ISD::SETGT: case ISD::SETOGT: return Mips::FCOND_OGT; - case ISD::SETLE: - case ISD::SETOLE: return Mips::FCOND_OLE; + case ISD::SETLE: + case ISD::SETOLE: return Mips::FCOND_OLE; case ISD::SETGE: case ISD::SETOGE: return Mips::FCOND_OGE; case ISD::SETULT: return Mips::FCOND_ULT; - case ISD::SETULE: return Mips::FCOND_ULE; + case ISD::SETULE: return Mips::FCOND_ULE; case ISD::SETUGT: return Mips::FCOND_UGT; case ISD::SETUGE: return Mips::FCOND_UGE; - case ISD::SETUO: return Mips::FCOND_UN; + case ISD::SETUO: return Mips::FCOND_UN; case ISD::SETO: return Mips::FCOND_OR; - case ISD::SETNE: + case ISD::SETNE: case ISD::SETONE: return Mips::FCOND_NEQ; case ISD::SETUEQ: return Mips::FCOND_UEQ; } @@ -364,7 +561,7 @@ LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const // Emit the round instruction and bit convert to integer SDValue Trunc = DAG.getNode(MipsISD::FPRound, dl, MVT::f32, Src, CondReg.getValue(1)); - SDValue BitCvt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Trunc); + SDValue BitCvt = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Trunc); return BitCvt; } @@ -382,11 +579,11 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const // obtain the new stack size. SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size); - // The Sub result contains the new stack start address, so it + // The Sub result contains the new stack start address, so it // must be placed in the stack pointer register. 
Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub); - - // This node always has two return values: a new stack pointer + + // This node always has two return values: a new stack pointer // value and a chain SDValue Ops[2] = { Sub, Chain }; return DAG.getMergeValues(Ops, 2, dl); @@ -405,9 +602,9 @@ LowerANDOR(SDValue Op, SelectionDAG &DAG) const SDValue True = DAG.getConstant(1, MVT::i32); SDValue False = DAG.getConstant(0, MVT::i32); - SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), + SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), LHS, True, False, LHS.getOperand(2)); - SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), + SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), RHS, True, False, RHS.getOperand(2)); return DAG.getNode(Op.getOpcode(), dl, MVT::i32, LSEL, RSEL); @@ -416,7 +613,7 @@ LowerANDOR(SDValue Op, SelectionDAG &DAG) const SDValue MipsTargetLowering:: LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { - // The first operand is the chain, the second is the condition, the third is + // The first operand is the chain, the second is the condition, the third is // the block to branch to if the condition is true. SDValue Chain = Op.getOperand(0); SDValue Dest = Op.getOperand(2); @@ -424,55 +621,55 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const if (Op.getOperand(1).getOpcode() != MipsISD::FPCmp) return Op; - + SDValue CondRes = Op.getOperand(1); SDValue CCNode = CondRes.getOperand(2); Mips::CondCode CC = (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue(); - SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32); + SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32); - return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode, + return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode, Dest, CondRes); } SDValue MipsTargetLowering:: LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - // The operands to this are the left and right operands to compare (ops #0, - // and #1) and the condition code to compare them with (op #2) as a + // The operands to this are the left and right operands to compare (ops #0, + // and #1) and the condition code to compare them with (op #2) as a // CondCodeSDNode. - SDValue LHS = Op.getOperand(0); + SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - - return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS, + + return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS, DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32)); } SDValue MipsTargetLowering:: LowerSELECT(SDValue Op, SelectionDAG &DAG) const { - SDValue Cond = Op.getOperand(0); + SDValue Cond = Op.getOperand(0); SDValue True = Op.getOperand(1); SDValue False = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - // if the incomming condition comes from a integer compare, the select - // operation must be SelectCC or a conditional move if the subtarget + // if the incomming condition comes from a integer compare, the select + // operation must be SelectCC or a conditional move if the subtarget // supports it. 
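  // Illustration (hypothetical example, not from the patch): a select fed by
  // an integer compare, e.g. (select (setcc %a, %b, setlt), %x, %y), is
  // returned unchanged below when the subtarget has conditional moves, so
  // instruction selection can match it directly; otherwise it is rewritten
  // into a MipsISD::SelectCC node and expanded later.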
if (Cond.getOpcode() != MipsISD::FPCmp) { if (Subtarget->hasCondMov() && !True.getValueType().isFloatingPoint()) return Op; - return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(), + return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(), Cond, True, False); } // if the incomming condition comes from fpcmp, the select // operation must use FPSelectCC. SDValue CCNode = Cond.getOperand(2); - return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), + return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), Cond, True, False, CCNode); } @@ -484,16 +681,16 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { SDVTList VTs = DAG.getVTList(MVT::i32); - + MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering(); - + // %gp_rel relocation - if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, MipsII::MO_GPREL); SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); - return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode); + return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode); } // %hi/%lo relocation SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, @@ -505,8 +702,8 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, } else { SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, MipsII::MO_GOT); - SDValue ResNode = DAG.getLoad(MVT::i32, dl, - DAG.getEntryNode(), GA, NULL, 0, + SDValue ResNode = DAG.getLoad(MVT::i32, dl, + DAG.getEntryNode(), GA, MachinePointerInfo(), false, false, 0); // On functions and global targets not internal linked only // a load from got/GP is necessary for PIC to work. @@ -531,7 +728,7 @@ SDValue MipsTargetLowering:: LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; - SDValue HiPart; + SDValue HiPart; // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; @@ -546,7 +743,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const SDValue Ops[] = { JTI }; HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1); } else // Emit Load from Global Pointer - HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0, + HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, + MachinePointerInfo(), false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI); @@ -565,26 +763,27 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const DebugLoc dl = Op.getDebugLoc(); // gp_rel relocation - // FIXME: we should reference the constant pool using small data sections, + // FIXME: we should reference the constant pool using small data sections, // but the asm printer currently doens't support this feature without - // hacking it. This feature should come soon so we can uncomment the + // hacking it. This feature should come soon so we can uncomment the // stuff below. 
//if (IsInSmallSection(C->getType())) { // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP); // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); - // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode); + // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode); if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { - SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), N->getOffset(), MipsII::MO_ABS_HILO); SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); } else { - SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), N->getOffset(), MipsII::MO_GOT); - SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), - CP, NULL, 0, false, false, 0); + SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), + CP, MachinePointerInfo::getConstantPool(), + false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); } @@ -603,7 +802,8 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0, + return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), + MachinePointerInfo(SV), false, false, 0); } @@ -614,23 +814,23 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { #include "MipsGenCallingConv.inc" //===----------------------------------------------------------------------===// -// TODO: Implement a generic logic using tblgen that can support this. +// TODO: Implement a generic logic using tblgen that can support this. // Mips O32 ABI rules: // --- // i32 - Passed in A0, A1, A2, A3 and stack -// f32 - Only passed in f32 registers if no int reg has been used yet to hold +// f32 - Only passed in f32 registers if no int reg has been used yet to hold // an argument. Otherwise, passed in A1, A2, A3 and stack. -// f64 - Only passed in two aliased f32 registers if no int reg has been used -// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is +// f64 - Only passed in two aliased f32 registers if no int reg has been used +// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is // not used, it must be shadowed. If only A3 is avaiable, shadow it and // go to stack. 
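// Worked examples (illustrative only, derived from the rules above):
//   f(int, int, int, int) -> A0, A1, A2, A3
//   f(double, int, float) -> D6 ($f12/$f13, shadowing A0/A1), A2, A3
//   f(float, float)       -> F12, F14 (A0/A1 shadowed)
//   f(int, double)        -> A0, then the A2/A3 pair (A1 skipped for alignment)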
//===----------------------------------------------------------------------===// -static bool CC_MipsO32(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, +static bool CC_MipsO32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const unsigned IntRegsSize=4, FloatRegsSize=2; + static const unsigned IntRegsSize=4, FloatRegsSize=2; static const unsigned IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 @@ -642,9 +842,15 @@ static bool CC_MipsO32(unsigned ValNo, EVT ValVT, Mips::D6, Mips::D7 }; - unsigned Reg=0; - unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize); - bool IntRegUsed = (IntRegs[UnallocIntReg] != (unsigned (Mips::A0))); + unsigned Reg = 0; + static bool IntRegUsed = false; + + // This must be the first arg of the call if no regs have been allocated. + // Initialize IntRegUsed in that case. + if (IntRegs[State.getFirstUnallocated(IntRegs, IntRegsSize)] == Mips::A0 && + F32Regs[State.getFirstUnallocated(F32Regs, FloatRegsSize)] == Mips::F12 && + F64Regs[State.getFirstUnallocated(F64Regs, FloatRegsSize)] == Mips::D6) + IntRegUsed = false; // Promote i8 and i16 if (LocVT == MVT::i8 || LocVT == MVT::i16) { @@ -657,30 +863,48 @@ static bool CC_MipsO32(unsigned ValNo, EVT ValVT, LocInfo = CCValAssign::AExt; } - if (ValVT == MVT::i32 || (ValVT == MVT::f32 && IntRegUsed)) { + if (ValVT == MVT::i32) { Reg = State.AllocateReg(IntRegs, IntRegsSize); IntRegUsed = true; - LocVT = MVT::i32; - } - - if (ValVT.isFloatingPoint() && !IntRegUsed) { - if (ValVT == MVT::f32) - Reg = State.AllocateReg(F32Regs, FloatRegsSize); - else - Reg = State.AllocateReg(F64Regs, FloatRegsSize); - } + } else if (ValVT == MVT::f32) { + // An int reg has to be marked allocated regardless of whether or not + // IntRegUsed is true. + Reg = State.AllocateReg(IntRegs, IntRegsSize); - if (ValVT == MVT::f64 && IntRegUsed) { - if (UnallocIntReg != IntRegsSize) { - // If we hit register A3 as the first not allocated, we must - // mark it as allocated (shadow) and use the stack instead. - if (IntRegs[UnallocIntReg] != (unsigned (Mips::A3))) - Reg = Mips::A2; - for (;UnallocIntReg < IntRegsSize; ++UnallocIntReg) - State.AllocateReg(UnallocIntReg); - } - LocVT = MVT::i32; - } + if (IntRegUsed) { + if (Reg) // Int reg is available + LocVT = MVT::i32; + } else { + unsigned FReg = State.AllocateReg(F32Regs, FloatRegsSize); + if (FReg) // F32 reg is available + Reg = FReg; + else if (Reg) // No F32 regs are available, but an int reg is available. + LocVT = MVT::i32; + } + } else if (ValVT == MVT::f64) { + // Int regs have to be marked allocated regardless of whether or not + // IntRegUsed is true. + Reg = State.AllocateReg(IntRegs, IntRegsSize); + if (Reg == Mips::A1) + Reg = State.AllocateReg(IntRegs, IntRegsSize); + else if (Reg == Mips::A3) + Reg = 0; + State.AllocateReg(IntRegs, IntRegsSize); + + // At this point, Reg is A0, A2 or 0, and all the unavailable integer regs + // are marked as allocated. + if (IntRegUsed) { + if (Reg)// if int reg is available + LocVT = MVT::i32; + } else { + unsigned FReg = State.AllocateReg(F64Regs, FloatRegsSize); + if (FReg) // F64 reg is available. + Reg = FReg; + else if (Reg) // No F64 regs are available, but an int reg is available. 
+ LocVT = MVT::i32; + } + } else + assert(false && "cannot handle this ValVT"); if (!Reg) { unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; @@ -692,8 +916,8 @@ static bool CC_MipsO32(unsigned ValNo, EVT ValVT, return false; // CC must always match } -static bool CC_MipsO32_VarArgs(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, +static bool CC_MipsO32_VarArgs(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { static const unsigned IntRegsSize=4; @@ -736,7 +960,7 @@ static bool CC_MipsO32_VarArgs(unsigned ValNo, EVT ValVT, IntRegs[UnallocIntReg] == (unsigned (Mips::A2))) { unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize); State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); - // Shadow the next register so it can be used + // Shadow the next register so it can be used // later to get the other 32bit part. State.AllocateReg(IntRegs, IntRegsSize); return false; @@ -786,13 +1010,13 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // To meet O32 ABI, Mips must always allocate 16 bytes on // the stack (even if less than 4 are used as arguments) if (Subtarget->isABI_O32()) { - int VTsize = EVT(MVT::i32).getSizeInBits()/8; + int VTsize = MVT(MVT::i32).getSizeInBits()/8; MFI->CreateFixedObject(VTsize, (VTsize*3), true); - CCInfo.AnalyzeCallOperands(Outs, + CCInfo.AnalyzeCallOperands(Outs, isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32); } else CCInfo.AnalyzeCallOperands(Outs, CC_Mips); - + // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); @@ -801,7 +1025,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; SmallVector<SDValue, 8> MemOpChains; - // First/LastArgStackLoc contains the first/last + // First/LastArgStackLoc contains the first/last // "at stack" argument location. int LastArgStackLoc = 0; unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16); @@ -814,12 +1038,12 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Promote the value if needed. 
switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: + case CCValAssign::Full: if (Subtarget->isABI_O32() && VA.isRegLoc()) { if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32) - Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Arg); + Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) { - Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg); + Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg, DAG.getConstant(0, getPointerTy())); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg, @@ -827,7 +1051,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo)); RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi)); continue; - } + } } break; case CCValAssign::SExt: @@ -840,17 +1064,17 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; } - - // Arguments that can be passed on register must be kept at + + // Arguments that can be passed on register must be kept at // RegsToPass vector if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); continue; } - + // Register can't get to this point... assert(VA.isMemLoc()); - + // Create the frame index object for this incoming parameter // This guarantees that when allocating Local Area the firsts // 16 bytes which are alwayes reserved won't be overwritten @@ -861,50 +1085,51 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy()); - // emit ISD::STORE whichs stores the + // emit ISD::STORE whichs stores the // parameter value to a stack Location - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), false, false, 0)); } // Transform all store nodes into one single node because all store // nodes are independent of each other. - if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Build a sequence of copy-to-reg nodes chained together with token + // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. // The InFlag in necessary since all emited instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every - // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol - // node so that legalize doesn't hack it. + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. unsigned char OpFlag = IsPIC ? 
MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG; - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, OpFlag); else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), OpFlag); // MipsJmpLink = #chain, #target_address, #opt_in_flags... - // = Chain, Callee, Reg#1, Reg#2, ... + // = Chain, Callee, Reg#1, Reg#2, ... // // Returns a chain & a flag for retval copy to use. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); Ops.push_back(Callee); - // Add argument registers to the end of the list so that they are + // Add argument registers to the end of the list so that they are // known live into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, @@ -916,17 +1141,17 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); - // Create a stack location to hold GP when PIC is used. This stack - // location is used on function prologue to save GP and also after all - // emited CALL's to restore GP. + // Create a stack location to hold GP when PIC is used. This stack + // location is used on function prologue to save GP and also after all + // emited CALL's to restore GP. if (IsPIC) { - // Function can have an arbitrary number of calls, so + // Function can have an arbitrary number of calls, so // hold the LastArgStackLoc with the biggest offset. int FI; MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); if (LastArgStackLoc >= MipsFI->getGPStackOffset()) { LastArgStackLoc = (!LastArgStackLoc) ? (16) : (LastArgStackLoc+4); - // Create the frame index only once. SPOffset here can be anything + // Create the frame index only once. SPOffset here can be anything // (this will be fixed on processFunctionBeforeFrameFinalized) if (MipsFI->getGPStackOffset() == -1) { FI = MFI->CreateFixedObject(4, 0, true); @@ -937,14 +1162,15 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Reload GP value. FI = MipsFI->getGPFI(); - SDValue FIN = DAG.getFrameIndex(FI,getPointerTy()); - SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0, + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), false, false, 0); Chain = GPLoad.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32), + Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32), GPLoad, SDValue(0,0)); InFlag = Chain.getValue(1); - } + } // Create the CALLSEQ_END node. 
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), @@ -988,7 +1214,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// -/// LowerFormalArguments - transform physical registers into virtual registers +/// LowerFormalArguments - transform physical registers into virtual registers /// and generate load operations for arguments places on the stack. SDValue MipsTargetLowering::LowerFormalArguments(SDValue Chain, @@ -1018,7 +1244,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, ArgLocs, *DAG.getContext()); if (Subtarget->isABI_O32()) - CCInfo.AnalyzeFormalArguments(Ins, + CCInfo.AnalyzeFormalArguments(Ins, isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32); else CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); @@ -1037,22 +1263,22 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, TargetRegisterClass *RC = 0; if (RegVT == MVT::i32) - RC = Mips::CPURegsRegisterClass; - else if (RegVT == MVT::f32) + RC = Mips::CPURegsRegisterClass; + else if (RegVT == MVT::f32) RC = Mips::FGR32RegisterClass; else if (RegVT == MVT::f64) { - if (!Subtarget->isSingleFloat()) + if (!Subtarget->isSingleFloat()) RC = Mips::AFGR64RegisterClass; - } else + } else llvm_unreachable("RegVT not supported by FormalArguments Lowering"); - // Transform the arguments stored on + // Transform the arguments stored on // physical registers into virtual ones unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); - - // If this is an 8 or 16-bit value, it has been passed promoted - // to 32 bits. Insert an assert[sz]ext to capture this, then + + // If this is an 8 or 16-bit value, it has been passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then // truncate to the right size. 
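    // Illustration (hypothetical): a signed i8 argument arrives as the low
    // bits of an i32 register, so the code below produces
    //   (truncate i8 (AssertSext i32 %reg, i8)),
    // letting later passes assume the upper 24 bits match the sign bit.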
if (VA.getLocInfo() != CCValAssign::Full) { unsigned Opcode = 0; @@ -1061,22 +1287,21 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, else if (VA.getLocInfo() == CCValAssign::ZExt) Opcode = ISD::AssertZext; if (Opcode) - ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue, + ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue, DAG.getValueType(VA.getValVT())); ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); } - // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64 + // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64 if (Subtarget->isABI_O32()) { - if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32) - ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue); + if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32) + ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue); if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) { - unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(), + unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg()+1, RC); SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT); - SDValue Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue); - SDValue Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue2); - ArgValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::f64, Lo, Hi); + SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, ArgValue2, ArgValue); + ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Pair); } } @@ -1088,13 +1313,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // The last argument is not a register anymore ArgRegEnd = 0; - - // The stack pointer offset is relative to the caller stack frame. - // Since the real stack size is unknown here, a negative SPOffset + + // The stack pointer offset is relative to the caller stack frame. + // Since the real stack size is unknown here, a negative SPOffset // is used so there's a way to adjust these offsets when the stack - // size get known (on EliminateFrameIndex). A dummy SPOffset is + // size get known (on EliminateFrameIndex). A dummy SPOffset is // used instead of a direct negative address (which is recorded to - // be used on emitPrologue) to avoid mis-calc of the first stack + // be used on emitPrologue) to avoid mis-calc of the first stack // offset on PEI::calculateFrameObjectOffsets. // Arguments are always 32-bit. unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; @@ -1104,7 +1329,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), false, false, 0)); } } @@ -1124,11 +1350,11 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // To meet ABI, when VARARGS are passed on registers, the registers // must have their values written to the caller stack frame. If the last - // argument was placed in the stack, there's no need to save any register. + // argument was placed in the stack, there's no need to save any register. 
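  // Illustration (hypothetical): for "int sum(int n, ...)", n arrives in A0
  // and the block below stores A1-A3 to their home slots in the caller's
  // frame, so va_arg can then walk every argument contiguously in memory.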
if ((isVarArg) && (Subtarget->isABI_O32() && ArgRegEnd)) { if (StackPtr.getNode() == 0) StackPtr = DAG.getRegister(StackReg, getPointerTy()); - + // The last register argument that must be saved is Mips::A3 TargetRegisterClass *RC = Mips::CPURegsRegisterClass; unsigned StackLoc = ArgLocs.size()-1; @@ -1140,7 +1366,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(4, 0, true); MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); - OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0, + OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, + MachinePointerInfo(), false, false, 0)); // Record the frame index of the first variable argument @@ -1150,7 +1377,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, } } - // All stores are grouped in one node to allow the matching between + // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. This only happens when on varg functions if (!OutChains.empty()) { OutChains.push_back(Chain); @@ -1183,7 +1410,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_Mips); - // If this is the first return lowered for this function, add + // If this is the first return lowered for this function, add // the regs to the liveout set for the function. if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { for (unsigned i = 0; i != RVLocs.size(); ++i) @@ -1198,7 +1425,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); // guarantee that all emitted copies are @@ -1215,7 +1442,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); unsigned Reg = MipsFI->getSRetReturnReg(); - if (!Reg) + if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); @@ -1225,10 +1452,10 @@ MipsTargetLowering::LowerReturn(SDValue Chain, // Return on Mips is always a "jr $ra" if (Flag.getNode()) - return DAG.getNode(MipsISD::Ret, dl, MVT::Other, + return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain, DAG.getRegister(Mips::RA, MVT::i32), Flag); else // Return Void - return DAG.getNode(MipsISD::Ret, dl, MVT::Other, + return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain, DAG.getRegister(Mips::RA, MVT::i32)); } @@ -1239,21 +1466,21 @@ MipsTargetLowering::LowerReturn(SDValue Chain, /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. MipsTargetLowering::ConstraintType MipsTargetLowering:: -getConstraintType(const std::string &Constraint) const +getConstraintType(const std::string &Constraint) const { - // Mips specific constrainy + // Mips specific constrainy // GCC config/mips/constraints.md // - // 'd' : An address register. Equivalent to r - // unless generating MIPS16 code. - // 'y' : Equivalent to r; retained for - // backwards compatibility. - // 'f' : Floating Point registers. + // 'd' : An address register. Equivalent to r + // unless generating MIPS16 code. + // 'y' : Equivalent to r; retained for + // backwards compatibility. + // 'f' : Floating Point registers. 
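  // For illustration (hypothetical user code), these letters come from GCC
  // inline assembly, e.g.:
  //   int r, a, b;
  //   asm("addu %0, %1, %2" : "=d"(r) : "d"(a), "y"(b));
  //   float x, s, t;
  //   asm("add.s %0, %1, %2" : "=f"(x) : "f"(s), "f"(t));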
if (Constraint.size() == 1) { switch (Constraint[0]) { default : break; - case 'd': - case 'y': + case 'd': + case 'y': case 'f': return C_RegisterClass; break; @@ -1262,6 +1489,37 @@ getConstraintType(const std::string &Constraint) const return TargetLowering::getConstraintType(Constraint); } +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight +MipsTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + const Type *type = CallOperandVal->getType(); + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + case 'd': + case 'y': + if (type->isIntegerTy()) + weight = CW_Register; + break; + case 'f': + if (type->isFloatTy()) + weight = CW_Register; + break; + } + return weight; +} + /// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"), /// return a list of registers that can be used to satisfy the constraint. /// This should only be used for C_RegisterClass constraints. @@ -1275,7 +1533,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const case 'f': if (VT == MVT::f32) return std::make_pair(0U, Mips::FGR32RegisterClass); - if (VT == MVT::f64) + if (VT == MVT::f64) if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit())) return std::make_pair(0U, Mips::AFGR64RegisterClass); } @@ -1293,15 +1551,15 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, if (Constraint.size() != 1) return std::vector<unsigned>(); - switch (Constraint[0]) { + switch (Constraint[0]) { default : break; case 'r': // GCC Mips Constraint Letters - case 'd': - case 'y': - return make_vector<unsigned>(Mips::T0, Mips::T1, Mips::T2, Mips::T3, - Mips::T4, Mips::T5, Mips::T6, Mips::T7, Mips::S0, Mips::S1, - Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7, + case 'd': + case 'y': + return make_vector<unsigned>(Mips::T0, Mips::T1, Mips::T2, Mips::T3, + Mips::T4, Mips::T5, Mips::T6, Mips::T7, Mips::S0, Mips::S1, + Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7, Mips::T8, 0); case 'f': @@ -1313,15 +1571,15 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29, Mips::F30, Mips::F31, 0); else - return make_vector<unsigned>(Mips::F2, Mips::F4, Mips::F6, Mips::F8, - Mips::F10, Mips::F20, Mips::F22, Mips::F24, Mips::F26, + return make_vector<unsigned>(Mips::F2, Mips::F4, Mips::F6, Mips::F8, + Mips::F10, Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, 0); } - if (VT == MVT::f64) + if (VT == MVT::f64) if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit())) - return make_vector<unsigned>(Mips::D1, Mips::D2, Mips::D3, Mips::D4, - Mips::D5, Mips::D10, Mips::D11, Mips::D12, Mips::D13, + return make_vector<unsigned>(Mips::D1, Mips::D2, Mips::D3, Mips::D4, + Mips::D5, Mips::D10, Mips::D11, Mips::D12, Mips::D13, Mips::D14, Mips::D15, 0); } return std::vector<unsigned>(); @@ -1336,5 +1594,7 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { bool 
MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { if (VT != MVT::f32 && VT != MVT::f64) return false; + if (Imm.isNegZero()) + return false; return Imm.isZero(); } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 460747b..9d6b9f3 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -40,9 +40,6 @@ namespace llvm { // Handle gp_rel (small data/bss sections) relocation. GPRel, - // Conditional Move - CMov, - // Select CC Pseudo Instruction SelectCC, @@ -59,7 +56,13 @@ namespace llvm { FPRound, // Return - Ret + Ret, + + // MAdd/Sub nodes + MAdd, + MAddu, + MSub, + MSubu }; } @@ -83,6 +86,8 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; + + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; private: // Subtarget Info const MipsSubtarget *Subtarget; @@ -139,6 +144,11 @@ namespace llvm { // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. + ConstraintWeight getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const; + std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index cff79966d..977e0df 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -32,7 +32,7 @@ def SDT_MipsFPCmp : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, def SDT_MipsFPSelectCC : SDTypeProfile<1, 4, [SDTCisInt<1>, SDTCisInt<4>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; -def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInFlag]>; +def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>; def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond, [SDNPHasChain]>; def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp>; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 320c5b8..b70266a 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -19,41 +19,53 @@ include "MipsInstrFormats.td" def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; -def SDT_MipsSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, +def SDT_MipsSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisInt<1>]>; -def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, +def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>, SDTCisInt<4>]>; def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def SDT_MipsMAddMSub : SDTypeProfile<0, 4, + [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, + SDTCisSameAs<2, 3>]>; + // Call -def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, +def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; -// Hi and Lo nodes are used to handle global addresses. 
Used on -// MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol +// Hi and Lo nodes are used to handle global addresses. Used on +// MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol // static model. (nothing to do with Mips Registers Hi and Lo) def MipsHi : SDNode<"MipsISD::Hi", SDTIntUnaryOp>; def MipsLo : SDNode<"MipsISD::Lo", SDTIntUnaryOp>; def MipsGPRel : SDNode<"MipsISD::GPRel", SDTIntUnaryOp>; // Return -def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain, - SDNPOptInFlag]>; +def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain, + SDNPOptInGlue]>; // These are target-independent nodes, but have target-specific formats. def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart, - [SDNPHasChain, SDNPOutFlag]>; + [SDNPHasChain, SDNPOutGlue]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; // Select Condition Code def MipsSelectCC : SDNode<"MipsISD::SelectCC", SDT_MipsSelectCC>; -// Conditional Move -def MipsCMov : SDNode<"MipsISD::CMov", SDT_MipsCMov>; +// MAdd*/MSub* nodes +def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub, + [SDNPOptInGlue, SDNPOutGlue]>; +def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub, + [SDNPOptInGlue, SDNPOutGlue]>; +def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub, + [SDNPOptInGlue, SDNPOutGlue]>; +def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub, + [SDNPOptInGlue, SDNPOutGlue]>; //===----------------------------------------------------------------------===// // Mips Instruction Predicate Definitions. @@ -62,6 +74,8 @@ def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">; def HasBitCount : Predicate<"Subtarget.hasBitCount()">; def HasSwap : Predicate<"Subtarget.hasSwap()">; def HasCondMov : Predicate<"Subtarget.hasCondMov()">; +def IsMips32 : Predicate<"Subtarget.isMips32()">; +def IsMips32r2 : Predicate<"Subtarget.isMips32r2()">; //===----------------------------------------------------------------------===// // Mips Operand, Complex Patterns and Transformations Definitions. 
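As a concrete illustration of what the MAdd/MSub nodes above buy (hypothetical source, not part of this patch): on MIPS32, a multiply-accumulate such as

    long long acc;
    void mac(int a, int b) { acc += (long long)a * b; }

is rewritten by the ADDE combine in MipsISelLowering.cpp into a single MipsISD::MAdd, which the MArithR class below is meant to match onto the madd instruction, accumulating directly into HI/LO; without the combine, the 64-bit add would instead be selected with the explicit carry sequence (ADDu plus a carry node) seen in MipsISelDAGToDAG.cpp.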
@@ -126,90 +140,66 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>; let isCommutable = 1 in class ArithR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode, InstrItinClass itin>: - FR< op, - func, - (outs CPURegs:$dst), - (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>; + FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + !strconcat(instr_asm, "\t$dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>; let isCommutable = 1 in class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm>: - FR< op, - func, - (outs CPURegs:$dst), - (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [], IIAlu>; + FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu>; // Arithmetic 2 register operands class ArithI<bits<6> op, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type> : - FI< op, - (outs CPURegs:$dst), - (ins CPURegs:$b, Od:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>; + FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), + !strconcat(instr_asm, "\t$dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>; class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type> : - FI< op, - (outs CPURegs:$dst), - (ins CPURegs:$b, Od:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [], IIAlu>; + FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), + !strconcat(instr_asm, "\t$dst, $b, $c"), [], IIAlu>; // Arithmetic Multiply ADD/SUB -let rd=0 in -class MArithR<bits<6> func, string instr_asm> : - FR< 0x1c, - func, - (outs CPURegs:$rs), - (ins CPURegs:$rt), - !strconcat(instr_asm, "\t$rs, $rt"), - [], IIImul>; +let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in +class MArithR<bits<6> func, string instr_asm, SDNode op> : + FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt), + !strconcat(instr_asm, "\t$rs, $rt"), + [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul>; // Logical class LogicR<bits<6> func, string instr_asm, SDNode OpNode>: - FR< 0x00, - func, - (outs CPURegs:$dst), - (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>; + FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + !strconcat(instr_asm, "\t$dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>; class LogicI<bits<6> op, string instr_asm, SDNode OpNode>: - FI< op, - (outs CPURegs:$dst), - (ins CPURegs:$b, uimm16:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>; + FI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c), + !strconcat(instr_asm, "\t$dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], IIAlu>; class LogicNOR<bits<6> op, bits<6> func, string instr_asm>: - FR< op, - func, - (outs CPURegs:$dst), - (ins CPURegs:$b, CPURegs:$c), - !strconcat(instr_asm, "\t$dst, $b, $c"), - [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>; + FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + !strconcat(instr_asm, "\t$dst, $b, $c"), + [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>; // Shifts -let rt = 0 in -class LogicR_shift_imm<bits<6> func, string instr_asm, SDNode OpNode>: - FR< 0x00, - func, - (outs CPURegs:$dst), 
-    (ins CPURegs:$b, shamt:$c),
-    !strconcat(instr_asm, "\t$dst, $b, $c"),
-    [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu>;
+class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
+                              SDNode OpNode>:
+  FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, shamt:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt5:$c))], IIAlu> {
+  let rs = _rs;
+}
 
-class LogicR_shift_reg<bits<6> func, string instr_asm, SDNode OpNode>:
-  FR< 0x00,
-      func,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, CPURegs:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm,
+                              SDNode OpNode>:
+  FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu> {
+  let shamt = _shamt;
+}
 
 // Load Upper Immediate
 class LoadUpper<bits<6> op, string instr_asm>:
@@ -222,76 +212,55 @@ class LoadUpper<bits<6> op, string instr_asm>:
 
 // Memory Load/Store
 let canFoldAsLoad = 1, hasDelaySlot = 1 in
 class LoadM<bits<6> op, string instr_asm, PatFrag OpNode>:
-  FI< op,
-      (outs CPURegs:$dst),
-      (ins mem:$addr),
-      !strconcat(instr_asm, "\t$dst, $addr"),
-      [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
+  FI<op, (outs CPURegs:$dst), (ins mem:$addr),
+     !strconcat(instr_asm, "\t$dst, $addr"),
+     [(set CPURegs:$dst, (OpNode addr:$addr))], IILoad>;
 
 class StoreM<bits<6> op, string instr_asm, PatFrag OpNode>:
-  FI< op,
-      (outs),
-      (ins CPURegs:$dst, mem:$addr),
-      !strconcat(instr_asm, "\t$dst, $addr"),
-      [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
+  FI<op, (outs), (ins CPURegs:$dst, mem:$addr),
+     !strconcat(instr_asm, "\t$dst, $addr"),
+     [(OpNode CPURegs:$dst, addr:$addr)], IIStore>;
 
 // Conditional Branch
 let isBranch = 1, isTerminator=1, hasDelaySlot = 1 in {
 class CBranch<bits<6> op, string instr_asm, PatFrag cond_op>:
-  FI< op,
-      (outs),
-      (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
-      !strconcat(instr_asm, "\t$a, $b, $offset"),
-      [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
-      IIBranch>;
-
+  FI<op, (outs), (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
+     !strconcat(instr_asm, "\t$a, $b, $offset"),
+     [(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
+     IIBranch>;
 
 class CBranchZero<bits<6> op, string instr_asm, PatFrag cond_op>:
-  FI< op,
-      (outs),
-      (ins CPURegs:$src, brtarget:$offset),
-      !strconcat(instr_asm, "\t$src, $offset"),
-      [(brcond (cond_op CPURegs:$src, 0), bb:$offset)],
-      IIBranch>;
+  FI<op, (outs), (ins CPURegs:$src, brtarget:$offset),
+     !strconcat(instr_asm, "\t$src, $offset"),
+     [(brcond (cond_op CPURegs:$src, 0), bb:$offset)],
+     IIBranch>;
 }
 
 // SetCC
 class SetCC_R<bits<6> op, bits<6> func, string instr_asm, PatFrag cond_op>:
-  FR< op,
-      func,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, CPURegs:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))],
-      IIAlu>;
+  FR<op, func, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+     !strconcat(instr_asm, "\t$dst, $b, $c"),
+     [(set CPURegs:$dst, (cond_op CPURegs:$b, CPURegs:$c))],
+     IIAlu>;
 
 class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od,
               PatLeaf imm_type>:
-  FI< op,
-      (outs CPURegs:$dst),
-      (ins CPURegs:$b, Od:$c),
-      !strconcat(instr_asm, "\t$dst, $b, $c"),
-      [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))],
-      IIAlu>;
+  FI<op, (outs CPURegs:$dst), (ins CPURegs:$b,
Od:$c), + !strconcat(instr_asm, "\t$dst, $b, $c"), + [(set CPURegs:$dst, (cond_op CPURegs:$b, imm_type:$c))], + IIAlu>; // Unconditional branch let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in class JumpFJ<bits<6> op, string instr_asm>: - FJ< op, - (outs), - (ins brtarget:$target), - !strconcat(instr_asm, "\t$target"), - [(br bb:$target)], IIBranch>; + FJ<op, (outs), (ins brtarget:$target), + !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>; let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in class JumpFR<bits<6> op, bits<6> func, string instr_asm>: - FR< op, - func, - (outs), - (ins CPURegs:$target), - !strconcat(instr_asm, "\t$target"), - [(brind CPURegs:$target)], IIBranch>; + FR<op, func, (outs), (ins CPURegs:$target), + !strconcat(instr_asm, "\t$target"), [(brind CPURegs:$target)], IIBranch>; // Jump and Link (Call) let isCall=1, hasDelaySlot=1, @@ -299,86 +268,64 @@ let isCall=1, hasDelaySlot=1, Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in { class JumpLink<bits<6> op, string instr_asm>: - FJ< op, - (outs), - (ins calltarget:$target, variable_ops), - !strconcat(instr_asm, "\t$target"), - [(MipsJmpLink imm:$target)], IIBranch>; + FJ<op, (outs), (ins calltarget:$target, variable_ops), + !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)], + IIBranch>; let rd=31 in class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>: - FR< op, - func, - (outs), - (ins CPURegs:$rs, variable_ops), - !strconcat(instr_asm, "\t$rs"), - [(MipsJmpLink CPURegs:$rs)], IIBranch>; + FR<op, func, (outs), (ins CPURegs:$rs, variable_ops), + !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch>; class BranchLink<string instr_asm>: - FI< 0x1, - (outs), - (ins CPURegs:$rs, brtarget:$target, variable_ops), - !strconcat(instr_asm, "\t$rs, $target"), - [], IIBranch>; + FI<0x1, (outs), (ins CPURegs:$rs, brtarget:$target, variable_ops), + !strconcat(instr_asm, "\t$rs, $target"), [], IIBranch>; } // Mul, Div class MulDiv<bits<6> func, string instr_asm, InstrItinClass itin>: - FR< 0x00, - func, - (outs), - (ins CPURegs:$a, CPURegs:$b), - !strconcat(instr_asm, "\t$a, $b"), - [], itin>; + FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b), + !strconcat(instr_asm, "\t$a, $b"), [], itin>; // Move from Hi/Lo class MoveFromLOHI<bits<6> func, string instr_asm>: - FR< 0x00, - func, - (outs CPURegs:$dst), - (ins), - !strconcat(instr_asm, "\t$dst"), - [], IIHiLo>; + FR<0x00, func, (outs CPURegs:$dst), (ins), + !strconcat(instr_asm, "\t$dst"), [], IIHiLo>; class MoveToLOHI<bits<6> func, string instr_asm>: - FR< 0x00, - func, - (outs), - (ins CPURegs:$src), - !strconcat(instr_asm, "\t$src"), - [], IIHiLo>; + FR<0x00, func, (outs), (ins CPURegs:$src), + !strconcat(instr_asm, "\t$src"), [], IIHiLo>; class EffectiveAddress<string instr_asm> : - FI<0x09, - (outs CPURegs:$dst), - (ins mem:$addr), - instr_asm, - [(set CPURegs:$dst, addr:$addr)], IIAlu>; + FI<0x09, (outs CPURegs:$dst), (ins mem:$addr), + instr_asm, [(set CPURegs:$dst, addr:$addr)], IIAlu>; // Count Leading Ones/Zeros in Word -class CountLeading<bits<6> func, string instr_asm, SDNode CountOp>: - FR< 0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src), - !strconcat(instr_asm, "\t$dst, $src"), - [(set CPURegs:$dst, (CountOp CPURegs:$src))], IIAlu>; +class CountLeading<bits<6> func, string instr_asm, list<dag> pattern>: + FR<0x1c, func, (outs CPURegs:$dst), (ins CPURegs:$src), + !strconcat(instr_asm, 
"\t$dst, $src"), pattern, IIAlu>, + Requires<[HasBitCount]> { + let shamt = 0; + let rt = rd; +} // Sign Extend in Register. class SignExtInReg<bits<6> func, string instr_asm, ValueType vt>: - FR< 0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src), - !strconcat(instr_asm, "\t$dst, $src"), - [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>; + FR<0x3f, func, (outs CPURegs:$dst), (ins CPURegs:$src), + !strconcat(instr_asm, "\t$dst, $src"), + [(set CPURegs:$dst, (sext_inreg CPURegs:$src, vt))], NoItinerary>; // Byte Swap class ByteSwap<bits<6> func, string instr_asm>: - FR< 0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src), - !strconcat(instr_asm, "\t$dst, $src"), - [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>; + FR<0x1f, func, (outs CPURegs:$dst), (ins CPURegs:$src), + !strconcat(instr_asm, "\t$dst, $src"), + [(set CPURegs:$dst, (bswap CPURegs:$src))], NoItinerary>; // Conditional Move class CondMov<bits<6> func, string instr_asm, PatLeaf MovCode>: - FR< 0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T, - CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"), - [(set CPURegs:$dst, (MipsCMov CPURegs:$F, CPURegs:$T, - CPURegs:$cond, MovCode))], NoItinerary>; + FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$F, CPURegs:$T, + CPURegs:$cond), !strconcat(instr_asm, "\t$dst, $T, $cond"), + [], NoItinerary>; //===----------------------------------------------------------------------===// // Pseudo instructions @@ -408,13 +355,13 @@ def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>; def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>; def CPRESTORE : MipsPseudo<(outs), (ins uimm16:$loc), ".cprestore\t$loc\n", []>; -// The supported Mips ISAs dont have any instruction close to the SELECT_CC +// The supported Mips ISAs dont have any instruction close to the SELECT_CC // operation. 
The solution is to create a Mips pseudo SELECT_CC instruction -// (MipsSelectCC), use LowerSELECT_CC to generate this instruction and finally +// (MipsSelectCC), use LowerSELECT_CC to generate this instruction and finally // replace it for real supported nodes into EmitInstrWithCustomInserter let usesCustomInserter = 1 in { - class PseudoSelCC<RegisterClass RC, string asmstr>: - MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr, + class PseudoSelCC<RegisterClass RC, string asmstr>: + MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr, [(set RC:$dst, (MipsSelectCC CPURegs:$CmpRes, RC:$T, RC:$F))]>; } @@ -451,12 +398,18 @@ def XOR : LogicR<0x26, "xor", xor>; def NOR : LogicNOR<0x00, 0x27, "nor">; /// Shift Instructions -def SLL : LogicR_shift_imm<0x00, "sll", shl>; -def SRL : LogicR_shift_imm<0x02, "srl", srl>; -def SRA : LogicR_shift_imm<0x03, "sra", sra>; -def SLLV : LogicR_shift_reg<0x04, "sllv", shl>; -def SRLV : LogicR_shift_reg<0x06, "srlv", srl>; -def SRAV : LogicR_shift_reg<0x07, "srav", sra>; +def SLL : LogicR_shift_rotate_imm<0x00, 0x00, "sll", shl>; +def SRL : LogicR_shift_rotate_imm<0x02, 0x00, "srl", srl>; +def SRA : LogicR_shift_rotate_imm<0x03, 0x00, "sra", sra>; +def SLLV : LogicR_shift_rotate_reg<0x04, 0x00, "sllv", shl>; +def SRLV : LogicR_shift_rotate_reg<0x06, 0x00, "srlv", srl>; +def SRAV : LogicR_shift_rotate_reg<0x07, 0x00, "srav", sra>; + +// Rotate Instructions +let Predicates = [IsMips32r2] in { + def ROTR : LogicR_shift_rotate_imm<0x02, 0x01, "rotr", rotr>; + def ROTRV : LogicR_shift_rotate_reg<0x06, 0x01, "rotrv", rotr>; +} /// Load and Store Instructions def LB : LoadM<0x20, "lb", sextloadi8>; @@ -493,7 +446,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, def RET : FR <0x00, 0x02, (outs), (ins CPURegs:$target), "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>; -/// Multiply and Divide Instructions. +/// Multiply and Divide Instructions. let Defs = [HI, LO] in { def MULT : MulDiv<0x18, "mult", IIImul>; def MULTu : MulDiv<0x19, "multu", IIImul>; @@ -521,10 +474,10 @@ let Predicates = [HasSEInReg] in { } /// Count Leading -let Predicates = [HasBitCount] in { - let rt = 0 in - def CLZ : CountLeading<0b010110, "clz", ctlz>; -} +def CLZ : CountLeading<0b100000, "clz", + [(set CPURegs:$dst, (ctlz CPURegs:$src))]>; +def CLO : CountLeading<0b100001, "clo", + [(set CPURegs:$dst, (ctlz (not CPURegs:$src)))]>; /// Byte Swap let Predicates = [HasSwap] in { @@ -551,15 +504,15 @@ let addr=0 in // can be matched. It's similar to Sparc LEA_ADDRi def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, ${addr:stackloc}">; -// MADD*/MSUB* are not part of MipsI either. -//def MADD : MArithR<0x00, "madd">; -//def MADDU : MArithR<0x01, "maddu">; -//def MSUB : MArithR<0x04, "msub">; -//def MSUBU : MArithR<0x05, "msubu">; +// MADD*/MSUB* +def MADD : MArithR<0, "madd", MipsMAdd>; +def MADDU : MArithR<1, "maddu", MipsMAddu>; +def MSUB : MArithR<4, "msub", MipsMSub>; +def MSUBU : MArithR<5, "msubu", MipsMSubu>; // MUL is a assembly macro in the current used ISAs. In recent ISA's // it is a real instruction. 
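As an aside for readers of this hunk: the new ROTR/ROTRV definitions above match the rotr selection node, and the resurrected MADD*/MSUB* definitions accumulate a 32x32-bit product into the HI/LO register pair, which is why the MArithR class now lists HI and LO in both Defs and Uses. A minimal C++ sketch of both semantics (assuming 32-bit GPRs; the names rotr32 and madd are illustrative, not LLVM APIs):

    #include <cstdint>
    #include <cstdio>

    // rotr: rotate a 32-bit word right by n bits (shown for 0 < n < 32).
    static uint32_t rotr32(uint32_t x, unsigned n) {
      return (x >> n) | (x << (32 - n));
    }

    // madd: HI:LO += rs * rt, with HI:LO acting as one 64-bit accumulator
    // (signed product; maddu is identical with an unsigned 32x32->64 product).
    static void madd(int64_t &hilo, int32_t rs, int32_t rt) {
      hilo += (int64_t)rs * (int64_t)rt;
    }

    int main() {
      printf("%08x\n", rotr32(0x80000001u, 4)); // prints 18000000
      int64_t hilo = 0;
      madd(hilo, 100000, 100000);               // 1e10 does not fit in 32 bits
      printf("%lld\n", (long long)hilo);
    }

The MUL definition that follows is guarded with Requires<[IsMips32]>, matching the comment above: earlier ISAs only provide mul as an assembler macro.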
-//def MUL : ArithR<0x1c, 0x02, "mul", mul, IIImul>; +def MUL : ArithR<0x1c, 0x02, "mul", mul, IIImul>, Requires<[IsMips32]>; //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions @@ -605,9 +558,9 @@ def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)), (ADDiu CPURegs:$hi, tconstpool:$lo)>; // gp_rel relocs -def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)), +def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)), (ADDiu CPURegs:$gp, tglobaladdr:$in)>; -def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)), +def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)), (ADDiu CPURegs:$gp, tconstpool:$in)>; // Mips does not have "not", so we expand our way @@ -665,9 +618,15 @@ def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), (MOVN CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>; -def : Pat<(select CPURegs:$cond, CPURegs:$T, CPURegs:$F), +def : Pat<(select CPURegs:$cond, CPURegs:$T, CPURegs:$F), (MOVN CPURegs:$F, CPURegs:$T, CPURegs:$cond)>; +// select patterns with got access +def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), + (i32 tglobaladdr:$T), CPURegs:$F), + (MOVN CPURegs:$F, (ADDiu GP, tglobaladdr:$T), + (XOR CPURegs:$lhs, CPURegs:$rhs))>; + // setcc patterns def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs), (SLTu (XOR CPURegs:$lhs, CPURegs:$rhs), 1)>; diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index 5723f9e..1e8e4fe 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -26,11 +26,11 @@ namespace llvm { class MipsFunctionInfo : public MachineFunctionInfo { private: - /// Holds for each function where on the stack the Frame Pointer must be + /// Holds for each function where on the stack the Frame Pointer must be /// saved. This is used on Prologue and Epilogue to emit FP save/restore int FPStackOffset; - /// Holds for each function where on the stack the Return Address must be + /// Holds for each function where on the stack the Return Address must be /// saved. This is used on Prologue and Epilogue to emit RA save/restore int RAStackOffset; @@ -51,22 +51,22 @@ private: : FI(FrameIndex), SPOffset(StackPointerOffset) {} }; - /// When PIC is used the GP must be saved on the stack on the function - /// prologue and must be reloaded from this stack location after every - /// call. A reference to its stack location and frame index must be kept + /// When PIC is used the GP must be saved on the stack on the function + /// prologue and must be reloaded from this stack location after every + /// call. A reference to its stack location and frame index must be kept /// to be used on emitPrologue and processFunctionBeforeFrameFinalized. MipsFIHolder GPHolder; /// On LowerFormalArguments the stack size is unknown, so the Stack - /// Pointer Offset calculation of "not in register arguments" must be - /// postponed to emitPrologue. + /// Pointer Offset calculation of "not in register arguments" must be + /// postponed to emitPrologue. SmallVector<MipsFIHolder, 16> FnLoadArgs; bool HasLoadArgs; - // When VarArgs, we must write registers back to caller stack, preserving - // on register arguments. Since the stack size is unknown on + // When VarArgs, we must write registers back to caller stack, preserving + // on register arguments. 
Since the stack size is unknown on // LowerFormalArguments, the Stack Pointer Offset calculation must be - // postponed to emitPrologue. + // postponed to emitPrologue. SmallVector<MipsFIHolder, 4> FnStoreVarArgs; bool HasStoreVarArgs; @@ -84,9 +84,9 @@ private: int VarArgsFrameIndex; public: - MipsFunctionInfo(MachineFunction& MF) - : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), - FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false), + MipsFunctionInfo(MachineFunction& MF) + : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), + FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false), HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0) {} @@ -110,7 +110,7 @@ public: bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; } bool hasLoadArgs() const { return HasLoadArgs; } - bool hasStoreVarArgs() const { return HasStoreVarArgs; } + bool hasStoreVarArgs() const { return HasStoreVarArgs; } void recordLoadArgsFI(int FI, int SPOffset) { if (!HasLoadArgs) HasLoadArgs=true; @@ -123,12 +123,12 @@ public: void adjustLoadArgsFI(MachineFrameInfo *MFI) const { if (!hasLoadArgs()) return; - for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i) + for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i) MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset ); } void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const { - if (!hasStoreVarArgs()) return; - for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i) + if (!hasStoreVarArgs()) return; + for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i) MFI->setObjectOffset( FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset ); } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 69436d2..3719e58 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -25,7 +25,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLocation.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetInstrInfo.h" @@ -117,8 +117,7 @@ getCalleeSavedRegs(const MachineFunction *MF) const } BitVector MipsRegisterInfo:: -getReservedRegs(const MachineFunction &MF) const -{ +getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(Mips::ZERO); Reserved.set(Mips::AT); @@ -137,184 +136,6 @@ getReservedRegs(const MachineFunction &MF) const return Reserved; } -//===----------------------------------------------------------------------===// -// -// Stack Frame Processing methods -// +----------------------------+ -// -// The stack is allocated decrementing the stack pointer on -// the first instruction of a function prologue. Once decremented, -// all stack references are done thought a positive offset -// from the stack/frame pointer, so the stack is considering -// to grow up! Otherwise terrible hacks would have to be made -// to get this stack ABI compliant :) -// -// The stack frame required by the ABI (after call): -// Offset -// -// 0 ---------- -// 4 Args to pass -// . saved $GP (used in PIC) -// . Alloca allocations -// . Local Area -// . CPU "Callee Saved" Registers -// . saved FP -// . saved RA -// . 
FPU "Callee Saved" Registers -// StackSize ----------- -// -// Offset - offset from sp after stack allocation on function prologue -// -// The sp is the stack pointer subtracted/added from the stack size -// at the Prologue/Epilogue -// -// References to the previous stack (to obtain arguments) are done -// with offsets that exceeds the stack size: (stacksize+(4*(num_arg-1)) -// -// Examples: -// - reference to the actual stack frame -// for any local area var there is smt like : FI >= 0, StackOffset: 4 -// sw REGX, 4(SP) -// -// - reference to previous stack frame -// suppose there's a load to the 5th arguments : FI < 0, StackOffset: 16. -// The emitted instruction will be something like: -// lw REGX, 16+StackSize(SP) -// -// Since the total stack size is unknown on LowerFormalArguments, all -// stack references (ObjectOffset) created to reference the function -// arguments, are negative numbers. This way, on eliminateFrameIndex it's -// possible to detect those references and the offsets are adjusted to -// their real location. -// -//===----------------------------------------------------------------------===// - -void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const -{ - MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); - unsigned RegSize = Subtarget.isGP32bit() ? 4 : 8; - bool HasGP = MipsFI->needGPSaveRestore(); - - // Min and Max CSI FrameIndex. - int MinCSFI = -1, MaxCSFI = -1; - - // See the description at MipsMachineFunction.h - int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1; - - // Replace the dummy '0' SPOffset by the negative offsets, as explained on - // LowerFormalArguments. Leaving '0' for while is necessary to avoid - // the approach done by calculateFrameObjectOffsets to the stack frame. - MipsFI->adjustLoadArgsFI(MFI); - MipsFI->adjustStoreVarArgsFI(MFI); - - // It happens that the default stack frame allocation order does not directly - // map to the convention used for mips. So we must fix it. We move the callee - // save register slots after the local variables area, as described in the - // stack frame above. - unsigned CalleeSavedAreaSize = 0; - if (!CSI.empty()) { - MinCSFI = CSI[0].getFrameIdx(); - MaxCSFI = CSI[CSI.size()-1].getFrameIdx(); - } - for (unsigned i = 0, e = CSI.size(); i != e; ++i) - CalleeSavedAreaSize += MFI->getObjectAlignment(CSI[i].getFrameIdx()); - - unsigned StackOffset = HasGP ? (MipsFI->getGPStackOffset()+RegSize) - : (Subtarget.isABI_O32() ? 16 : 0); - - // Adjust local variables. They should come on the stack right - // after the arguments. - int LastOffsetFI = -1; - for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (i >= MinCSFI && i <= MaxCSFI) - continue; - if (MFI->isDeadObjectIndex(i)) - continue; - unsigned Offset = - StackOffset + MFI->getObjectOffset(i) - CalleeSavedAreaSize; - if (LastOffsetFI == -1) - LastOffsetFI = i; - if (Offset > MFI->getObjectOffset(LastOffsetFI)) - LastOffsetFI = i; - MFI->setObjectOffset(i, Offset); - } - - // Adjust CPU Callee Saved Registers Area. Registers RA and FP must - // be saved in this CPU Area. This whole area must be aligned to the - // default Stack Alignment requirements. 
- if (LastOffsetFI >= 0) - StackOffset = MFI->getObjectOffset(LastOffsetFI)+ - MFI->getObjectSize(LastOffsetFI); - StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign); - - for (unsigned i = 0, e = CSI.size(); i != e ; ++i) { - unsigned Reg = CSI[i].getReg(); - if (!Mips::CPURegsRegisterClass->contains(Reg)) - break; - MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset); - TopCPUSavedRegOff = StackOffset; - StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx()); - } - - // Stack locations for FP and RA. If only one of them is used, - // the space must be allocated for both, otherwise no space at all. - if (hasFP(MF) || MFI->adjustsStack()) { - // FP stack location - MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), - StackOffset); - MipsFI->setFPStackOffset(StackOffset); - TopCPUSavedRegOff = StackOffset; - StackOffset += RegSize; - - // SP stack location - MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), - StackOffset); - MipsFI->setRAStackOffset(StackOffset); - StackOffset += RegSize; - - if (MFI->adjustsStack()) - TopCPUSavedRegOff += RegSize; - } - - StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign); - - // Adjust FPU Callee Saved Registers Area. This Area must be - // aligned to the default Stack Alignment requirements. - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (Mips::CPURegsRegisterClass->contains(Reg)) - continue; - MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset); - TopFPUSavedRegOff = StackOffset; - StackOffset += MFI->getObjectAlignment(CSI[i].getFrameIdx()); - } - StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign); - - // Update frame info - MFI->setStackSize(StackOffset); - - // Recalculate the final tops offset. The final values must be '0' - // if there isn't a callee saved register for CPU or FPU, otherwise - // a negative offset is needed. - if (TopCPUSavedRegOff >= 0) - MipsFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset); - - if (TopFPUSavedRegOff >= 0) - MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset); -} - -// hasFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. -bool MipsRegisterInfo:: -hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); -} - // This function eliminate ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions void MipsRegisterInfo:: @@ -363,106 +184,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } void MipsRegisterInfo:: -emitPrologue(MachineFunction &MF) const -{ - MachineBasicBlock &MBB = MF.front(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_); - - // Get the right frame order for Mips. - adjustMipsStackFrame(MF); - - // Get the number of bytes to allocate from the FrameInfo. - unsigned StackSize = MFI->getStackSize(); - - // No need to allocate space on the stack. 
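To make the addressing model of the deleted prologue code concrete: the stack is dropped once by StackSize, after which locals sit at small positive offsets from $sp while incoming arguments on the caller's frame are reached with offsets that exceed StackSize, exactly as the deleted comment block's "lw REGX, 16+StackSize(SP)" example shows. A small sketch of that offset arithmetic, with illustrative names:

    #include <cstdio>

    // Address of a local frame object: a positive offset from the adjusted
    // $sp, as in "sw REGX, 4($sp)" for a local at offset 4.
    static int localSlotAddr(int SP, int ObjectOffset) {
      return SP + ObjectOffset;
    }

    // Address of an incoming argument left on the caller's frame: the offset
    // deliberately exceeds StackSize, as in "lw REGX, 16+StackSize($sp)".
    static int callerArgAddr(int SP, int StackSize, int ArgOffset) {
      return SP + StackSize + ArgOffset;
    }

    int main() {
      int SP = 1000, StackSize = 32;
      printf("%d %d\n", localSlotAddr(SP, 4), callerArgAddr(SP, StackSize, 16));
    }

The deleted emitPrologue below performs exactly that single adjustment (addiu $sp, $sp, -StackSize) before saving $ra and $fp at their computed offsets.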
- if (StackSize == 0 && !MFI->adjustsStack()) return; - - int FPOffset = MipsFI->getFPStackOffset(); - int RAOffset = MipsFI->getRAStackOffset(); - - BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER)); - - // TODO: check need from GP here. - if (isPIC && Subtarget.isABI_O32()) - BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD)).addReg(getPICCallReg()); - BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); - - // Adjust stack : addi sp, sp, (-imm) - BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP) - .addReg(Mips::SP).addImm(-StackSize); - - // Save the return address only if the function isnt a leaf one. - // sw $ra, stack_loc($sp) - if (MFI->adjustsStack()) { - BuildMI(MBB, MBBI, dl, TII.get(Mips::SW)) - .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP); - } - - // if framepointer enabled, save it and set it - // to point to the stack pointer - if (hasFP(MF)) { - // sw $fp,stack_loc($sp) - BuildMI(MBB, MBBI, dl, TII.get(Mips::SW)) - .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP); - - // move $fp, $sp - BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP) - .addReg(Mips::SP).addReg(Mips::ZERO); - } - - // Restore GP from the saved stack location - if (MipsFI->needGPSaveRestore()) - BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)) - .addImm(MipsFI->getGPStackOffset()); -} - -void MipsRegisterInfo:: -emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const -{ - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - DebugLoc dl = MBBI->getDebugLoc(); - - // Get the number of bytes from FrameInfo - int NumBytes = (int) MFI->getStackSize(); - - // Get the FI's where RA and FP are saved. - int FPOffset = MipsFI->getFPStackOffset(); - int RAOffset = MipsFI->getRAStackOffset(); - - // if framepointer enabled, restore it and restore the - // stack pointer - if (hasFP(MF)) { - // move $sp, $fp - BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::SP) - .addReg(Mips::FP).addReg(Mips::ZERO); - - // lw $fp,stack_loc($sp) - BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::FP) - .addImm(FPOffset).addReg(Mips::SP); - } - - // Restore the return address only if the function isnt a leaf one. - // lw $ra, stack_loc($sp) - if (MFI->adjustsStack()) { - BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA) - .addImm(RAOffset).addReg(Mips::SP); - } - - // adjust stack : insert addi sp, sp, (imm) - if (NumBytes) { - BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP) - .addReg(Mips::SP).addImm(NumBytes); - } -} - - -void MipsRegisterInfo:: processFunctionBeforeFrameFinalized(MachineFunction &MF) const { // Set the stack offset where GP must be saved/loaded from. MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -478,7 +199,9 @@ getRARegister() const { unsigned MipsRegisterInfo:: getFrameRegister(const MachineFunction &MF) const { - return hasFP(MF) ? Mips::FP : Mips::SP; + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + return TFI->hasFP(MF) ? 
Mips::FP : Mips::SP; } unsigned MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 89282f8..a7f4bf9 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -44,8 +44,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const; - bool hasFP(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; @@ -56,9 +54,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - /// Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index 055ff32..49ca5d1 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -40,7 +40,7 @@ def IIPseudo : InstrItinClass; //===----------------------------------------------------------------------===// // Mips Generic instruction itineraries. //===----------------------------------------------------------------------===// -def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [ +def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>, InstrItinData<IILoad , [InstrStage<3, [ALU]>]>, InstrItinData<IIStore , [InstrStage<1, [ALU]>]>, diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 2d5fd22..e4f4b33 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -31,7 +31,7 @@ public: protected: enum MipsArchEnum { - Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2, Mips64, Mips64r2 + Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2 }; // Mips architecture version @@ -100,6 +100,8 @@ public: const std::string &CPU); bool isMips1() const { return MipsArchVersion == Mips1; } + bool isMips32() const { return MipsArchVersion >= Mips32; } + bool isMips32r2() const { return MipsArchVersion == Mips32r2; } bool isLittle() const { return IsLittle; } bool isFP64bit() const { return IsFP64bit; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index ad3eb9e..7a2dd1f 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -30,18 +30,18 @@ extern "C" void LLVMInitializeMipsTarget() { // The stack is always 8 byte aligned // On function prologue, the stack is created by decrementing // its pointer. Once decremented, all references are done with positive -// offset from the stack/frame pointer, using StackGrowsUp enables +// offset from the stack/frame pointer, using StackGrowsUp enables // an easier handling. // Using CodeModel::Large enables different CALL behavior. MipsTargetMachine:: MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool isLittle=false): LLVMTargetMachine(T, TT), - Subtarget(TT, FS, isLittle), + Subtarget(TT, FS, isLittle), DataLayout(isLittle ? 
std::string("e-p:32:32:32-i8:8:32-i16:16:32-n32") : - std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")), - InstrInfo(*this), - FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), + std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")), + InstrInfo(*this), + FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this) { // Abicall enables PIC by default if (getRelocationModel() == Reloc::Default) { @@ -57,20 +57,20 @@ MipselTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : MipsTargetMachine(T, TT, FS, true) {} -// Install an instruction selector pass using +// Install an instruction selector pass using // the ISelDag to gen Mips code. bool MipsTargetMachine:: -addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) +addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { PM.add(createMipsISelDag(*this)); return false; } -// Implemented by targets that want to run passes immediately before -// machine code is emitted. return true if -print-machineinstrs should +// Implemented by targets that want to run passes immediately before +// machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. bool MipsTargetMachine:: -addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) +addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { PM.add(createMipsDelaySlotFillerPass(*this)); return true; diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index d63976f..43ab798 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -17,39 +17,40 @@ #include "MipsSubtarget.h" #include "MipsInstrInfo.h" #include "MipsISelLowering.h" +#include "MipsFrameLowering.h" #include "MipsSelectionDAGInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" namespace llvm { class formatted_raw_ostream; - + class MipsTargetMachine : public LLVMTargetMachine { MipsSubtarget Subtarget; const TargetData DataLayout; // Calculates type size & alignment MipsInstrInfo InstrInfo; - TargetFrameInfo FrameInfo; + MipsFrameLowering FrameLowering; MipsTargetLowering TLInfo; MipsSelectionDAGInfo TSInfo; public: MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool isLittle); - - virtual const MipsInstrInfo *getInstrInfo() const + + virtual const MipsInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetFrameInfo *getFrameInfo() const - { return &FrameInfo; } - virtual const MipsSubtarget *getSubtargetImpl() const + virtual const TargetFrameLowering *getFrameLowering() const + { return &FrameLowering; } + virtual const MipsSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const TargetData *getTargetData() const + virtual const TargetData *getTargetData() const { return &DataLayout;} virtual const MipsRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual const MipsTargetLowering *getTargetLowering() const { + virtual const MipsTargetLowering *getTargetLowering() const { return &TLInfo; } diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 405f419..cf5d1b5 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -16,6 +16,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" +#include 
"llvm/Support/ELF.h" using namespace llvm; static cl::opt<unsigned> @@ -25,21 +26,21 @@ SSThreshold("mips-ssection-threshold", cl::Hidden, void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TargetLoweringObjectFileELF::Initialize(Ctx, TM); - + SmallDataSection = - getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".sdata", ELF::SHT_PROGBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, SectionKind::getDataRel()); - + SmallBSSSection = - getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".sbss", ELF::SHT_NOBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, SectionKind::getBSS()); - + } -// A address must be loaded from a small section if its size is less than the -// small section size threshold. Data in this section must be addressed using +// A address must be loaded from a small section if its size is less than the +// small section size threshold. Data in this section must be addressed using // gp_rel operator. static bool IsInSmallSection(uint64_t Size) { return Size > 0 && Size <= SSThreshold; @@ -49,7 +50,7 @@ bool MipsTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) const { if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) return false; - + return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM)); } @@ -68,11 +69,11 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV); if (!GVA) return false; - + // We can only do this for datarel or BSS objects for now. if (!Kind.isBSS() && !Kind.isDataRel()) return false; - + // If this is a internal constant string, there is a special // section for it, but not in small data/bss. if (Kind.isMergeable1ByteCString()) @@ -89,13 +90,13 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*" // sections? - + // Handle Small Section classification here. if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) return SmallBSSSection; if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind)) return SmallDataSection; - + // Otherwise, we work the same as ELF. return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM); } diff --git a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt deleted file mode 100644 index d36bb8e..0000000 --- a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -include_directories( - ${CMAKE_CURRENT_BINARY_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}/.. - ) - -add_llvm_library(LLVMPIC16AsmPrinter - PIC16AsmPrinter.cpp - ) -add_dependencies(LLVMPIC16AsmPrinter PIC16CodeGenTable_gen) diff --git a/lib/Target/PIC16/AsmPrinter/Makefile b/lib/Target/PIC16/AsmPrinter/Makefile deleted file mode 100644 index e3c0684..0000000 --- a/lib/Target/PIC16/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/PIC16/AsmPrinter/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. 
-LIBRARYNAME = LLVMPIC16AsmPrinter - -# Hack: we need to include 'main' pic16 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp deleted file mode 100644 index b665817..0000000 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ /dev/null @@ -1,512 +0,0 @@ -//===-- PIC16AsmPrinter.cpp - PIC16 LLVM assembly writer ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to PIC16 assembly language. -// -//===----------------------------------------------------------------------===// - -#include "PIC16ABINames.h" -#include "PIC16AsmPrinter.h" -#include "PIC16Section.h" -#include "PIC16MCAsmInfo.h" -#include "PIC16MachineFunctionInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Module.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallString.h" -#include <cstring> -using namespace llvm; - -#include "PIC16GenAsmWriter.inc" - -PIC16AsmPrinter::PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) -: AsmPrinter(TM, Streamer), DbgInfo(Streamer, TM.getMCAsmInfo()) { - PMAI = static_cast<const PIC16MCAsmInfo*>(TM.getMCAsmInfo()); - PTOF = &getObjFileLowering(); -} - -void PIC16AsmPrinter::EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - - OutStreamer.EmitRawText(OS.str()); -} - -static int getFunctionColor(const Function *F) { - if (F->hasSection()) { - std::string Sectn = F->getSection(); - std::string StrToFind = "Overlay="; - std::string::size_type Pos = Sectn.find(StrToFind); - - // Retreive the color number if the key is found. - if (Pos != std::string::npos) { - Pos += StrToFind.length(); - std::string Color = ""; - char c = Sectn.at(Pos); - // A Color can only consist of digits. - while (c >= '0' && c<= '9') { - Color.append(1,c); - Pos++; - if (Pos >= Sectn.length()) - break; - c = Sectn.at(Pos); - } - return atoi(Color.c_str()); - } - } - - // Color was not set for function, so return -1. - return -1; -} - -// Color the Auto section of the given function. -void PIC16AsmPrinter::ColorAutoSection(const Function *F) { - std::string SectionName = PAN::getAutosSectionName(CurrentFnSym->getName()); - PIC16Section* Section = PTOF->findPIC16Section(SectionName); - if (Section != NULL) { - int Color = getFunctionColor(F); - if (Color >= 0) - Section->setColor(Color); - } -} - - -/// runOnMachineFunction - This emits the frame section, autos section and -/// assembly for each instruction. Also takes care of function begin debug -/// directive and file begin debug directive (if required) for the function. 
-/// -bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { - // This calls the base class function required to be called at beginning - // of runOnMachineFunction. - SetupMachineFunction(MF); - - // Put the color information from function to its auto section. - const Function *F = MF.getFunction(); - ColorAutoSection(F); - - // Emit the function frame (args and temps). - EmitFunctionFrame(MF); - - DbgInfo.BeginFunction(MF); - - // Now emit the instructions of function in its code section. - const MCSection *fCodeSection = - getObjFileLowering().SectionForCode(CurrentFnSym->getName(), - PAN::isISR(F->getSection())); - - // Start the Code Section. - OutStreamer.SwitchSection(fCodeSection); - - // Emit the frame address of the function at the beginning of code. - OutStreamer.EmitRawText("\tretlw low(" + - Twine(PAN::getFrameLabel(CurrentFnSym->getName())) + - ")"); - OutStreamer.EmitRawText("\tretlw high(" + - Twine(PAN::getFrameLabel(CurrentFnSym->getName())) + - ")"); - - // Emit function start label. - OutStreamer.EmitLabel(CurrentFnSym); - - DebugLoc CurDL; - // Print out code for the function. - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - - // Print a label for the basic block. - if (I != MF.begin()) - EmitBasicBlockStart(I); - - // Print a basic block. - for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Emit the line directive if source line changed. - DebugLoc DL = II->getDebugLoc(); - if (!DL.isUnknown() && DL != CurDL) { - DbgInfo.ChangeDebugLoc(MF, DL); - CurDL = DL; - } - - // Print the assembly for the instruction. - EmitInstruction(II); - } - } - - // Emit function end debug directives. - DbgInfo.EndFunction(MF); - - return false; // we didn't modify anything. -} - - -// printOperand - print operand of insn. -void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(opNum); - const Function *F = MI->getParent()->getParent()->getFunction(); - - switch (MO.getType()) { - case MachineOperand::MO_Register: - { - // For indirect load/store insns, the fsr name is printed as INDF. - std::string RegName = getRegisterName(MO.getReg()); - if ((MI->getOpcode() == PIC16::load_indirect) || - (MI->getOpcode() == PIC16::store_indirect)) - RegName.replace (0, 3, "INDF"); - O << RegName; - } - return; - - case MachineOperand::MO_Immediate: - O << (int)MO.getImm(); - return; - - case MachineOperand::MO_GlobalAddress: { - MCSymbol *Sym = Mang->getSymbol(MO.getGlobal()); - // FIXME: currently we do not have a memcpy def coming in the module - // by any chance, as we do not link in those as .bc lib. So these calls - // are always external and it is safe to emit an extern. - if (PAN::isMemIntrinsic(Sym->getName())) - LibcallDecls.insert(Sym->getName()); - - O << *Sym; - break; - } - case MachineOperand::MO_ExternalSymbol: { - const char *Sname = MO.getSymbolName(); - std::string Printname = Sname; - - // Intrinsic stuff needs to be renamed if we are printing IL fn. - if (PAN::isIntrinsicStuff(Printname)) { - if (PAN::isISR(F->getSection())) { - Printname = PAN::Rename(Sname); - } - // Record these decls, we need to print them in asm as extern. - LibcallDecls.insert(Printname); - } - - O << Printname; - break; - } - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - - default: - llvm_unreachable(" Operand type not supported."); - } -} - -/// printCCOperand - Print the cond code operand. 
-/// -void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - int CC = (int)MI->getOperand(opNum).getImm(); - O << PIC16CondCodeToString((PIC16CC::CondCodes)CC); -} - -/// printLibcallDecls - print the extern declarations for compiler -/// intrinsics. -/// -void PIC16AsmPrinter::printLibcallDecls() { - // If no libcalls used, return. - if (LibcallDecls.empty()) return; - - OutStreamer.AddComment("External decls for libcalls - BEGIN"); - OutStreamer.AddBlankLine(); - - for (std::set<std::string>::const_iterator I = LibcallDecls.begin(), - E = LibcallDecls.end(); I != E; I++) - OutStreamer.EmitRawText(MAI->getExternDirective() + Twine(*I)); - - OutStreamer.AddComment("External decls for libcalls - END"); - OutStreamer.AddBlankLine(); -} - -/// doInitialization - Perform Module level initializations here. -/// One task that we do here is to sectionize all global variables. -/// The MemSelOptimizer pass depends on the sectionizing. -/// -bool PIC16AsmPrinter::doInitialization(Module &M) { - bool Result = AsmPrinter::doInitialization(M); - - // Every asmbly contains these std headers. - OutStreamer.EmitRawText(StringRef("\n#include p16f1xxx.inc")); - OutStreamer.EmitRawText(StringRef("#include stdmacros.inc")); - - // Set the section names for all globals. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - - // Record External Var Decls. - if (I->isDeclaration()) { - ExternalVarDecls.push_back(I); - continue; - } - - // Record Exteranl Var Defs. - if (I->hasExternalLinkage() || I->hasCommonLinkage()) { - ExternalVarDefs.push_back(I); - } - - // Sectionify actual data. - if (!I->hasAvailableExternallyLinkage()) { - const MCSection *S = getObjFileLowering().SectionForGlobal(I, Mang, TM); - - I->setSection(((const PIC16Section *)S)->getName()); - } - } - - DbgInfo.BeginModule(M); - EmitFunctionDecls(M); - EmitUndefinedVars(M); - EmitDefinedVars(M); - EmitIData(M); - EmitUData(M); - EmitRomData(M); - EmitSharedUdata(M); - EmitUserSections(M); - return Result; -} - -/// Emit extern decls for functions imported from other modules, and emit -/// global declarations for function defined in this module and which are -/// available to other modules. -/// -void PIC16AsmPrinter::EmitFunctionDecls(Module &M) { - // Emit declarations for external functions. - OutStreamer.AddComment("Function Declarations - BEGIN"); - OutStreamer.AddBlankLine(); - for (Module::iterator I = M.begin(), E = M.end(); I != E; I++) { - if (I->isIntrinsic() || I->getName() == "@abort") - continue; - - if (!I->isDeclaration() && !I->hasExternalLinkage()) - continue; - - MCSymbol *Sym = Mang->getSymbol(I); - - // Do not emit memcpy, memset, and memmove here. - // Calls to these routines can be generated in two ways, - // 1. User calling the standard lib function - // 2. Codegen generating these calls for llvm intrinsics. - // In the first case a prototype is alread availale, while in - // second case the call is via and externalsym and the prototype is missing. - // So declarations for these are currently always getting printing by - // tracking both kind of references in printInstrunction. - if (I->isDeclaration() && PAN::isMemIntrinsic(Sym->getName())) continue; - - const char *directive = I->isDeclaration() ? 
MAI->getExternDirective() : - MAI->getGlobalDirective(); - - OutStreamer.EmitRawText(directive + Twine(Sym->getName())); - OutStreamer.EmitRawText(directive + - Twine(PAN::getRetvalLabel(Sym->getName()))); - OutStreamer.EmitRawText(directive + - Twine(PAN::getArgsLabel(Sym->getName()))); - } - - OutStreamer.AddComment("Function Declarations - END"); - OutStreamer.AddBlankLine(); - -} - -// Emit variables imported from other Modules. -void PIC16AsmPrinter::EmitUndefinedVars(Module &M) { - std::vector<const GlobalVariable*> Items = ExternalVarDecls; - if (!Items.size()) return; - - OutStreamer.AddComment("Imported Variables - BEGIN"); - OutStreamer.AddBlankLine(); - for (unsigned j = 0; j < Items.size(); j++) - OutStreamer.EmitRawText(MAI->getExternDirective() + - Twine(Mang->getSymbol(Items[j])->getName())); - - OutStreamer.AddComment("Imported Variables - END"); - OutStreamer.AddBlankLine(); -} - -// Emit variables defined in this module and are available to other modules. -void PIC16AsmPrinter::EmitDefinedVars(Module &M) { - std::vector<const GlobalVariable*> Items = ExternalVarDefs; - if (!Items.size()) return; - - OutStreamer.AddComment("Exported Variables - BEGIN"); - OutStreamer.AddBlankLine(); - - for (unsigned j = 0; j < Items.size(); j++) - OutStreamer.EmitRawText(MAI->getGlobalDirective() + - Twine(Mang->getSymbol(Items[j])->getName())); - OutStreamer.AddComment("Exported Variables - END"); - OutStreamer.AddBlankLine(); -} - -// Emit initialized data placed in ROM. -void PIC16AsmPrinter::EmitRomData(Module &M) { - EmitSingleSection(PTOF->ROMDATASection()); -} - -// Emit Shared section udata. -void PIC16AsmPrinter::EmitSharedUdata(Module &M) { - EmitSingleSection(PTOF->SHAREDUDATASection()); -} - -bool PIC16AsmPrinter::doFinalization(Module &M) { - EmitAllAutos(M); - printLibcallDecls(); - DbgInfo.EndModule(M); - OutStreamer.EmitRawText(StringRef("\tEND")); - return AsmPrinter::doFinalization(M); -} - -void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) { - const Function *F = MF.getFunction(); - const TargetData *TD = TM.getTargetData(); - PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); - - // Emit the data section name. - - PIC16Section *fPDataSection = - const_cast<PIC16Section *>(getObjFileLowering(). - SectionForFrame(CurrentFnSym->getName())); - - fPDataSection->setColor(getFunctionColor(F)); - OutStreamer.SwitchSection(fPDataSection); - - // Emit function frame label - OutStreamer.EmitRawText(PAN::getFrameLabel(CurrentFnSym->getName()) + - Twine(":")); - - const Type *RetType = F->getReturnType(); - unsigned RetSize = 0; - if (RetType->getTypeID() != Type::VoidTyID) - RetSize = TD->getTypeAllocSize(RetType); - - //Emit function return value space - // FIXME: Do not emit RetvalLable when retsize is zero. To do this - // we will need to avoid printing a global directive for Retval label - // in emitExternandGloblas. 
- if(RetSize > 0) - OutStreamer.EmitRawText(PAN::getRetvalLabel(CurrentFnSym->getName()) + - Twine(" RES ") + Twine(RetSize)); - else - OutStreamer.EmitRawText(PAN::getRetvalLabel(CurrentFnSym->getName()) + - Twine(":")); - - // Emit variable to hold the space for function arguments - unsigned ArgSize = 0; - for (Function::const_arg_iterator argi = F->arg_begin(), - arge = F->arg_end(); argi != arge ; ++argi) { - const Type *Ty = argi->getType(); - ArgSize += TD->getTypeAllocSize(Ty); - } - - OutStreamer.EmitRawText(PAN::getArgsLabel(CurrentFnSym->getName()) + - Twine(" RES ") + Twine(ArgSize)); - - // Emit temporary space - int TempSize = FuncInfo->getTmpSize(); - if (TempSize > 0) - OutStreamer.EmitRawText(PAN::getTempdataLabel(CurrentFnSym->getName()) + - Twine(" RES ") + Twine(TempSize)); -} - - -void PIC16AsmPrinter::EmitInitializedDataSection(const PIC16Section *S) { - /// Emit Section header. - OutStreamer.SwitchSection(S); - - std::vector<const GlobalVariable*> Items = S->Items; - for (unsigned j = 0; j < Items.size(); j++) { - Constant *C = Items[j]->getInitializer(); - int AddrSpace = Items[j]->getType()->getAddressSpace(); - OutStreamer.EmitRawText(Mang->getSymbol(Items[j])->getName()); - EmitGlobalConstant(C, AddrSpace); - } -} - -// Print all IDATA sections. -void PIC16AsmPrinter::EmitIData(Module &M) { - EmitSectionList (M, PTOF->IDATASections()); -} - -void PIC16AsmPrinter:: -EmitUninitializedDataSection(const PIC16Section *S) { - const TargetData *TD = TM.getTargetData(); - OutStreamer.SwitchSection(S); - std::vector<const GlobalVariable*> Items = S->Items; - for (unsigned j = 0; j < Items.size(); j++) { - Constant *C = Items[j]->getInitializer(); - const Type *Ty = C->getType(); - unsigned Size = TD->getTypeAllocSize(Ty); - OutStreamer.EmitRawText(Mang->getSymbol(Items[j])->getName() + - Twine(" RES ") + Twine(Size)); - } -} - -// Print all UDATA sections. -void PIC16AsmPrinter::EmitUData(Module &M) { - EmitSectionList (M, PTOF->UDATASections()); -} - -// Print all USER sections. -void PIC16AsmPrinter::EmitUserSections(Module &M) { - EmitSectionList (M, PTOF->USERSections()); -} - -// Print all AUTO sections. -void PIC16AsmPrinter::EmitAllAutos(Module &M) { - EmitSectionList (M, PTOF->AUTOSections()); -} - -extern "C" void LLVMInitializePIC16AsmPrinter() { - RegisterAsmPrinter<PIC16AsmPrinter> X(ThePIC16Target); -} - -// Emit one data section using correct section emitter based on section type. -void PIC16AsmPrinter::EmitSingleSection(const PIC16Section *S) { - if (S == NULL) return; - - switch (S->getType()) { - default: llvm_unreachable ("unknow user section type"); - case UDATA: - case UDATA_SHR: - case UDATA_OVR: - EmitUninitializedDataSection(S); - break; - case IDATA: - case ROMDATA: - EmitInitializedDataSection(S); - break; - } -} - -// Emit a list of sections. -void PIC16AsmPrinter:: -EmitSectionList(Module &M, const std::vector<PIC16Section *> &SList) { - for (unsigned i = 0; i < SList.size(); i++) { - // Exclude llvm specific metadata sections. 
- if (SList[i]->getName().find("llvm.") != std::string::npos) - continue; - OutStreamer.AddBlankLine(); - EmitSingleSection(SList[i]); - } -} - diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h deleted file mode 100644 index aa2e1f4..0000000 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h +++ /dev/null @@ -1,88 +0,0 @@ -//===-- PIC16AsmPrinter.h - PIC16 LLVM assembly writer ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to PIC16 assembly language. -// -//===----------------------------------------------------------------------===// - -#ifndef PIC16ASMPRINTER_H -#define PIC16ASMPRINTER_H - -#include "PIC16.h" -#include "PIC16TargetMachine.h" -#include "PIC16DebugInfo.h" -#include "PIC16MCAsmInfo.h" -#include "PIC16TargetObjectFile.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetMachine.h" -#include <list> -#include <set> -#include <string> - -namespace llvm { - class LLVM_LIBRARY_VISIBILITY PIC16AsmPrinter : public AsmPrinter { - public: - explicit PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer); - private: - virtual const char *getPassName() const { - return "PIC16 Assembly Printer"; - } - - const PIC16TargetObjectFile &getObjFileLowering() const { - return (const PIC16TargetObjectFile &)AsmPrinter::getObjFileLowering(); - } - - bool runOnMachineFunction(MachineFunction &F); - void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - void printInstruction(const MachineInstr *MI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - - void EmitInstruction(const MachineInstr *MI); - void EmitFunctionDecls (Module &M); - void EmitUndefinedVars (Module &M); - void EmitDefinedVars (Module &M); - void EmitIData (Module &M); - void EmitUData (Module &M); - void EmitAllAutos (Module &M); - void EmitRomData (Module &M); - void EmitSharedUdata(Module &M); - void EmitUserSections (Module &M); - void EmitFunctionFrame(MachineFunction &MF); - void printLibcallDecls(); - void EmitUninitializedDataSection(const PIC16Section *S); - void EmitInitializedDataSection(const PIC16Section *S); - void EmitSingleSection(const PIC16Section *S); - void EmitSectionList(Module &M, - const std::vector< PIC16Section *> &SList); - void ColorAutoSection(const Function *F); - protected: - bool doInitialization(Module &M); - bool doFinalization(Module &M); - - /// EmitGlobalVariable - Emit the specified global variable and its - /// initializer to the output stream. - virtual void EmitGlobalVariable(const GlobalVariable *GV) { - // PIC16 doesn't use normal hooks for this. - } - - private: - const PIC16TargetObjectFile *PTOF; - PIC16DbgInfo DbgInfo; - const PIC16MCAsmInfo *PMAI; - std::set<std::string> LibcallDecls; // Sorted & uniqued set of extern decls. 
- std::vector<const GlobalVariable *> ExternalVarDecls; - std::vector<const GlobalVariable *> ExternalVarDefs; - }; -} // end of namespace - -#endif diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt deleted file mode 100644 index 2b6cb9e..0000000 --- a/lib/Target/PIC16/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS PIC16.td) - -tablegen(PIC16GenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(PIC16GenRegisterNames.inc -gen-register-enums) -tablegen(PIC16GenRegisterInfo.inc -gen-register-desc) -tablegen(PIC16GenInstrNames.inc -gen-instr-enums) -tablegen(PIC16GenInstrInfo.inc -gen-instr-desc) -tablegen(PIC16GenAsmWriter.inc -gen-asm-writer) -tablegen(PIC16GenDAGISel.inc -gen-dag-isel) -tablegen(PIC16GenCallingConv.inc -gen-callingconv) -tablegen(PIC16GenSubtarget.inc -gen-subtarget) - -add_llvm_target(PIC16CodeGen - PIC16DebugInfo.cpp - PIC16InstrInfo.cpp - PIC16ISelDAGToDAG.cpp - PIC16ISelLowering.cpp - PIC16MemSelOpt.cpp - PIC16MCAsmInfo.cpp - PIC16RegisterInfo.cpp - PIC16Section.cpp - PIC16Subtarget.cpp - PIC16TargetMachine.cpp - PIC16TargetObjectFile.cpp - PIC16SelectionDAGInfo.cpp - ) diff --git a/lib/Target/PIC16/Makefile b/lib/Target/PIC16/Makefile deleted file mode 100644 index 9e784d1..0000000 --- a/lib/Target/PIC16/Makefile +++ /dev/null @@ -1,24 +0,0 @@ -##===- lib/Target/PIC16/Makefile ---------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMPIC16CodeGen -TARGET = PIC16 - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = PIC16GenRegisterInfo.h.inc PIC16GenRegisterNames.inc \ - PIC16GenRegisterInfo.inc PIC16GenInstrNames.inc \ - PIC16GenInstrInfo.inc PIC16GenAsmWriter.inc \ - PIC16GenDAGISel.inc PIC16GenCallingConv.inc \ - PIC16GenSubtarget.inc - -DIRS = AsmPrinter TargetInfo PIC16Passes - -include $(LEVEL)/Makefile.common - diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h deleted file mode 100644 index 08bb3e6..0000000 --- a/lib/Target/PIC16/PIC16.h +++ /dev/null @@ -1,134 +0,0 @@ -//===-- PIC16.h - Top-level interface for PIC16 representation --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in -// the LLVM PIC16 back-end. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_PIC16_H -#define LLVM_TARGET_PIC16_H - -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetMachine.h" -#include <cassert> -#include <sstream> -#include <cstring> -#include <string> -#include <vector> - -namespace llvm { - class PIC16TargetMachine; - class FunctionPass; - class MachineCodeEmitter; - class formatted_raw_ostream; - -namespace PIC16CC { - enum CondCodes { - EQ, - NE, - LT, - LE, - GT, - GE, - ULT, - UGT, - ULE, - UGE - }; -} - - enum PIC16SectionType { - CODE, - UDATA, - IDATA, - ROMDATA, - UDATA_OVR, - UDATA_SHR - }; - - class ESNames { - std::vector<char*> stk; - ESNames() {} - public: - ~ESNames() { - while (!stk.empty()) - { - char* p = stk.back(); - delete [] p; - stk.pop_back(); - } - } - - // External symbol names require memory to live till the program end. - // So we have to allocate it and keep. Push all such allocations into a - // vector so that they get freed up on termination. - inline static const char *createESName (const std::string &name) { - static ESNames esn; - char *tmpName = new char[name.size() + 1]; - memcpy(tmpName, name.c_str(), name.size() + 1); - esn.stk.push_back(tmpName); - return tmpName; - } - - }; - - inline static const char *PIC16CondCodeToString(PIC16CC::CondCodes CC) { - switch (CC) { - default: llvm_unreachable("Unknown condition code"); - case PIC16CC::NE: return "ne"; - case PIC16CC::EQ: return "eq"; - case PIC16CC::LT: return "lt"; - case PIC16CC::ULT: return "lt"; - case PIC16CC::LE: return "le"; - case PIC16CC::ULE: return "le"; - case PIC16CC::GT: return "gt"; - case PIC16CC::UGT: return "gt"; - case PIC16CC::GE: return "ge"; - case PIC16CC::UGE: return "ge"; - } - } - - inline static bool isSignedComparison(PIC16CC::CondCodes CC) { - switch (CC) { - default: llvm_unreachable("Unknown condition code"); - case PIC16CC::NE: - case PIC16CC::EQ: - case PIC16CC::LT: - case PIC16CC::LE: - case PIC16CC::GE: - case PIC16CC::GT: - return true; - case PIC16CC::ULT: - case PIC16CC::UGT: - case PIC16CC::ULE: - case PIC16CC::UGE: - return false; // condition codes for unsigned comparison. - } - } - - - - FunctionPass *createPIC16ISelDag(PIC16TargetMachine &TM); - // Banksel optimizer pass. - FunctionPass *createPIC16MemSelOptimizerPass(); - - extern Target ThePIC16Target; - extern Target TheCooperTarget; - -} // end namespace llvm; - -// Defines symbolic names for PIC16 registers. This defines a mapping from -// register name to register number. -#include "PIC16GenRegisterNames.inc" - -// Defines symbolic names for the PIC16 instructions. -#include "PIC16GenInstrNames.inc" - -#endif diff --git a/lib/Target/PIC16/PIC16.td b/lib/Target/PIC16/PIC16.td deleted file mode 100644 index b2b9b1c..0000000 --- a/lib/Target/PIC16/PIC16.td +++ /dev/null @@ -1,40 +0,0 @@ -//===- PIC16.td - Describe the PIC16 Target Machine -----------*- tblgen -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This is the top level entry point for the PIC16 target. 
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Target-independent interfaces
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-include "PIC16RegisterInfo.td"
-include "PIC16InstrInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Subtarget Features.
-//===----------------------------------------------------------------------===//
-def FeatureCooper : SubtargetFeature<"cooper", "IsCooper", "true",
-                                     "PIC16 Cooper ISA Support">;
-
-//===----------------------------------------------------------------------===//
-// PIC16 supported processors.
-//===----------------------------------------------------------------------===//
-
-def : Processor<"generic", NoItineraries, []>;
-def : Processor<"cooper", NoItineraries, [FeatureCooper]>;
-
-
-def PIC16InstrInfo : InstrInfo {}
-
-def PIC16 : Target {
-  let InstructionSet = PIC16InstrInfo;
-}
-
diff --git a/lib/Target/PIC16/PIC16ABINames.h b/lib/Target/PIC16/PIC16ABINames.h
deleted file mode 100644
index 4c1a8da..0000000
--- a/lib/Target/PIC16/PIC16ABINames.h
+++ /dev/null
@@ -1,399 +0,0 @@
-//===-- PIC16ABINames.h - PIC16 Naming conventions for ABI -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the functions to manage ABI Naming conventions for PIC16.
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_PIC16ABINAMES_H
-#define LLVM_TARGET_PIC16ABINAMES_H
-
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
-#include <cassert>
-#include <sstream>
-#include <cstring>
-#include <string>
-
-namespace llvm {
-  class PIC16TargetMachine;
-  class FunctionPass;
-  class MachineCodeEmitter;
-  class formatted_raw_ostream;
-
-  // A Central class to manage all ABI naming conventions.
-  // PAN - [P]ic16 [A]BI [N]ames
-  class PAN {
-    public:
-    // Map the name of the symbol to its section name.
-    // Current ABI:
-    // -----------------------------------------------------
-    // ALL Names are prefixed with the symbol '@'.
-    // ------------------------------------------------------
-    // Global variables do not have any '.' in their names.
-    // These are mainly function names and global variable names.
-    // Example - @foo,  @i
-    // Static local variables - @<func>.<var>
-    // -------------------------------------------------------
-    // Functions and auto variables.
-    // Names are mangled as <prefix><funcname>.<tag>.<varname>
-    // Where <prefix> is '@' and <tag> is any one of
-    // the following
-    // .auto. - an automatic var of a function.
-    // .temp. - temporary data of a function.
-    // .ret. - return value label for a function.
-    // .frame. - Frame label for a function where retval, args
-    //           and temps are stored.
-    // .args. - Label used to pass arguments to a direct call.
-    // Example - Function name: @foo
-    //           Its frame: @foo.frame.
-    //           Its retval: @foo.ret.
-    //           Its local vars: @foo.auto.a
-    //           Its temp data: @foo.temp.
-    //           Its arg passing: @foo.args.
-    //----------------------------------------------
-    // Libcall - compiler generated libcall names must start with .lib.
-    //           This id will be used to emit extern decls for libcalls.
-    // Example - libcall name: @.lib.sra.i8
-    //           To pass args: @.lib.sra.i8.args.
-    //           To return val: @.lib.sra.i8.ret.
-    //----------------------------------------------
-    // SECTION Names
-    // uninitialized globals - @udata.<num>.#
-    // initialized globals - @idata.<num>.#
-    // Program memory data - @romdata.#
-    // Variables with user defined section name - <user_defined_section>
-    // Variables with user defined address - @<var>.user_section.<address>.#
-    // Function frame - @<func>.frame_section.
-    // Function autos - @<func>.autos_section.
-    // Overlay sections - @<color>.##
-    // Declarations - Enclosed in comments. No section for them.
-    //----------------------------------------------------------
-
-    // Tags used to mangle different names.
-    enum TAGS {
-      PREFIX_SYMBOL,
-      GLOBAL,
-      STATIC_LOCAL,
-      AUTOS_LABEL,
-      FRAME_LABEL,
-      RET_LABEL,
-      ARGS_LABEL,
-      TEMPS_LABEL,
-
-      LIBCALL,
-
-      FRAME_SECTION,
-      AUTOS_SECTION,
-      CODE_SECTION,
-      USER_SECTION
-    };
-
-    // Textual names of the tags.
-    inline static const char *getTagName(TAGS tag) {
-      switch (tag) {
-      default: return "";
-      case PREFIX_SYMBOL: return "@";
-      case AUTOS_LABEL: return ".auto.";
-      case FRAME_LABEL: return ".frame.";
-      case TEMPS_LABEL: return ".temp.";
-      case ARGS_LABEL: return ".args.";
-      case RET_LABEL: return ".ret.";
-      case LIBCALL: return ".lib.";
-      case FRAME_SECTION: return ".frame_section.";
-      case AUTOS_SECTION: return ".autos_section.";
-      case CODE_SECTION: return ".code_section.";
-      case USER_SECTION: return ".user_section.";
-      }
-    }
-
-    // Get tag type for the Symbol.
-    inline static TAGS getSymbolTag(const std::string &Sym) {
-      if (Sym.find(getTagName(TEMPS_LABEL)) != std::string::npos)
-        return TEMPS_LABEL;
-
-      if (Sym.find(getTagName(FRAME_LABEL)) != std::string::npos)
-        return FRAME_LABEL;
-
-      if (Sym.find(getTagName(RET_LABEL)) != std::string::npos)
-        return RET_LABEL;
-
-      if (Sym.find(getTagName(ARGS_LABEL)) != std::string::npos)
-        return ARGS_LABEL;
-
-      if (Sym.find(getTagName(AUTOS_LABEL)) != std::string::npos)
-        return AUTOS_LABEL;
-
-      if (Sym.find(getTagName(LIBCALL)) != std::string::npos)
-        return LIBCALL;
-
-      // It does not have any Tag. So it's a true global or static local.
-      if (Sym.find(".") == std::string::npos)
-        return GLOBAL;
-
-      // If a . is there, then it may be static local.
-      // We should mangle these as well in clang.
-      if (Sym.find(".") != std::string::npos)
-        return STATIC_LOCAL;
-
-      assert (0 && "Could not determine Symbol's tag");
-      return PREFIX_SYMBOL; // Silence warning when assertions are turned off.
-    }
-
-    // addPrefix - add prefix symbol to a name if there isn't one already.
-    inline static std::string addPrefix (const std::string &Name) {
-      std::string prefix = getTagName (PREFIX_SYMBOL);
-
-      // If this name already has a prefix, nothing to do.
-      if (Name.compare(0, prefix.size(), prefix) == 0)
-        return Name;
-
-      return prefix + Name;
-    }
-
-    // Get mangled func name from a mangled sym name.
-    // In all cases func name is the first component before a '.'.
-    static inline std::string getFuncNameForSym(const std::string &Sym1) {
-      assert (getSymbolTag(Sym1) != GLOBAL && "does not belong to a function");
-
-      std::string Sym = addPrefix(Sym1);
-
-      // Position of the . after func name. That's where func name ends.
-      size_t func_name_end = Sym.find ('.');
-
-      return Sym.substr (0, func_name_end);
-    }
-
-    // Get Frame start label for a func.
-    static std::string getFrameLabel(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(FRAME_LABEL);
-      return Func1 + tag;
-    }
-
-    // Get the retval label for the given function.
-    static std::string getRetvalLabel(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(RET_LABEL);
-      return Func1 + tag;
-    }
-
-    // Get the argument label for the given function.
-    static std::string getArgsLabel(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(ARGS_LABEL);
-      return Func1 + tag;
-    }
-
-    // Get the tempdata label for the given function.
-    static std::string getTempdataLabel(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(TEMPS_LABEL);
-      return Func1 + tag;
-    }
-
-    static std::string getFrameSectionName(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(FRAME_SECTION);
-      return Func1 + tag + "#";
-    }
-
-    static std::string getAutosSectionName(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(AUTOS_SECTION);
-      return Func1 + tag + "#";
-    }
-
-    static std::string getCodeSectionName(const std::string &Func) {
-      std::string Func1 = addPrefix(Func);
-      std::string tag = getTagName(CODE_SECTION);
-      return Func1 + tag + "#";
-    }
-
-    static std::string getUserSectionName(const std::string &Name) {
-      std::string sname = addPrefix(Name);
-      std::string tag = getTagName(USER_SECTION);
-      return sname + tag + "#";
-    }
-
-    // udata, romdata and idata section names are generated by a given number.
-    // @udata.<num>.#
-    static std::string getUdataSectionName(unsigned num,
-                                           std::string prefix = "") {
-      std::ostringstream o;
-      o << getTagName(PREFIX_SYMBOL) << prefix << "udata." << num
-        << ".#";
-      return o.str();
-    }
-
-    static std::string getRomdataSectionName() {
-      return "romdata.#";
-    }
-
-    static std::string getSharedUDataSectionName() {
-      std::ostringstream o;
-      o << getTagName(PREFIX_SYMBOL) << "udata_shr" << ".#";
-      return o.str();
-    }
-
-    static std::string getRomdataSectionName(unsigned num,
-                                             std::string prefix = "") {
-      std::ostringstream o;
-      o << getTagName(PREFIX_SYMBOL) << prefix << "romdata." << num
-        << ".#";
-      return o.str();
-    }
-
-    static std::string getIdataSectionName(unsigned num,
-                                           std::string prefix = "") {
-      std::ostringstream o;
-      o << getTagName(PREFIX_SYMBOL) << prefix << "idata." << num
-        << ".#";
-      return o.str();
-    }
-
-    inline static bool isLocalName (const std::string &Name) {
-      if (getSymbolTag(Name) == AUTOS_LABEL)
-        return true;
-
-      return false;
-    }
-
-
-    inline static bool isMemIntrinsic (const std::string &Name) {
-      if (Name.compare("@memcpy") == 0 || Name.compare("@memset") == 0 ||
-          Name.compare("@memmove") == 0) {
-        return true;
-      }
-
-      return false;
-    }
-
-    // Currently names of libcalls are assigned during TargetLowering
-    // object construction. There is no provision to change them when the
-    // IL code for a function is being generated.
-    // So we have to change these names while printing assembly.
-    // We need to do that mainly for names related to intrinsics. This
-    // function returns true if a name needs to be cloned.
-    inline static bool isIntrinsicStuff(const std::string &Name) {
-      // Return true if the name contains a LIBCALL marker, or is a
-      // MemIntrinsic. These are mainly ARGS_LABEL, RET_LABEL, and the
-      // LIBCALL name itself.
-      if ((Name.find(getTagName(LIBCALL)) != std::string::npos)
-          || isMemIntrinsic(Name))
-        return true;
-
-      return false;
-    }
-
-    // Rename the name for IL.
-    inline static std::string Rename(const std::string &Name) {
-      std::string Newname;
-      // If it's a label (LIBCALL+Func+LABEL), change it to
-      // (LIBCALL+Func+IL+LABEL).
-      TAGS id = getSymbolTag(Name);
-      if (id == ARGS_LABEL || id == RET_LABEL) {
-        std::size_t pos = Name.find(getTagName(id));
-        Newname = Name.substr(0, pos) + ".IL" + getTagName(id);
-        return Newname;
-      }
-
-      // Else, just append IL to name.
-      return Name + ".IL";
-    }
-
-
-
-
-    inline static bool isLocalToFunc (std::string &Func, std::string &Var) {
-      if (! isLocalName(Var)) return false;
-
-      std::string Func1 = addPrefix(Func);
-      // Extract the func name of the variable.
-      const std::string &fname = getFuncNameForSym(Var);
-
-      if (fname.compare(Func1) == 0)
-        return true;
-
-      return false;
-    }
-
-
-    // Get the section for the given external symbol names.
-    // This tries to find the type (Tag) of the symbol from its mangled name
-    // and return appropriate section name for it.
-    static inline std::string getSectionNameForSym(const std::string &Sym1) {
-      std::string Sym = addPrefix(Sym1);
-
-      std::string SectionName;
-
-      std::string Fname = getFuncNameForSym (Sym);
-      TAGS id = getSymbolTag (Sym);
-
-      switch (id) {
-        default : assert (0 && "Could not determine external symbol type");
-        case FRAME_LABEL:
-        case RET_LABEL:
-        case TEMPS_LABEL:
-        case ARGS_LABEL: {
-          return getFrameSectionName(Fname);
-        }
-        case AUTOS_LABEL: {
-          return getAutosSectionName(Fname);
-        }
-      }
-    }
-
-    /// Return Overlay Name for the section.
-    /// The ABI Convention is: @<Color>.##.<section_tag>
-    /// The section_tag is retrieved from the SectName parameter and
-    /// Color is passed in as a parameter.
-    static inline std::string getOverlayName(std::string SectName, int Color) {
-      // FIXME: Only autos_section and frame_section are colored.
-      // So check and assert if the passed SectName does not have AUTOS_SECTION
-      // or FRAME_SECTION tag in it.
-      std::ostringstream o;
-      o << getTagName(PREFIX_SYMBOL) << Color << ".##"
-        << SectName.substr(SectName.find("."));
-
-      return o.str();
-    }
-
-    // Return true if the current function is an ISR.
-    inline static bool isISR(const std::string SectName) {
-      if (SectName.find("interrupt") != std::string::npos)
-        return true;
-
-      return false;
-    }
-
-    // Return the address in ROM at which the ISR starts.
-    inline static std::string getISRAddr(void) {
-      return "0x4";
-    }
-
-    // Returns the name of the clone of a function.
-    static std::string getCloneFnName(const std::string &Func) {
-      return (Func + ".IL");
-    }
-
-    // Returns the name of the clone of a variable.
-    static std::string getCloneVarName(const std::string &Fn,
                                       const std::string &Var) {
-      std::string cloneVarName = Var;
-      // These vars are named like fun.auto.var.
-      // Just replace the function name, with clone function name.
-      std::string cloneFnName = getCloneFnName(Fn);
-      cloneVarName.replace(cloneVarName.find(Fn), Fn.length(), cloneFnName);
-      return cloneVarName;
-    }
-  }; // class PAN.
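The PAN convention above is easiest to verify with a tiny worked example. The following sketch is a minimal, self-contained C++ approximation and not part of the deleted sources; its local addPrefix helper is a simplified stand-in for PAN::addPrefix, and the tag strings are the ones documented in the class comment:

#include <iostream>
#include <string>

// Simplified stand-in for PAN::addPrefix: prepend the '@' prefix symbol
// unless the name already carries one.
static std::string addPrefix(const std::string &Name) {
  return Name.compare(0, 1, "@") == 0 ? Name : "@" + Name;
}

int main() {
  const std::string Func = "foo";
  std::cout << addPrefix(Func) + ".frame." << "\n";          // frame label: @foo.frame.
  std::cout << addPrefix(Func) + ".ret." << "\n";            // retval label: @foo.ret.
  std::cout << addPrefix(Func) + ".args." << "\n";           // arg-passing label: @foo.args.
  std::cout << addPrefix(Func) + ".auto.a" << "\n";          // auto variable 'a': @foo.auto.a
  std::cout << addPrefix(Func) + ".frame_section.#" << "\n"; // frame section: @foo.frame_section.#
  return 0;
}

Note that getSectionNameForSym above routes every frame-related tag (.frame., .ret., .temp., .args.) to the same @foo.frame_section.#, so a function's return value, arguments, and temporaries all land in a single section that the overlay coloring can later rename.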
-} // end namespace llvm; - -#endif diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp deleted file mode 100644 index 7a948de..0000000 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ /dev/null @@ -1,490 +0,0 @@ - -//===-- PIC16DebugInfo.cpp - Implementation for PIC16 Debug Information ======// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the helper functions for representing debug information. -// -//===----------------------------------------------------------------------===// - -#include "PIC16.h" -#include "PIC16ABINames.h" -#include "PIC16DebugInfo.h" -#include "llvm/GlobalVariable.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/DebugLoc.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -using namespace llvm; - -/// PopulateDebugInfo - Populate the TypeNo, Aux[] and TagName from Ty. -/// -void PIC16DbgInfo::PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, - bool &HasAux, int Aux[], - std::string &TagName) { - if (Ty.isBasicType()) - PopulateBasicTypeInfo (Ty, TypeNo); - else if (Ty.isCompositeType()) - PopulateCompositeTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); - else if (Ty.isDerivedType()) - PopulateDerivedTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); - else { - TypeNo = PIC16Dbg::T_NULL; - HasAux = false; - } - return; -} - -/// PopulateBasicTypeInfo- Populate TypeNo for basic type from Ty. -/// -void PIC16DbgInfo::PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo) { - std::string Name = Ty.getName(); - unsigned short BaseTy = GetTypeDebugNumber(Name); - TypeNo = TypeNo << PIC16Dbg::S_BASIC; - TypeNo = TypeNo | (0xffff & BaseTy); -} - -/// PopulateDerivedTypeInfo - Populate TypeNo, Aux[], TagName for derived type -/// from Ty. Derived types are mostly pointers. -/// -void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo, - bool &HasAux, int Aux[], - std::string &TagName) { - - switch(Ty.getTag()) - { - case dwarf::DW_TAG_pointer_type: - TypeNo = TypeNo << PIC16Dbg::S_DERIVED; - TypeNo = TypeNo | PIC16Dbg::DT_PTR; - break; - default: - TypeNo = TypeNo << PIC16Dbg::S_DERIVED; - } - - // We also need to encode the information about the base type of - // pointer in TypeNo. - DIType BaseType = DIDerivedType(Ty).getTypeDerivedFrom(); - PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName); -} - -/// PopulateArrayTypeInfo - Populate TypeNo, Aux[] for array from Ty. -void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo, - bool &HasAux, int Aux[], - std::string &TagName) { - - DICompositeType CTy = DICompositeType(Ty); - DIArray Elements = CTy.getTypeArray(); - unsigned short size = 1; - unsigned short Dimension[4]={0,0,0,0}; - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.getTag() == dwarf::DW_TAG_subrange_type) { - TypeNo = TypeNo << PIC16Dbg::S_DERIVED; - TypeNo = TypeNo | PIC16Dbg::DT_ARY; - DISubrange SubRange = DISubrange(Element); - Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1; - // Each dimension is represented by 2 bytes starting at byte 9. 
-      Aux[8+i*2+0] = Dimension[i];
-      Aux[8+i*2+1] = Dimension[i] >> 8;
-      size = size * Dimension[i];
-    }
-  }
-  HasAux = true;
-  // In the auxiliary entry for an array, the 7th and 8th bytes represent the
-  // array size.
-  Aux[6] = size & 0xff;
-  Aux[7] = size >> 8;
-  DIType BaseType = CTy.getTypeDerivedFrom();
-  PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
-}
-
-/// PopulateStructOrUnionTypeInfo - Populate TypeNo, Aux[], TagName for
-/// structure or union.
-///
-void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty,
-                                                  unsigned short &TypeNo,
-                                                  bool &HasAux, int Aux[],
-                                                  std::string &TagName) {
-  DICompositeType CTy = DICompositeType(Ty);
-  TypeNo = TypeNo << PIC16Dbg::S_BASIC;
-  if (Ty.getTag() == dwarf::DW_TAG_structure_type)
-    TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
-  else
-    TypeNo = TypeNo | PIC16Dbg::T_UNION;
-  TagName = CTy.getName();
-  // UniqueSuffix is .number where number is obtained from
-  // llvm.dbg.composite<number>.
-  // FIXME: This will break when composite type is not represented by
-  // llvm.dbg.composite* global variable. Since we need to revisit
-  // PIC16DebugInfo implementation anyways after the MDNodes based
-  // framework is done, let us continue with the way it is.
-  std::string UniqueSuffix = "." + Ty->getNameStr().substr(18);
-  TagName += UniqueSuffix;
-  unsigned short size = CTy.getSizeInBits()/8;
-  // 7th and 8th byte represent size.
-  HasAux = true;
-  Aux[6] = size & 0xff;
-  Aux[7] = size >> 8;
-}
-
-/// PopulateEnumTypeInfo - Populate TypeNo for enum from Ty.
-void PIC16DbgInfo::PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo) {
-  TypeNo = TypeNo << PIC16Dbg::S_BASIC;
-  TypeNo = TypeNo | PIC16Dbg::T_ENUM;
-}
-
-/// PopulateCompositeTypeInfo - Populate TypeNo, Aux[] and TagName for
-/// composite types from Ty.
-///
-void PIC16DbgInfo::PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo,
-                                              bool &HasAux, int Aux[],
-                                              std::string &TagName) {
-  switch (Ty.getTag()) {
-    case dwarf::DW_TAG_array_type: {
-      PopulateArrayTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
-      break;
-    }
-    case dwarf::DW_TAG_union_type:
-    case dwarf::DW_TAG_structure_type: {
-      PopulateStructOrUnionTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
-      break;
-    }
-    case dwarf::DW_TAG_enumeration_type: {
-      PopulateEnumTypeInfo (Ty, TypeNo);
-      break;
-    }
-    default:
-      TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
-  }
-}
-
-/// GetTypeDebugNumber - Get debug type number for given type.
-///
-unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) {
-  if (type == "char")
-    return PIC16Dbg::T_CHAR;
-  else if (type == "short")
-    return PIC16Dbg::T_SHORT;
-  else if (type == "int")
-    return PIC16Dbg::T_INT;
-  else if (type == "long")
-    return PIC16Dbg::T_LONG;
-  else if (type == "unsigned char")
-    return PIC16Dbg::T_UCHAR;
-  else if (type == "unsigned short")
-    return PIC16Dbg::T_USHORT;
-  else if (type == "unsigned int")
-    return PIC16Dbg::T_UINT;
-  else if (type == "unsigned long")
-    return PIC16Dbg::T_ULONG;
-  else
-    return 0;
-}
-
-/// GetStorageClass - Get storage class for a given debug variable.
-///
-short PIC16DbgInfo::getStorageClass(DIGlobalVariable DIGV) {
-  short ClassNo;
-  if (PAN::isLocalName(DIGV.getName())) {
-    // Generating C_AUTO here fails due to error in linker. Change it once
-    // linker is fixed.
-    ClassNo = PIC16Dbg::C_STAT;
-  }
-  else if (DIGV.isLocalToUnit())
-    ClassNo = PIC16Dbg::C_STAT;
-  else
-    ClassNo = PIC16Dbg::C_EXT;
-  return ClassNo;
-}
-
-/// BeginModule - Emit necessary debug info to start a Module and do other
-/// required initializations.
-void PIC16DbgInfo::BeginModule(Module &M) {
-  // Emit file directive for module.
-  DebugInfoFinder DbgFinder;
-  DbgFinder.processModule(M);
-  if (DbgFinder.compile_unit_count() != 0) {
-    // FIXME : What if more than one CU is present in a module ?
-    MDNode *CU = *DbgFinder.compile_unit_begin();
-    EmitDebugDirectives = true;
-    SwitchToCU(CU);
-  }
-  // Emit debug info for decls of composite types.
-  EmitCompositeTypeDecls(M);
-}
-
-/// Helper to find first valid debug loc for a function.
-///
-static const DebugLoc GetDebugLocForFunction(const MachineFunction &MF) {
-  DebugLoc DL;
-  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
-       I != E; ++I) {
-    for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
-         II != E; ++II) {
-      DL = II->getDebugLoc();
-      if (!DL.isUnknown())
-        return DL;
-    }
-  }
-  return DL;
-}
-
-/// BeginFunction - Emit necessary debug info to start a function.
-///
-void PIC16DbgInfo::BeginFunction(const MachineFunction &MF) {
-  if (! EmitDebugDirectives) return;
-
-  // Retrieve the first valid debug Loc and process it.
-  const DebugLoc &DL = GetDebugLocForFunction(MF);
-  // Emit debug info only if valid debug info is available.
-  if (!DL.isUnknown()) {
-    ChangeDebugLoc(MF, DL, true);
-    EmitFunctBeginDI(MF.getFunction());
-  }
-  // Set current line to 0 so that the .line directive is generated after .bf.
-  CurLine = 0;
-}
-
-/// ChangeDebugLoc - Take necessary steps when DebugLoc changes.
-/// CurFile and CurLine may change as a result of this.
-///
-void PIC16DbgInfo::ChangeDebugLoc(const MachineFunction &MF,
-                                  const DebugLoc &DL, bool IsInBeginFunction) {
-  if (!EmitDebugDirectives) return;
-  assert(!DL.isUnknown() && "can't change to invalid debug loc");
-
-  SwitchToCU(DL.getScope(MF.getFunction()->getContext()));
-  SwitchToLine(DL.getLine(), IsInBeginFunction);
-}
-
-/// SwitchToLine - Emit line directive for a new line.
-///
-void PIC16DbgInfo::SwitchToLine(unsigned Line, bool IsInBeginFunction) {
-  if (CurLine == Line) return;
-  if (!IsInBeginFunction)
-    OS.EmitRawText("\n\t.line " + Twine(Line));
-  CurLine = Line;
-}
-
-/// EndFunction - Emit .ef for end of function.
-///
-void PIC16DbgInfo::EndFunction(const MachineFunction &MF) {
-  if (! EmitDebugDirectives) return;
-  const DebugLoc &DL = GetDebugLocForFunction(MF);
-  // Emit debug info only if valid debug info is available.
-  if (!DL.isUnknown())
-    EmitFunctEndDI(MF.getFunction(), CurLine);
-}
-
-/// EndModule - Emit .eof for end of module.
-///
-void PIC16DbgInfo::EndModule(Module &M) {
-  if (! EmitDebugDirectives) return;
-  EmitVarDebugInfo(M);
-  if (CurFile != "") OS.EmitRawText(StringRef("\n\t.eof"));
-}
-
-/// EmitCompositeTypeElements - Emit debug information for members of a
-/// composite type.
-///
-void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
-                                              std::string SuffixNo) {
-  unsigned long Value = 0;
-  DIArray Elements = CTy.getTypeArray();
-  for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) {
-    DIDescriptor Element = Elements.getElement(i);
-    unsigned short TypeNo = 0;
-    bool HasAux = false;
-    int ElementAux[PIC16Dbg::AuxSize] = { 0 };
-    std::string TagName = "";
-    DIDerivedType DITy(Element);
-    unsigned short ElementSize = DITy.getSizeInBits()/8;
-    // Get the mangled name for this structure/union element.
- std::string MangMemName = DITy.getName().str() + SuffixNo; - PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName); - short Class = 0; - if( CTy.getTag() == dwarf::DW_TAG_union_type) - Class = PIC16Dbg::C_MOU; - else if (CTy.getTag() == dwarf::DW_TAG_structure_type) - Class = PIC16Dbg::C_MOS; - EmitSymbol(MangMemName.c_str(), Class, TypeNo, Value); - if (CTy.getTag() == dwarf::DW_TAG_structure_type) - Value += ElementSize; - if (HasAux) - EmitAuxEntry(MangMemName.c_str(), ElementAux, PIC16Dbg::AuxSize, TagName); - } -} - -/// EmitCompositeTypeDecls - Emit composite type declarations like structure -/// and union declarations. -/// -void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { - DebugInfoFinder DbgFinder; - DbgFinder.processModule(M); - for (DebugInfoFinder::iterator I = DbgFinder.type_begin(), - E = DbgFinder.type_end(); I != E; ++I) { - DICompositeType CTy(*I); - if (!CTy.Verify()) - continue; - if (CTy.getTag() == dwarf::DW_TAG_union_type || - CTy.getTag() == dwarf::DW_TAG_structure_type ) { - // Get the number after llvm.dbg.composite and make UniqueSuffix from - // it. - std::string DIVar = CTy->getNameStr(); - std::string UniqueSuffix = "." + DIVar.substr(18); - std::string MangledCTyName = CTy.getName().str() + UniqueSuffix; - unsigned short size = CTy.getSizeInBits()/8; - int Aux[PIC16Dbg::AuxSize] = {0}; - // 7th and 8th byte represent size of structure/union. - Aux[6] = size & 0xff; - Aux[7] = size >> 8; - // Emit .def for structure/union tag. - if( CTy.getTag() == dwarf::DW_TAG_union_type) - EmitSymbol(MangledCTyName.c_str(), PIC16Dbg::C_UNTAG); - else if (CTy.getTag() == dwarf::DW_TAG_structure_type) - EmitSymbol(MangledCTyName.c_str(), PIC16Dbg::C_STRTAG); - - // Emit auxiliary debug information for structure/union tag. - EmitAuxEntry(MangledCTyName.c_str(), Aux, PIC16Dbg::AuxSize); - - // Emit members. - EmitCompositeTypeElements (CTy, UniqueSuffix); - - // Emit mangled Symbol for end of structure/union. - std::string EOSSymbol = ".eos" + UniqueSuffix; - EmitSymbol(EOSSymbol.c_str(), PIC16Dbg::C_EOS); - EmitAuxEntry(EOSSymbol.c_str(), Aux, PIC16Dbg::AuxSize, - MangledCTyName.c_str()); - } - } -} - - -/// EmitFunctBeginDI - Emit .bf for function. -/// -void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) { - std::string FunctName = F->getName(); - if (EmitDebugDirectives) { - std::string FunctBeginSym = ".bf." + FunctName; - std::string BlockBeginSym = ".bb." + FunctName; - - int BFAux[PIC16Dbg::AuxSize] = {0}; - BFAux[4] = CurLine; - BFAux[5] = CurLine >> 8; - - // Emit debug directives for beginning of function. - EmitSymbol(FunctBeginSym, PIC16Dbg::C_FCN); - EmitAuxEntry(FunctBeginSym, BFAux, PIC16Dbg::AuxSize); - - EmitSymbol(BlockBeginSym, PIC16Dbg::C_BLOCK); - EmitAuxEntry(BlockBeginSym, BFAux, PIC16Dbg::AuxSize); - } -} - -/// EmitFunctEndDI - Emit .ef for function end. -/// -void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) { - std::string FunctName = F->getName(); - if (EmitDebugDirectives) { - std::string FunctEndSym = ".ef." + FunctName; - std::string BlockEndSym = ".eb." + FunctName; - - // Emit debug directives for end of function. - EmitSymbol(BlockEndSym, PIC16Dbg::C_BLOCK); - int EFAux[PIC16Dbg::AuxSize] = {0}; - // 5th and 6th byte stand for line number. - EFAux[4] = CurLine; - EFAux[5] = CurLine >> 8; - EmitAuxEntry(BlockEndSym, EFAux, PIC16Dbg::AuxSize); - EmitSymbol(FunctEndSym, PIC16Dbg::C_FCN); - EmitAuxEntry(FunctEndSym, EFAux, PIC16Dbg::AuxSize); - } -} - -/// EmitAuxEntry - Emit Auxiliary debug information. 
-/// -void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num, - std::string TagName) { - std::string Tmp; - // TagName is emitted in case of structure/union objects. - if (!TagName.empty()) Tmp += ", " + TagName; - - for (int i = 0; i<Num; i++) - Tmp += "," + utostr(Aux[i] & 0xff); - - OS.EmitRawText("\n\t.dim " + Twine(VarName) + ", 1" + Tmp); -} - -/// EmitSymbol - Emit .def for a symbol. Value is offset for the member. -/// -void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, - unsigned short Type, unsigned long Value) { - std::string Tmp; - if (Value > 0) - Tmp = ", value = " + utostr(Value); - - OS.EmitRawText("\n\t.def " + Twine(Name) + ", type = " + utostr(Type) + - ", class = " + utostr(Class) + Tmp); -} - -/// EmitVarDebugInfo - Emit debug information for all variables. -/// -void PIC16DbgInfo::EmitVarDebugInfo(Module &M) { - DebugInfoFinder DbgFinder; - DbgFinder.processModule(M); - - for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), - E = DbgFinder.global_variable_end(); I != E; ++I) { - DIGlobalVariable DIGV(*I); - DIType Ty = DIGV.getType(); - unsigned short TypeNo = 0; - bool HasAux = false; - int Aux[PIC16Dbg::AuxSize] = { 0 }; - std::string TagName = ""; - std::string VarName = DIGV.getName(); - VarName = MAI->getGlobalPrefix() + VarName; - PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TagName); - // Emit debug info only if type information is availaible. - if (TypeNo != PIC16Dbg::T_NULL) { - OS.EmitRawText("\t.type " + Twine(VarName) + ", " + Twine(TypeNo)); - short ClassNo = getStorageClass(DIGV); - OS.EmitRawText("\t.class " + Twine(VarName) + ", " + Twine(ClassNo)); - if (HasAux) - EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TagName); - } - } -} - -/// SwitchToCU - Switch to a new compilation unit. -/// -void PIC16DbgInfo::SwitchToCU(MDNode *CU) { - // Get the file path from CU. - DICompileUnit cu(CU); - std::string DirName = cu.getDirectory(); - std::string FileName = cu.getFilename(); - std::string FilePath = DirName + "/" + FileName; - - // Nothing to do if source file is still same. - if ( FilePath == CurFile ) return; - - // Else, close the current one and start a new. - if (CurFile != "") - OS.EmitRawText(StringRef("\t.eof")); - OS.EmitRawText("\n\t.file\t\"" + Twine(FilePath) + "\""); - CurFile = FilePath; - CurLine = 0; -} - -/// EmitEOF - Emit .eof for end of file. -/// -void PIC16DbgInfo::EmitEOF() { - if (CurFile != "") - OS.EmitRawText(StringRef("\t.EOF")); -} - diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h deleted file mode 100644 index 031dcf0..0000000 --- a/lib/Target/PIC16/PIC16DebugInfo.h +++ /dev/null @@ -1,156 +0,0 @@ -//===-- PIC16DebugInfo.h - Interfaces for PIC16 Debug Information ============// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the helper functions for representing debug information. 
-// -//===----------------------------------------------------------------------===// - -#ifndef PIC16DBG_H -#define PIC16DBG_H - -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Module.h" - -namespace llvm { - class MachineFunction; - class DebugLoc; - class MCStreamer; - - namespace PIC16Dbg { - enum VarType { - T_NULL, - T_VOID, - T_CHAR, - T_SHORT, - T_INT, - T_LONG, - T_FLOAT, - T_DOUBLE, - T_STRUCT, - T_UNION, - T_ENUM, - T_MOE, - T_UCHAR, - T_USHORT, - T_UINT, - T_ULONG - }; - enum DerivedType { - DT_NONE, - DT_PTR, - DT_FCN, - DT_ARY - }; - enum TypeSize { - S_BASIC = 5, - S_DERIVED = 3 - }; - enum DbgClass { - C_NULL, - C_AUTO, - C_EXT, - C_STAT, - C_REG, - C_EXTDEF, - C_LABEL, - C_ULABEL, - C_MOS, - C_ARG, - C_STRTAG, - C_MOU, - C_UNTAG, - C_TPDEF, - C_USTATIC, - C_ENTAG, - C_MOE, - C_REGPARM, - C_FIELD, - C_AUTOARG, - C_LASTENT, - C_BLOCK = 100, - C_FCN, - C_EOS, - C_FILE, - C_LINE, - C_ALIAS, - C_HIDDEN, - C_EOF, - C_LIST, - C_SECTION, - C_EFCN = 255 - }; - enum SymbolSize { - AuxSize =20 - }; - } - - class PIC16DbgInfo { - MCStreamer &OS; - const MCAsmInfo *MAI; - std::string CurFile; - unsigned CurLine; - - // EmitDebugDirectives is set if debug information is available. Default - // value for it is false. - bool EmitDebugDirectives; - - public: - PIC16DbgInfo(MCStreamer &os, const MCAsmInfo *T) : OS(os), MAI(T) { - CurFile = ""; - CurLine = 0; - EmitDebugDirectives = false; - } - - void BeginModule (Module &M); - void BeginFunction (const MachineFunction &MF); - void ChangeDebugLoc (const MachineFunction &MF, const DebugLoc &DL, - bool IsInBeginFunction = false); - void EndFunction (const MachineFunction &MF); - void EndModule (Module &M); - - - private: - void SwitchToCU (MDNode *CU); - void SwitchToLine (unsigned Line, bool IsInBeginFunction = false); - - void PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux, - int Aux[], std::string &TypeName); - void PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo); - void PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo, - bool &HasAux, int Aux[], - std::string &TypeName); - - void PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo, - bool &HasAux, int Aux[], - std::string &TypeName); - void PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo, - bool &HasAux, int Aux[], - std::string &TypeName); - - void PopulateStructOrUnionTypeInfo (DIType Ty, unsigned short &TypeNo, - bool &HasAux, int Aux[], - std::string &TypeName); - void PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo); - - unsigned GetTypeDebugNumber(std::string &Type); - short getStorageClass(DIGlobalVariable DIGV); - void EmitFunctBeginDI(const Function *F); - void EmitCompositeTypeDecls(Module &M); - void EmitCompositeTypeElements (DICompositeType CTy, std::string Suffix); - void EmitFunctEndDI(const Function *F, unsigned Line); - void EmitAuxEntry(const std::string VarName, int Aux[], - int num = PIC16Dbg::AuxSize, std::string TagName = ""); - inline void EmitSymbol(std::string Name, short Class, - unsigned short Type = PIC16Dbg::T_NULL, - unsigned long Value = 0); - void EmitVarDebugInfo(Module &M); - void EmitEOF(); - }; -} // end namespace llvm; -#endif diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp deleted file mode 100644 index 6cbd002..0000000 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp +++ /dev/null @@ -1,50 +0,0 @@ -//===-- PIC16ISelDAGToDAG.cpp - A dag to dag inst selector for PIC16 ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is 
distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an instruction selector for the PIC16 target. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pic16-isel" - -#include "llvm/Support/ErrorHandling.h" -#include "PIC16ISelDAGToDAG.h" -using namespace llvm; - -/// createPIC16ISelDag - This pass converts a legalized DAG into a -/// PIC16-specific DAG, ready for instruction scheduling. -FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) { - return new PIC16DAGToDAGISel(TM); -} - - -/// Select - Select instructions not customized! Used for -/// expanded, promoted and normal instructions. -SDNode* PIC16DAGToDAGISel::Select(SDNode *N) { - - // Select the default instruction. - SDNode *ResNode = SelectCode(N); - - return ResNode; -} - - -// SelectDirectAddr - Match a direct address for DAG. -// A direct address could be a globaladdress or externalsymbol. -bool PIC16DAGToDAGISel::SelectDirectAddr(SDNode *Op, SDValue N, - SDValue &Address) { - // Return true if TGA or ES. - if (N.getOpcode() == ISD::TargetGlobalAddress - || N.getOpcode() == ISD::TargetExternalSymbol) { - Address = N; - return true; - } - - return false; -} diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h deleted file mode 100644 index ecaddd3..0000000 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h +++ /dev/null @@ -1,60 +0,0 @@ -//===-- PIC16ISelDAGToDAG.cpp - A dag to dag inst selector for PIC16 ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an instruction selector for the PIC16 target. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pic16-isel" - -#include "PIC16.h" -#include "PIC16RegisterInfo.h" -#include "PIC16TargetMachine.h" -#include "PIC16MachineFunctionInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" -#include "llvm/Intrinsics.h" -using namespace llvm; - -namespace { - -class LLVM_LIBRARY_VISIBILITY PIC16DAGToDAGISel : public SelectionDAGISel { - - /// TM - Keep a reference to PIC16TargetMachine. - const PIC16TargetMachine &TM; - - /// PIC16Lowering - This object fully describes how to lower LLVM code to an - /// PIC16-specific SelectionDAG. - const PIC16TargetLowering &PIC16Lowering; - -public: - explicit PIC16DAGToDAGISel(PIC16TargetMachine &tm) : - SelectionDAGISel(tm), - TM(tm), PIC16Lowering(*TM.getTargetLowering()) {} - - // Pass Name - virtual const char *getPassName() const { - return "PIC16 DAG->DAG Pattern Instruction Selection"; - } - -private: - // Include the pieces autogenerated from the target description. -#include "PIC16GenDAGISel.inc" - - SDNode *Select(SDNode *N); - - // Match direct address complex pattern. 
- bool SelectDirectAddr(SDNode *Op, SDValue N, SDValue &Address); - -}; - -} - diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp deleted file mode 100644 index 527b31d..0000000 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ /dev/null @@ -1,2000 +0,0 @@ -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that PIC16 uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pic16-lower" -#include "PIC16ABINames.h" -#include "PIC16ISelLowering.h" -#include "PIC16TargetObjectFile.h" -#include "PIC16TargetMachine.h" -#include "PIC16MachineFunctionInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalValue.h" -#include "llvm/Function.h" -#include "llvm/CallingConv.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Support/ErrorHandling.h" - - -using namespace llvm; - -static const char *getIntrinsicName(unsigned opcode) { - std::string Basename; - switch(opcode) { - default: llvm_unreachable("do not know intrinsic name"); - // Arithmetic Right shift for integer types. - case PIC16ISD::SRA_I8: Basename = "sra.i8"; break; - case RTLIB::SRA_I16: Basename = "sra.i16"; break; - case RTLIB::SRA_I32: Basename = "sra.i32"; break; - - // Left shift for integer types. - case PIC16ISD::SLL_I8: Basename = "sll.i8"; break; - case RTLIB::SHL_I16: Basename = "sll.i16"; break; - case RTLIB::SHL_I32: Basename = "sll.i32"; break; - - // Logical Right Shift for integer types. - case PIC16ISD::SRL_I8: Basename = "srl.i8"; break; - case RTLIB::SRL_I16: Basename = "srl.i16"; break; - case RTLIB::SRL_I32: Basename = "srl.i32"; break; - - // Multiply for integer types. - case PIC16ISD::MUL_I8: Basename = "mul.i8"; break; - case RTLIB::MUL_I16: Basename = "mul.i16"; break; - case RTLIB::MUL_I32: Basename = "mul.i32"; break; - - // Signed division for integers. - case RTLIB::SDIV_I16: Basename = "sdiv.i16"; break; - case RTLIB::SDIV_I32: Basename = "sdiv.i32"; break; - - // Unsigned division for integers. - case RTLIB::UDIV_I16: Basename = "udiv.i16"; break; - case RTLIB::UDIV_I32: Basename = "udiv.i32"; break; - - // Signed Modulas for integers. - case RTLIB::SREM_I16: Basename = "srem.i16"; break; - case RTLIB::SREM_I32: Basename = "srem.i32"; break; - - // Unsigned Modulas for integers. - case RTLIB::UREM_I16: Basename = "urem.i16"; break; - case RTLIB::UREM_I32: Basename = "urem.i32"; break; - - ////////////////////// - // LIBCALLS FOR FLOATS - ////////////////////// - - // Float to signed integrals - case RTLIB::FPTOSINT_F32_I8: Basename = "f32_to_si32"; break; - case RTLIB::FPTOSINT_F32_I16: Basename = "f32_to_si32"; break; - case RTLIB::FPTOSINT_F32_I32: Basename = "f32_to_si32"; break; - - // Signed integrals to float. char and int are first sign extended to i32 - // before being converted to float, so an I8_F32 or I16_F32 isn't required. - case RTLIB::SINTTOFP_I32_F32: Basename = "si32_to_f32"; break; - - // Float to Unsigned conversions. - // Signed conversion can be used for unsigned conversion as well. 
- // In signed and unsigned versions only the interpretation of the - // MSB is different. Bit representation remains the same. - case RTLIB::FPTOUINT_F32_I8: Basename = "f32_to_si32"; break; - case RTLIB::FPTOUINT_F32_I16: Basename = "f32_to_si32"; break; - case RTLIB::FPTOUINT_F32_I32: Basename = "f32_to_si32"; break; - - // Unsigned to Float conversions. char and int are first zero extended - // before being converted to float. - case RTLIB::UINTTOFP_I32_F32: Basename = "ui32_to_f32"; break; - - // Floating point add, sub, mul, div. - case RTLIB::ADD_F32: Basename = "add.f32"; break; - case RTLIB::SUB_F32: Basename = "sub.f32"; break; - case RTLIB::MUL_F32: Basename = "mul.f32"; break; - case RTLIB::DIV_F32: Basename = "div.f32"; break; - - // Floating point comparison - case RTLIB::O_F32: Basename = "unordered.f32"; break; - case RTLIB::UO_F32: Basename = "unordered.f32"; break; - case RTLIB::OLE_F32: Basename = "le.f32"; break; - case RTLIB::OGE_F32: Basename = "ge.f32"; break; - case RTLIB::OLT_F32: Basename = "lt.f32"; break; - case RTLIB::OGT_F32: Basename = "gt.f32"; break; - case RTLIB::OEQ_F32: Basename = "eq.f32"; break; - case RTLIB::UNE_F32: Basename = "neq.f32"; break; - } - - std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL); - std::string tagname = PAN::getTagName(PAN::LIBCALL); - std::string Fullname = prefix + tagname + Basename; - - // The name has to live through program life. - return ESNames::createESName(Fullname); -} - -// getStdLibCallName - Get the name for the standard library function. -static const char *getStdLibCallName(unsigned opcode) { - std::string BaseName; - switch(opcode) { - case RTLIB::COS_F32: BaseName = "cos"; - break; - case RTLIB::SIN_F32: BaseName = "sin"; - break; - case RTLIB::MEMCPY: BaseName = "memcpy"; - break; - case RTLIB::MEMSET: BaseName = "memset"; - break; - case RTLIB::MEMMOVE: BaseName = "memmove"; - break; - default: llvm_unreachable("do not know std lib call name"); - } - std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL); - std::string LibCallName = prefix + BaseName; - - // The name has to live through program life. - return ESNames::createESName(LibCallName); -} - -// PIC16TargetLowering Constructor. 
-PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) - : TargetLowering(TM, new PIC16TargetObjectFile()) { - - Subtarget = &TM.getSubtarget<PIC16Subtarget>(); - - addRegisterClass(MVT::i8, PIC16::GPRRegisterClass); - - setShiftAmountType(MVT::i8); - - // Std lib call names - setLibcallName(RTLIB::COS_F32, getStdLibCallName(RTLIB::COS_F32)); - setLibcallName(RTLIB::SIN_F32, getStdLibCallName(RTLIB::SIN_F32)); - setLibcallName(RTLIB::MEMCPY, getStdLibCallName(RTLIB::MEMCPY)); - setLibcallName(RTLIB::MEMSET, getStdLibCallName(RTLIB::MEMSET)); - setLibcallName(RTLIB::MEMMOVE, getStdLibCallName(RTLIB::MEMMOVE)); - - // SRA library call names - setPIC16LibcallName(PIC16ISD::SRA_I8, getIntrinsicName(PIC16ISD::SRA_I8)); - setLibcallName(RTLIB::SRA_I16, getIntrinsicName(RTLIB::SRA_I16)); - setLibcallName(RTLIB::SRA_I32, getIntrinsicName(RTLIB::SRA_I32)); - - // SHL library call names - setPIC16LibcallName(PIC16ISD::SLL_I8, getIntrinsicName(PIC16ISD::SLL_I8)); - setLibcallName(RTLIB::SHL_I16, getIntrinsicName(RTLIB::SHL_I16)); - setLibcallName(RTLIB::SHL_I32, getIntrinsicName(RTLIB::SHL_I32)); - - // SRL library call names - setPIC16LibcallName(PIC16ISD::SRL_I8, getIntrinsicName(PIC16ISD::SRL_I8)); - setLibcallName(RTLIB::SRL_I16, getIntrinsicName(RTLIB::SRL_I16)); - setLibcallName(RTLIB::SRL_I32, getIntrinsicName(RTLIB::SRL_I32)); - - // MUL Library call names - setPIC16LibcallName(PIC16ISD::MUL_I8, getIntrinsicName(PIC16ISD::MUL_I8)); - setLibcallName(RTLIB::MUL_I16, getIntrinsicName(RTLIB::MUL_I16)); - setLibcallName(RTLIB::MUL_I32, getIntrinsicName(RTLIB::MUL_I32)); - - // Signed division lib call names - setLibcallName(RTLIB::SDIV_I16, getIntrinsicName(RTLIB::SDIV_I16)); - setLibcallName(RTLIB::SDIV_I32, getIntrinsicName(RTLIB::SDIV_I32)); - - // Unsigned division lib call names - setLibcallName(RTLIB::UDIV_I16, getIntrinsicName(RTLIB::UDIV_I16)); - setLibcallName(RTLIB::UDIV_I32, getIntrinsicName(RTLIB::UDIV_I32)); - - // Signed remainder lib call names - setLibcallName(RTLIB::SREM_I16, getIntrinsicName(RTLIB::SREM_I16)); - setLibcallName(RTLIB::SREM_I32, getIntrinsicName(RTLIB::SREM_I32)); - - // Unsigned remainder lib call names - setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16)); - setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32)); - - // Floating point to signed int conversions. - setLibcallName(RTLIB::FPTOSINT_F32_I8, - getIntrinsicName(RTLIB::FPTOSINT_F32_I8)); - setLibcallName(RTLIB::FPTOSINT_F32_I16, - getIntrinsicName(RTLIB::FPTOSINT_F32_I16)); - setLibcallName(RTLIB::FPTOSINT_F32_I32, - getIntrinsicName(RTLIB::FPTOSINT_F32_I32)); - - // Signed int to floats. - setLibcallName(RTLIB::SINTTOFP_I32_F32, - getIntrinsicName(RTLIB::SINTTOFP_I32_F32)); - - // Floating points to unsigned ints. - setLibcallName(RTLIB::FPTOUINT_F32_I8, - getIntrinsicName(RTLIB::FPTOUINT_F32_I8)); - setLibcallName(RTLIB::FPTOUINT_F32_I16, - getIntrinsicName(RTLIB::FPTOUINT_F32_I16)); - setLibcallName(RTLIB::FPTOUINT_F32_I32, - getIntrinsicName(RTLIB::FPTOUINT_F32_I32)); - - // Unsigned int to floats. - setLibcallName(RTLIB::UINTTOFP_I32_F32, - getIntrinsicName(RTLIB::UINTTOFP_I32_F32)); - - // Floating point add, sub, mul ,div. 
- setLibcallName(RTLIB::ADD_F32, getIntrinsicName(RTLIB::ADD_F32)); - setLibcallName(RTLIB::SUB_F32, getIntrinsicName(RTLIB::SUB_F32)); - setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32)); - setLibcallName(RTLIB::DIV_F32, getIntrinsicName(RTLIB::DIV_F32)); - - // Floationg point comparison - setLibcallName(RTLIB::O_F32, getIntrinsicName(RTLIB::O_F32)); - setLibcallName(RTLIB::UO_F32, getIntrinsicName(RTLIB::UO_F32)); - setLibcallName(RTLIB::OLE_F32, getIntrinsicName(RTLIB::OLE_F32)); - setLibcallName(RTLIB::OGE_F32, getIntrinsicName(RTLIB::OGE_F32)); - setLibcallName(RTLIB::OLT_F32, getIntrinsicName(RTLIB::OLT_F32)); - setLibcallName(RTLIB::OGT_F32, getIntrinsicName(RTLIB::OGT_F32)); - setLibcallName(RTLIB::OEQ_F32, getIntrinsicName(RTLIB::OEQ_F32)); - setLibcallName(RTLIB::UNE_F32, getIntrinsicName(RTLIB::UNE_F32)); - - // Return value comparisons of floating point calls. - setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); - setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE); - setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); - setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); - setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); - setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); - setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); - setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); - - setOperationAction(ISD::GlobalAddress, MVT::i16, Custom); - setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom); - - setOperationAction(ISD::LOAD, MVT::i8, Legal); - setOperationAction(ISD::LOAD, MVT::i16, Custom); - setOperationAction(ISD::LOAD, MVT::i32, Custom); - - setOperationAction(ISD::STORE, MVT::i8, Legal); - setOperationAction(ISD::STORE, MVT::i16, Custom); - setOperationAction(ISD::STORE, MVT::i32, Custom); - setOperationAction(ISD::STORE, MVT::i64, Custom); - - setOperationAction(ISD::ADDE, MVT::i8, Custom); - setOperationAction(ISD::ADDC, MVT::i8, Custom); - setOperationAction(ISD::SUBE, MVT::i8, Custom); - setOperationAction(ISD::SUBC, MVT::i8, Custom); - setOperationAction(ISD::SUB, MVT::i8, Custom); - setOperationAction(ISD::ADD, MVT::i8, Custom); - setOperationAction(ISD::ADD, MVT::i16, Custom); - - setOperationAction(ISD::OR, MVT::i8, Custom); - setOperationAction(ISD::AND, MVT::i8, Custom); - setOperationAction(ISD::XOR, MVT::i8, Custom); - - setOperationAction(ISD::FrameIndex, MVT::i16, Custom); - - setOperationAction(ISD::MUL, MVT::i8, Custom); - - setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); - setOperationAction(ISD::MULHU, MVT::i8, Expand); - setOperationAction(ISD::MULHS, MVT::i8, Expand); - - setOperationAction(ISD::SRA, MVT::i8, Custom); - setOperationAction(ISD::SHL, MVT::i8, Custom); - setOperationAction(ISD::SRL, MVT::i8, Custom); - - setOperationAction(ISD::ROTL, MVT::i8, Expand); - setOperationAction(ISD::ROTR, MVT::i8, Expand); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - - // PIC16 does not support shift parts - setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand); - setOperationAction(ISD::SHL_PARTS, MVT::i8, Expand); - setOperationAction(ISD::SRL_PARTS, MVT::i8, Expand); - - - // PIC16 does not have a SETCC, expand it to SELECT_CC. 
- setOperationAction(ISD::SETCC, MVT::i8, Expand); - setOperationAction(ISD::SELECT, MVT::i8, Expand); - setOperationAction(ISD::BRCOND, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); - - setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); - setOperationAction(ISD::BR_CC, MVT::i8, Custom); - - //setOperationAction(ISD::TRUNCATE, MVT::i16, Custom); - setTruncStoreAction(MVT::i16, MVT::i8, Custom); - - // Now deduce the information based on the above mentioned - // actions - computeRegisterProperties(); -} - -std::pair<const TargetRegisterClass*, uint8_t> -PIC16TargetLowering::findRepresentativeClass(EVT VT) const { - switch (VT.getSimpleVT().SimpleTy) { - default: - return TargetLowering::findRepresentativeClass(VT); - case MVT::i16: - return std::make_pair(PIC16::FSR16RegisterClass, 1); - } -} - -// getOutFlag - Extract the flag result if the Op has it. -static SDValue getOutFlag(SDValue &Op) { - // Flag is the last value of the node. - SDValue Flag = Op.getValue(Op.getNode()->getNumValues() - 1); - - assert (Flag.getValueType() == MVT::Flag - && "Node does not have an out Flag"); - - return Flag; -} -// Get the TmpOffset for FrameIndex -unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size, - MachineFunction &MF) const { - PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); - std::map<unsigned, unsigned> &FiTmpOffsetMap = FuncInfo->getFiTmpOffsetMap(); - - std::map<unsigned, unsigned>::iterator - MapIt = FiTmpOffsetMap.find(FI); - if (MapIt != FiTmpOffsetMap.end()) - return MapIt->second; - - // This FI (FrameIndex) is not yet mapped, so map it - FiTmpOffsetMap[FI] = FuncInfo->getTmpSize(); - FuncInfo->setTmpSize(FuncInfo->getTmpSize() + size); - return FiTmpOffsetMap[FI]; -} - -void PIC16TargetLowering::ResetTmpOffsetMap(SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); - PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); - FuncInfo->getFiTmpOffsetMap().clear(); - FuncInfo->setTmpSize(0); -} - -// To extract chain value from the SDValue Nodes -// This function will help to maintain the chain extracting -// code at one place. In case of any change in future it will -// help maintain the code. -static SDValue getChain(SDValue &Op) { - SDValue Chain = Op.getValue(Op.getNode()->getNumValues() - 1); - - // If the last value returned in Flag then the chain is - // second last value returned. - if (Chain.getValueType() == MVT::Flag) - Chain = Op.getValue(Op.getNode()->getNumValues() - 2); - - // All nodes may not produce a chain. Therefore following assert - // verifies that the node is returning a chain only. - assert (Chain.getValueType() == MVT::Other - && "Node does not have a chain"); - - return Chain; -} - -/// PopulateResults - Helper function to LowerOperation. -/// If a node wants to return multiple results after lowering, -/// it stuffs them into an array of SDValue called Results. 
-/// PopulateResults - Helper function to LowerOperation.
-/// If a node wants to return multiple results after lowering,
-/// it stuffs them into an array of SDValue called Results.
-
-static void PopulateResults(SDValue N, SmallVectorImpl<SDValue>&Results) {
-  if (N.getOpcode() == ISD::MERGE_VALUES) {
-    int NumResults = N.getNumOperands();
-    for (int i = 0; i < NumResults; i++)
-      Results.push_back(N.getOperand(i));
-  }
-  else
-    Results.push_back(N);
-}
-
-MVT::SimpleValueType
-PIC16TargetLowering::getSetCCResultType(EVT ValType) const {
-  return MVT::i8;
-}
-
-MVT::SimpleValueType
-PIC16TargetLowering::getCmpLibcallReturnType() const {
-  return MVT::i8;
-}
-
-/// The type legalizer framework can generate libcalls only when the
-/// operand/result types are illegal.
-/// PIC16 needs to generate libcalls even for legal types (i8) for some ops,
-/// for example an arithmetic right shift. These functions are used to lower
-/// such operations.
-
-void
-PIC16TargetLowering::setPIC16LibcallName(PIC16ISD::PIC16Libcall Call,
-                                         const char *Name) {
-  PIC16LibcallNames[Call] = Name;
-}
-
-const char *
-PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const {
-  return PIC16LibcallNames[Call];
-}
-
-SDValue
-PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call,
-                                      EVT RetVT, const SDValue *Ops,
-                                      unsigned NumOps, bool isSigned,
-                                      SelectionDAG &DAG, DebugLoc dl) const {
-
-  TargetLowering::ArgListTy Args;
-  Args.reserve(NumOps);
-
-  TargetLowering::ArgListEntry Entry;
-  for (unsigned i = 0; i != NumOps; ++i) {
-    Entry.Node = Ops[i];
-    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
-    Entry.isSExt = isSigned;
-    Entry.isZExt = !isSigned;
-    Args.push_back(Entry);
-  }
-
-  SDValue Callee = DAG.getExternalSymbol(getPIC16LibcallName(Call), MVT::i16);
-
-  const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
-  std::pair<SDValue,SDValue> CallInfo =
-    LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
-                false, 0, CallingConv::C, false,
-                /*isReturnValueUsed=*/true,
-                Callee, Args, DAG, dl);
-
-  return CallInfo.first;
-}
-
-const char *PIC16TargetLowering::getTargetNodeName(unsigned Opcode) const {
-  switch (Opcode) {
-  default:                     return NULL;
-  case PIC16ISD::Lo:           return "PIC16ISD::Lo";
-  case PIC16ISD::Hi:           return "PIC16ISD::Hi";
-  case PIC16ISD::MTLO:         return "PIC16ISD::MTLO";
-  case PIC16ISD::MTHI:         return "PIC16ISD::MTHI";
-  case PIC16ISD::MTPCLATH:     return "PIC16ISD::MTPCLATH";
-  case PIC16ISD::PIC16Connect: return "PIC16ISD::PIC16Connect";
-  case PIC16ISD::Banksel:      return "PIC16ISD::Banksel";
-  case PIC16ISD::PIC16Load:    return "PIC16ISD::PIC16Load";
-  case PIC16ISD::PIC16LdArg:   return "PIC16ISD::PIC16LdArg";
-  case PIC16ISD::PIC16LdWF:    return "PIC16ISD::PIC16LdWF";
-  case PIC16ISD::PIC16Store:   return "PIC16ISD::PIC16Store";
-  case PIC16ISD::PIC16StWF:    return "PIC16ISD::PIC16StWF";
-  case PIC16ISD::BCF:          return "PIC16ISD::BCF";
-  case PIC16ISD::LSLF:         return "PIC16ISD::LSLF";
-  case PIC16ISD::LRLF:         return "PIC16ISD::LRLF";
-  case PIC16ISD::RLF:          return "PIC16ISD::RLF";
-  case PIC16ISD::RRF:          return "PIC16ISD::RRF";
-  case PIC16ISD::CALL:         return "PIC16ISD::CALL";
-  case PIC16ISD::CALLW:        return "PIC16ISD::CALLW";
-  case PIC16ISD::SUBCC:        return "PIC16ISD::SUBCC";
-  case PIC16ISD::SELECT_ICC:   return "PIC16ISD::SELECT_ICC";
-  case PIC16ISD::BRCOND:       return "PIC16ISD::BRCOND";
-  case PIC16ISD::RET:          return "PIC16ISD::RET";
-  case PIC16ISD::Dummy:        return "PIC16ISD::Dummy";
-  }
-}
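Given the framework just described, even a legal i8 operation can end up as a call. A compressed sketch of the pattern (this mirrors how LowerShift further below uses it; Value and Amt stand for the shift operands):

    // An i8 arithmetic shift right becomes a call to the SRA_I8 libcall.
    SDValue Ops[2] = { Value, Amt };
    SDValue Call = MakePIC16Libcall(PIC16ISD::SRA_I8, MVT::i8, Ops, 2,
                                    /*isSigned=*/true, DAG, dl);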
-void PIC16TargetLowering::ReplaceNodeResults(SDNode *N,
-                                             SmallVectorImpl<SDValue>&Results,
-                                             SelectionDAG &DAG) const {
-
-  switch (N->getOpcode()) {
-  case ISD::GlobalAddress:
-    Results.push_back(ExpandGlobalAddress(N, DAG));
-    return;
-  case ISD::ExternalSymbol:
-    Results.push_back(ExpandExternalSymbol(N, DAG));
-    return;
-  case ISD::STORE:
-    Results.push_back(ExpandStore(N, DAG));
-    return;
-  case ISD::LOAD:
-    PopulateResults(ExpandLoad(N, DAG), Results);
-    return;
-  case ISD::ADD:
-    // Results.push_back(ExpandAdd(N, DAG));
-    return;
-  case ISD::FrameIndex:
-    Results.push_back(ExpandFrameIndex(N, DAG));
-    return;
-  default:
-    assert (0 && "not implemented");
-    return;
-  }
-}
-
-SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N,
-                                              SelectionDAG &DAG) const {
-
-  // Currently handling FrameIndex of size MVT::i16 only.
-  // One example of this scenario is when the return value is written on
-  // FrameIndex#0.
-
-  if (N->getValueType(0) != MVT::i16)
-    return SDValue();
-
-  // Expand the FrameIndex into an ExternalSymbol and a Constant node.
-  // The Constant will represent the frame index number.
-  // Get the current function frame.
-  MachineFunction &MF = DAG.getMachineFunction();
-  const Function *Func = MF.getFunction();
-  const std::string Name = Func->getName();
-
-  FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(SDValue(N,0));
-  // FIXME there isn't really debug info here
-  DebugLoc dl = FR->getDebugLoc();
-
-  // Expand the FrameIndex like GlobalAddress and ExternalSymbol.
-  // Also use the Offset field for the lo and hi parts. The default
-  // offset is zero.
-
-  SDValue ES;
-  int FrameOffset;
-  SDValue FI = SDValue(N,0);
-  LegalizeFrameIndex(FI, DAG, ES, FrameOffset);
-  SDValue Offset = DAG.getConstant(FrameOffset, MVT::i8);
-  SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, ES, Offset);
-  SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, ES, Offset);
-  return DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0), Lo, Hi);
-}
-
-
-SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) const {
-  StoreSDNode *St = cast<StoreSDNode>(N);
-  SDValue Chain = St->getChain();
-  SDValue Src = St->getValue();
-  SDValue Ptr = St->getBasePtr();
-  EVT ValueType = Src.getValueType();
-  unsigned StoreOffset = 0;
-  DebugLoc dl = N->getDebugLoc();
-
-  SDValue PtrLo, PtrHi;
-  LegalizeAddress(Ptr, DAG, PtrLo, PtrHi, StoreOffset, dl);
-
-  if (ValueType == MVT::i8) {
-    return DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, Src,
-                        PtrLo, PtrHi,
-                        DAG.getConstant (0 + StoreOffset, MVT::i8));
-  }
-  else if (ValueType == MVT::i16) {
-    // Get the Lo and Hi parts from MERGE_VALUE or BUILD_PAIR.
-    SDValue SrcLo, SrcHi;
-    GetExpandedParts(Src, DAG, SrcLo, SrcHi);
-    SDValue ChainLo = Chain, ChainHi = Chain;
-    // FIXME: This makes unsafe assumptions. The Chain may be a TokenFactor
-    // created for an unrelated purpose, in which case it may not have
-    // exactly two operands. Also, even if it does have two operands, they
-    // may not be the low and high parts of an aligned load that was split.
-    if (Chain.getOpcode() == ISD::TokenFactor) {
-      ChainLo = Chain.getOperand(0);
-      ChainHi = Chain.getOperand(1);
-    }
-    SDValue Store1 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other,
-                                 ChainLo,
-                                 SrcLo, PtrLo, PtrHi,
-                                 DAG.getConstant (0 + StoreOffset, MVT::i8));
-
-    SDValue Store2 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi,
-                                 SrcHi, PtrLo, PtrHi,
-                                 DAG.getConstant (1 + StoreOffset, MVT::i8));
-
-    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, getChain(Store1),
-                       getChain(Store2));
-  }
-  else if (ValueType == MVT::i32) {
-    // Get the Lo and Hi parts from MERGE_VALUE or BUILD_PAIR.
-    SDValue SrcLo, SrcHi;
-    GetExpandedParts(Src, DAG, SrcLo, SrcHi);
-
-    // Get the expanded parts of each of SrcLo and SrcHi.
-    SDValue SrcLo1, SrcLo2, SrcHi1, SrcHi2;
-    GetExpandedParts(SrcLo, DAG, SrcLo1, SrcLo2);
-    GetExpandedParts(SrcHi, DAG, SrcHi1, SrcHi2);
-
-    SDValue ChainLo = Chain, ChainHi = Chain;
-    // FIXME: This makes unsafe assumptions; see the FIXME above.
-    if (Chain.getOpcode() == ISD::TokenFactor) {
-      ChainLo = Chain.getOperand(0);
-      ChainHi = Chain.getOperand(1);
-    }
-    SDValue ChainLo1 = ChainLo, ChainLo2 = ChainLo, ChainHi1 = ChainHi,
-            ChainHi2 = ChainHi;
-    // FIXME: This makes unsafe assumptions; see the FIXME above.
-    if (ChainLo.getOpcode() == ISD::TokenFactor) {
-      ChainLo1 = ChainLo.getOperand(0);
-      ChainLo2 = ChainLo.getOperand(1);
-    }
-    // FIXME: This makes unsafe assumptions; see the FIXME above.
-    if (ChainHi.getOpcode() == ISD::TokenFactor) {
-      ChainHi1 = ChainHi.getOperand(0);
-      ChainHi2 = ChainHi.getOperand(1);
-    }
-    SDValue Store1 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other,
-                                 ChainLo1,
-                                 SrcLo1, PtrLo, PtrHi,
-                                 DAG.getConstant (0 + StoreOffset, MVT::i8));
-
-    SDValue Store2 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainLo2,
-                                 SrcLo2, PtrLo, PtrHi,
-                                 DAG.getConstant (1 + StoreOffset, MVT::i8));
-
-    SDValue Store3 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi1,
-                                 SrcHi1, PtrLo, PtrHi,
-                                 DAG.getConstant (2 + StoreOffset, MVT::i8));
-
-    SDValue Store4 = DAG.getNode(PIC16ISD::PIC16Store, dl, MVT::Other, ChainHi2,
-                                 SrcHi2, PtrLo, PtrHi,
-                                 DAG.getConstant (3 + StoreOffset, MVT::i8));
-
-    SDValue RetLo = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                                getChain(Store1), getChain(Store2));
-    SDValue RetHi = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                                getChain(Store3), getChain(Store4));
-    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, RetLo, RetHi);
-
-  } else if (ValueType == MVT::i64) {
-    SDValue SrcLo, SrcHi;
-    GetExpandedParts(Src, DAG, SrcLo, SrcHi);
-    SDValue ChainLo = Chain, ChainHi = Chain;
-    // FIXME: This makes unsafe assumptions; see the FIXME above.
-    if (Chain.getOpcode() == ISD::TokenFactor) {
-      ChainLo = Chain.getOperand(0);
-      ChainHi = Chain.getOperand(1);
-    }
-    SDValue Store1 = DAG.getStore(ChainLo, dl, SrcLo, Ptr, NULL,
-                                  0 + StoreOffset, false, false, 0);
-
-    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                      DAG.getConstant(4, Ptr.getValueType()));
-    SDValue Store2 = DAG.getStore(ChainHi, dl, SrcHi, Ptr, NULL,
-                                  1 + StoreOffset, false, false, 0);
-
-    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1,
-                       Store2);
-  } else {
-    assert (0 && "value type not supported");
-    return SDValue();
-  }
-}
-
-SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N,
-                                                  SelectionDAG &DAG)
-                                                  const {
-  ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(SDValue(N, 0));
-  // FIXME there isn't really debug info here
-  DebugLoc dl = ES->getDebugLoc();
-
-  SDValue TES = DAG.getTargetExternalSymbol(ES->getSymbol(), MVT::i8);
-  SDValue Offset = DAG.getConstant(0, MVT::i8);
-  SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, TES, Offset);
-  SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, TES, Offset);
-
-  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi);
-}
-
-// ExpandGlobalAddress -
-SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N,
-                                                 SelectionDAG &DAG) const {
-  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(SDValue(N, 0));
-  // FIXME there isn't really debug info here
-  DebugLoc dl = G->getDebugLoc();
-
-  SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), N->getDebugLoc(),
-                                           MVT::i8,
-                                           G->getOffset());
-
-  SDValue Offset = DAG.getConstant(0, MVT::i8);
-  SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, TGA, Offset);
-  SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, TGA, Offset);
-
-  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi);
-}
-
-bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) const {
-  assert (Op.getNode() != NULL && "Can't operate on NULL SDNode!!");
-
-  if (Op.getOpcode() == ISD::BUILD_PAIR) {
-    if (Op.getOperand(0).getOpcode() == PIC16ISD::Lo)
-      return true;
-  }
-  return false;
-}
-
-// Return true if the DirectAddress is in ROM_SPACE.
-bool PIC16TargetLowering::isRomAddress(const SDValue &Op) const {
-
-  // A RomAddress is a GlobalAddress in ROM_SPACE.
-  // If the Op is not a GlobalAddress, return false without checking
-  // anything further.
-  if (!isDirectAddress(Op))
-    return false;
-
-  // It's a GlobalAddress.
-  // It is BUILD_PAIR((PIC16Lo TGA), (PIC16Hi TGA)) and Op is the BUILD_PAIR.
-  SDValue TGA = Op.getOperand(0).getOperand(0);
-  GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(TGA);
-
-  if (GSDN->getAddressSpace() == PIC16ISD::ROM_SPACE)
-    return true;
-
-  // For any other address space, return false.
-  return false;
-}
-
-
-// GetExpandedParts - This function is along the same lines as
-// GetExpandedInteger in the type legalizer. It returns the expanded
-// parts of Op in Lo and Hi.
-void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG,
-                                           SDValue &Lo, SDValue &Hi) const {
-  SDNode *N = Op.getNode();
-  DebugLoc dl = N->getDebugLoc();
-  EVT NewVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-
-  // Extract the lo component.
-  Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op,
-                   DAG.getConstant(0, MVT::i8));
-
-  // Extract the hi component.
-  Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op,
-                   DAG.getConstant(1, MVT::i8));
-}
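For reference, the predicates above test for exactly the shape that ExpandGlobalAddress and ExpandExternalSymbol build; a condensed sketch (TGA stands for the target global address node created above):

    // A direct address is BUILD_PAIR(PIC16ISD::Lo(TGA, 0), PIC16ISD::Hi(TGA, 0)).
    SDValue Off  = DAG.getConstant(0, MVT::i8);
    SDValue Lo   = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, TGA, Off);
    SDValue Hi   = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, TGA, Off);
    SDValue Addr = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi);
    // isDirectAddress(Addr) is true; isRomAddress(Addr) additionally requires
    // the underlying global to live in PIC16ISD::ROM_SPACE.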
-// Legalize a FrameIndex into an ExternalSymbol and an offset.
-void
-PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG,
-                                        SDValue &ES, int &Offset) const {
-
-  MachineFunction &MF = DAG.getMachineFunction();
-  const Function *Func = MF.getFunction();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
-  const std::string Name = Func->getName();
-
-  FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(Op);
-
-  // FrameIndices are not stack offsets, but they represent a request
-  // for space on the stack. The space requested may be more than one byte.
-  // Therefore, to calculate the stack offset that a FrameIndex aligns
-  // with, we need to traverse all the FrameIndices available earlier in
-  // the list and add their requested size.
-  unsigned FIndex = FR->getIndex();
-  const char *tmpName;
-  if (FIndex < FuncInfo->getReservedFrameCount()) {
-    tmpName = ESNames::createESName(PAN::getFrameLabel(Name));
-    ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
-    Offset = 0;
-    for (unsigned i = 0; i < FIndex; ++i) {
-      Offset += MFI->getObjectSize(i);
-    }
-  } else {
-    // The FrameIndex has been made for some temporary storage.
-    tmpName = ESNames::createESName(PAN::getTempdataLabel(Name));
-    ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
-    Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex), MF);
-  }
-
-  return;
-}
-
-// This function legalizes the PIC16 addresses. If the pointer is
-//  -- a direct address variable,
-//     --> then a Banksel for that variable will be created.
-//  -- a ROM variable,
-//     --> then it will be treated as an indirect address.
-//  -- an indirect address,
-//     --> then the address will be loaded into FSR.
-//  -- an ADD with a constant operand,
-//     --> then the constant operand of the ADD will be returned as Offset,
-//         and the non-constant operand of the ADD will be treated as the
-//         pointer.
-// Returns the hi and lo parts of the address and the offset (in the case of
-// an ADD).
-void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG,
-                                          SDValue &Lo, SDValue &Hi,
-                                          unsigned &Offset, DebugLoc dl) const {
-
-  // Offset, by default, should be 0.
-  Offset = 0;
-
-  // If the pointer is an ADD with a constant,
-  // return the constant value as the offset.
-  if (Ptr.getOpcode() == ISD::ADD) {
-    SDValue OperLeft = Ptr.getOperand(0);
-    SDValue OperRight = Ptr.getOperand(1);
-    if ((OperLeft.getOpcode() == ISD::Constant) &&
-        (dyn_cast<ConstantSDNode>(OperLeft)->getZExtValue() < 32)) {
-      Offset = dyn_cast<ConstantSDNode>(OperLeft)->getZExtValue();
-      Ptr = OperRight;
-    } else if ((OperRight.getOpcode() == ISD::Constant) &&
-               (dyn_cast<ConstantSDNode>(OperRight)->getZExtValue() < 32)) {
-      Offset = dyn_cast<ConstantSDNode>(OperRight)->getZExtValue();
-      Ptr = OperLeft;
-    }
-  }
-
-  // If the pointer is of type i8 and is an external symbol,
-  // then treat it as a direct address.
-  // One example of such a case is storing to and loading from
-  // the function frame during a call.
-  if (Ptr.getValueType() == MVT::i8) {
-    switch (Ptr.getOpcode()) {
-    case ISD::TargetExternalSymbol:
-      Lo = Ptr;
-      Hi = DAG.getConstant(1, MVT::i8);
-      return;
-    }
-  }
-
-  // The expansion of a FrameIndex has Lo/Hi parts.
-  if (isDirectAddress(Ptr)) {
-    SDValue TFI = Ptr.getOperand(0).getOperand(0);
-    int FrameOffset;
-    if (TFI.getOpcode() == ISD::TargetFrameIndex) {
-      LegalizeFrameIndex(TFI, DAG, Lo, FrameOffset);
-      Hi = DAG.getConstant(1, MVT::i8);
-      Offset += FrameOffset;
-      return;
-    } else if (TFI.getOpcode() == ISD::TargetExternalSymbol) {
-      // The FrameIndex has already been expanded.
-      // Now just make use of its expansion.
-      Lo = TFI;
-      Hi = DAG.getConstant(1, MVT::i8);
-      SDValue FOffset = Ptr.getOperand(0).getOperand(1);
-      assert (FOffset.getOpcode() == ISD::Constant &&
-              "Invalid operand of PIC16ISD::Lo");
-      Offset += dyn_cast<ConstantSDNode>(FOffset)->getZExtValue();
-      return;
-    }
-  }
-
-  if (isDirectAddress(Ptr) && !isRomAddress(Ptr)) {
-    // Direct addressing case for RAM variables. The Hi part is constant
-    // and the Lo part is the TGA itself.
-    Lo = Ptr.getOperand(0).getOperand(0);
-
-    // For direct addresses, Hi is a constant. The value 1 for the constant
-    // signifies that a banksel needs to be generated for it. The value 0
-    // signifies that a banksel does not need to be generated for it.
-    // Mark it as 1 now and optimize later.
-    Hi = DAG.getConstant(1, MVT::i8);
-    return;
-  }
-
-  // Indirect addresses. Get the hi and lo parts of the ptr.
-  GetExpandedParts(Ptr, DAG, Lo, Hi);
-
-  // Put the hi and lo parts into FSR.
-  Lo = DAG.getNode(PIC16ISD::MTLO, dl, MVT::i8, Lo);
-  Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Hi);
-
-  return;
-}
-
-SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) const {
-  LoadSDNode *LD = dyn_cast<LoadSDNode>(SDValue(N, 0));
-  SDValue Chain = LD->getChain();
-  SDValue Ptr = LD->getBasePtr();
-  DebugLoc dl = LD->getDebugLoc();
-
-  SDValue Load, Offset;
-  SDVTList Tys;
-  EVT VT, NewVT;
-  SDValue PtrLo, PtrHi;
-  unsigned LoadOffset;
-
-  // Legalize direct/indirect addresses. This will give the lo and hi parts
-  // of the address and the offset.
-  LegalizeAddress(Ptr, DAG, PtrLo, PtrHi, LoadOffset, dl);
-
-  // Load from the pointer (direct address or FSR).
-  VT = N->getValueType(0);
-  unsigned NumLoads = VT.getSizeInBits() / 8;
-  std::vector<SDValue> PICLoads;
-  unsigned iter;
-  EVT MemVT = LD->getMemoryVT();
-  if (ISD::isNON_EXTLoad(N)) {
-    for (iter = 0; iter < NumLoads; ++iter) {
-      // Add the pointer offset if any.
-      Offset = DAG.getConstant(iter + LoadOffset, MVT::i8);
-      Tys = DAG.getVTList(MVT::i8, MVT::Other);
-      Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain, PtrLo, PtrHi,
-                         Offset);
-      PICLoads.push_back(Load);
-    }
-  } else {
-    // If it is an extended load, then use PIC16Load for the memory bytes,
-    // and for all extended bytes perform an action based on the type of
-    // extension, i.e. SignExtendedLoad or ZeroExtendedLoad.
-
-
-    // For extended loads this is the memory value type, i.e. without
-    // any extension.
-    EVT MemVT = LD->getMemoryVT();
-    unsigned MemBytes = MemVT.getSizeInBits() / 8;
-    // If MVT::i1 is extended to MVT::i8, then MemBytes will be zero,
-    // so set it to one.
-    if (MemBytes == 0) MemBytes = 1;
-
-    unsigned ExtdBytes = VT.getSizeInBits() / 8;
-    Offset = DAG.getConstant(LoadOffset, MVT::i8);
-
-    Tys = DAG.getVTList(MVT::i8, MVT::Other);
-    // For the MemBytes, generate PIC16Load with the proper offset.
-    for (iter = 0; iter < MemBytes; ++iter) {
-      // Add the pointer offset if any.
-      Offset = DAG.getConstant(iter + LoadOffset, MVT::i8);
-      Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain, PtrLo, PtrHi,
-                         Offset);
-      PICLoads.push_back(Load);
-    }
-
-    // For a SignExtendedLoad:
-    if (ISD::isSEXTLoad(N)) {
-      // For all ExtdBytes use the right-shifted (arithmetic) value of the
-      // highest MemByte.
-      SDValue SRA = DAG.getNode(ISD::SRA, dl, MVT::i8, Load,
-                                DAG.getConstant(7, MVT::i8));
-      for (iter = MemBytes; iter < ExtdBytes; ++iter) {
-        PICLoads.push_back(SRA);
-      }
-    } else if (ISD::isZEXTLoad(N) || ISD::isEXTLoad(N)) {
-    //} else if (ISD::isZEXTLoad(N)) {
-      // ZeroExtendedLoad -- for all ExtdBytes use the constant 0.
-      SDValue ConstZero = DAG.getConstant(0, MVT::i8);
-      for (iter = MemBytes; iter < ExtdBytes; ++iter) {
-        PICLoads.push_back(ConstZero);
-      }
-    }
-  }
-  SDValue BP;
-
-  if (VT == MVT::i8) {
-    // The operand of the Load is illegal -- the Load itself is legal.
-    return PICLoads[0];
-  }
-  else if (VT == MVT::i16) {
-    BP = DAG.getNode(ISD::BUILD_PAIR, dl, VT, PICLoads[0], PICLoads[1]);
-    if ((MemVT == MVT::i8) || (MemVT == MVT::i1))
-      Chain = getChain(PICLoads[0]);
-    else
-      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                          getChain(PICLoads[0]), getChain(PICLoads[1]));
-  } else if (VT == MVT::i32) {
-    SDValue BPs[2];
-    BPs[0] = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16,
-                         PICLoads[0], PICLoads[1]);
-    BPs[1] = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16,
-                         PICLoads[2], PICLoads[3]);
-    BP = DAG.getNode(ISD::BUILD_PAIR, dl, VT, BPs[0], BPs[1]);
-    if ((MemVT == MVT::i8) || (MemVT == MVT::i1))
-      Chain = getChain(PICLoads[0]);
-    else if (MemVT == MVT::i16)
-      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                          getChain(PICLoads[0]), getChain(PICLoads[1]));
-    else {
-      SDValue Chains[2];
-      Chains[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                              getChain(PICLoads[0]), getChain(PICLoads[1]));
-      Chains[1] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                              getChain(PICLoads[2]), getChain(PICLoads[3]));
-      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                          Chains[0], Chains[1]);
-    }
-  }
-  Tys = DAG.getVTList(VT, MVT::Other);
-  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, BP, Chain);
-}
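A compressed view of what the extended-load path above builds when an i8 in memory is sign-extended to an i16 register value (the nodes and operands are the ones used in this function):

    // One PIC16Load fetches the memory byte; the extension byte is the
    // arithmetic right shift of the highest loaded byte by 7, which
    // replicates the sign bit across all eight bits.
    SDValue B0  = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain,
                              PtrLo, PtrHi, Offset);
    SDValue Ext = DAG.getNode(ISD::SRA, dl, MVT::i8, B0,
                              DAG.getConstant(7, MVT::i8));
    SDValue Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, B0, Ext);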
-SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
-  // We should have handled larger operands in type legalizer itself.
-  assert (Op.getValueType() == MVT::i8 && "illegal shift to lower");
-
-  SDNode *N = Op.getNode();
-  SDValue Value = N->getOperand(0);
-  SDValue Amt = N->getOperand(1);
-  PIC16ISD::PIC16Libcall CallCode;
-  switch (N->getOpcode()) {
-  case ISD::SRA:
-    CallCode = PIC16ISD::SRA_I8;
-    break;
-  case ISD::SHL:
-    CallCode = PIC16ISD::SLL_I8;
-    break;
-  case ISD::SRL:
-    CallCode = PIC16ISD::SRL_I8;
-    break;
-  default:
-    assert (0 && "This shift is not implemented yet.");
-    return SDValue();
-  }
-  SmallVector<SDValue, 2> Ops(2);
-  Ops[0] = Value;
-  Ops[1] = Amt;
-  SDValue Call = MakePIC16Libcall(CallCode, N->getValueType(0), &Ops[0], 2,
-                                  true, DAG, N->getDebugLoc());
-  return Call;
-}
-
-SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
-  // We should have handled larger operands in type legalizer itself.
-  assert (Op.getValueType() == MVT::i8 && "illegal multiply to lower");
-
-  SDNode *N = Op.getNode();
-  SmallVector<SDValue, 2> Ops(2);
-  Ops[0] = N->getOperand(0);
-  Ops[1] = N->getOperand(1);
-  SDValue Call = MakePIC16Libcall(PIC16ISD::MUL_I8, N->getValueType(0),
-                                  &Ops[0], 2, true, DAG, N->getDebugLoc());
-  return Call;
-}
-
-void
-PIC16TargetLowering::LowerOperationWrapper(SDNode *N,
-                                           SmallVectorImpl<SDValue>&Results,
-                                           SelectionDAG &DAG) const {
-  SDValue Op = SDValue(N, 0);
-  SDValue Res;
-  unsigned i;
-  switch (Op.getOpcode()) {
-  case ISD::LOAD:
-    Res = ExpandLoad(Op.getNode(), DAG); break;
-  default: {
-    // All other operations are handled in LowerOperation.
-    Res = LowerOperation(Op, DAG);
-    if (Res.getNode())
-      Results.push_back(Res);
-
-    return;
-  }
-  }
-
-  N = Res.getNode();
-  unsigned NumValues = N->getNumValues();
-  for (i = 0; i < NumValues; i++) {
-    Results.push_back(SDValue(N, i));
-  }
-}
-
-SDValue PIC16TargetLowering::LowerOperation(SDValue Op,
-                                            SelectionDAG &DAG) const {
-  switch (Op.getOpcode()) {
-  case ISD::ADD:
-  case ISD::ADDC:
-  case ISD::ADDE:
-    return LowerADD(Op, DAG);
-  case ISD::SUB:
-  case ISD::SUBC:
-  case ISD::SUBE:
-    return LowerSUB(Op, DAG);
-  case ISD::LOAD:
-    return ExpandLoad(Op.getNode(), DAG);
-  case ISD::STORE:
-    return ExpandStore(Op.getNode(), DAG);
-  case ISD::MUL:
-    return LowerMUL(Op, DAG);
-  case ISD::SHL:
-  case ISD::SRA:
-  case ISD::SRL:
-    return LowerShift(Op, DAG);
-  case ISD::OR:
-  case ISD::AND:
-  case ISD::XOR:
-    return LowerBinOp(Op, DAG);
-  case ISD::BR_CC:
-    return LowerBR_CC(Op, DAG);
-  case ISD::SELECT_CC:
-    return LowerSELECT_CC(Op, DAG);
-  }
-  return SDValue();
-}
-
-SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op,
-                                                 SelectionDAG &DAG,
-                                                 DebugLoc dl) const {
-  assert (Op.getValueType() == MVT::i8
-          && "illegal value type to store on stack.");
-
-  MachineFunction &MF = DAG.getMachineFunction();
-  const Function *Func = MF.getFunction();
-  const std::string FuncName = Func->getName();
-
-
-  // Put the value on stack.
-  // Get a stack slot index and convert to ES.
-  int FI = MF.getFrameInfo()->CreateStackObject(1, 1, false);
-  const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName));
-  SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
-
-  // Store the value to ES.
-  SDValue Store = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other,
-                               DAG.getEntryNode(),
-                               Op, ES,
-                               DAG.getConstant (1, MVT::i8), // Banksel.
-                               DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF),
-                                                MVT::i8));
-
-  // Load the value from ES.
-  SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other);
-  SDValue Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Store,
-                             ES, DAG.getConstant (1, MVT::i8),
-                             DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF),
-                                              MVT::i8));
-
-  return Load.getValue(0);
-}
-
-SDValue PIC16TargetLowering::
-LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
-                           SDValue DataAddr_Lo, SDValue DataAddr_Hi,
-                           const SmallVectorImpl<ISD::OutputArg> &Outs,
-                           const SmallVectorImpl<SDValue> &OutVals,
-                           const SmallVectorImpl<ISD::InputArg> &Ins,
-                           DebugLoc dl, SelectionDAG &DAG) const {
-  unsigned NumOps = Outs.size();
-
-  // If call has no arguments then do nothing and return.
-  if (NumOps == 0)
-    return Chain;
-
-  std::vector<SDValue> Ops;
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-  SDValue Arg, StoreRet;
-
-  // For PIC16 ABI the arguments come after the return value.
-  unsigned RetVals = Ins.size();
-  for (unsigned i = 0, ArgOffset = RetVals; i < NumOps; i++) {
-    // Get the arguments.
-    Arg = OutVals[i];
-
-    Ops.clear();
-    Ops.push_back(Chain);
-    Ops.push_back(Arg);
-    Ops.push_back(DataAddr_Lo);
-    Ops.push_back(DataAddr_Hi);
-    Ops.push_back(DAG.getConstant(ArgOffset, MVT::i8));
-    Ops.push_back(InFlag);
-
-    StoreRet = DAG.getNode (PIC16ISD::PIC16StWF, dl, Tys, &Ops[0], Ops.size());
-
-    Chain = getChain(StoreRet);
-    InFlag = getOutFlag(StoreRet);
-    ArgOffset++;
-  }
-  return Chain;
-}
-
-SDValue PIC16TargetLowering::
-LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
-                         const SmallVectorImpl<ISD::OutputArg> &Outs,
-                         const SmallVectorImpl<SDValue> &OutVals,
-                         DebugLoc dl, SelectionDAG &DAG) const {
-  unsigned NumOps = Outs.size();
-  std::string Name;
-  SDValue Arg, StoreAt;
-  EVT ArgVT;
-  unsigned Size = 0;
-
-  // If call has no arguments then do nothing and return.
-  if (NumOps == 0)
-    return Chain;
-
-  // FIXME: This portion of code currently assumes only
-  // primitive types being passed as arguments.
-
-  // Legalize the address before use.
-  SDValue PtrLo, PtrHi;
-  unsigned AddressOffset;
-  int StoreOffset = 0;
-  LegalizeAddress(ArgLabel, DAG, PtrLo, PtrHi, AddressOffset, dl);
-  SDValue StoreRet;
-
-  std::vector<SDValue> Ops;
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-  for (unsigned i = 0, Offset = 0; i < NumOps; i++) {
-    // Get the argument.
-    Arg = OutVals[i];
-    StoreOffset = (Offset + AddressOffset);
-
-    // Store the argument on frame.
-
-    Ops.clear();
-    Ops.push_back(Chain);
-    Ops.push_back(Arg);
-    Ops.push_back(PtrLo);
-    Ops.push_back(PtrHi);
-    Ops.push_back(DAG.getConstant(StoreOffset, MVT::i8));
-    Ops.push_back(InFlag);
-
-    StoreRet = DAG.getNode (PIC16ISD::PIC16StWF, dl, Tys, &Ops[0], Ops.size());
-
-    Chain = getChain(StoreRet);
-    InFlag = getOutFlag(StoreRet);
-
-    // Update the frame offset to be used for the next argument.
-    ArgVT = Arg.getValueType();
-    Size = ArgVT.getSizeInBits();
-    Size = Size / 8;    // Calculate size in bytes.
-    Offset += Size;     // Increase the frame offset.
-  }
-  return Chain;
-}
-
-SDValue PIC16TargetLowering::
-LowerIndirectCallReturn(SDValue Chain, SDValue InFlag,
-                        SDValue DataAddr_Lo, SDValue DataAddr_Hi,
-                        const SmallVectorImpl<ISD::InputArg> &Ins,
-                        DebugLoc dl, SelectionDAG &DAG,
-                        SmallVectorImpl<SDValue> &InVals) const {
-  unsigned RetVals = Ins.size();
-
-  // If the call does not have anything to return
-  // then do nothing and go back.
-  if (RetVals == 0)
-    return Chain;
-
-  // The call has something to return.
-  SDValue LoadRet;
-
-  SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
-  for (unsigned i = 0; i < RetVals; i++) {
-    LoadRet = DAG.getNode(PIC16ISD::PIC16LdWF, dl, Tys, Chain, DataAddr_Lo,
-                          DataAddr_Hi, DAG.getConstant(i, MVT::i8),
-                          InFlag);
-    InFlag = getOutFlag(LoadRet);
-    Chain = getChain(LoadRet);
-    InVals.push_back(LoadRet);
-  }
-  return Chain;
-}
-
-SDValue PIC16TargetLowering::
-LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag,
-                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                      DebugLoc dl, SelectionDAG &DAG,
-                      SmallVectorImpl<SDValue> &InVals) const {
-
-  // Currently handling primitive types only. They will come in
-  // i8 parts.
-  unsigned RetVals = Ins.size();
-
-  // Return immediately if the return type is void.
-  if (RetVals == 0)
-    return Chain;
-
-  // The call has something to return.
-
-  // Legalize the address before use.
-  SDValue LdLo, LdHi;
-  unsigned LdOffset;
-  LegalizeAddress(RetLabel, DAG, LdLo, LdHi, LdOffset, dl);
-
-  SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
-  SDValue LoadRet;
-
-  for (unsigned i = 0, Offset = 0; i < RetVals; i++) {
-
-    LoadRet = DAG.getNode(PIC16ISD::PIC16LdWF, dl, Tys, Chain, LdLo, LdHi,
-                          DAG.getConstant(LdOffset + Offset, MVT::i8),
-                          InFlag);
-
-    InFlag = getOutFlag(LoadRet);
-
-    Chain = getChain(LoadRet);
-    Offset++;
-    InVals.push_back(LoadRet);
-  }
-
-  return Chain;
-}
-
-SDValue
-PIC16TargetLowering::LowerReturn(SDValue Chain,
-                                 CallingConv::ID CallConv, bool isVarArg,
-                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                 const SmallVectorImpl<SDValue> &OutVals,
-                                 DebugLoc dl, SelectionDAG &DAG) const {
-
-  // Number of values to return.
-  unsigned NumRet = Outs.size();
-
-  // The function always returns its value on the stack, with the offset
-  // starting from 0.
-  MachineFunction &MF = DAG.getMachineFunction();
-  const Function *F = MF.getFunction();
-  std::string FuncName = F->getName();
-
-  const char *tmpName = ESNames::createESName(PAN::getFrameLabel(FuncName));
-  SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
-  SDValue BS = DAG.getConstant(1, MVT::i8);
-  SDValue RetVal;
-  for (unsigned i = 0; i < NumRet; ++i) {
-    RetVal = OutVals[i];
-    Chain = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, RetVal,
-                         ES, BS,
-                         DAG.getConstant (i, MVT::i8));
-
-  }
-  return DAG.getNode(PIC16ISD::RET, dl, MVT::Other, Chain);
-}
-
-void PIC16TargetLowering::
-GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain,
-               SDValue &DataAddr_Lo, SDValue &DataAddr_Hi,
-               SelectionDAG &DAG) const {
-  assert (Callee.getOpcode() == PIC16ISD::PIC16Connect
-          && "Don't know what to do of such callee!!");
-  SDValue ZeroOperand = DAG.getConstant(0, MVT::i8);
-  SDValue SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand);
-  Chain = getChain(SeqStart);
-  SDValue OperFlag = getOutFlag(SeqStart); // To manage the data dependency.
-
-  // Get the Lo and Hi parts of the code address.
-  SDValue Lo = Callee.getOperand(0);
-  SDValue Hi = Callee.getOperand(1);
-
-  SDValue Data_Lo, Data_Hi;
-  SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag);
-  // Subtract 2 from the Address to get the Lower part of the DataAddress.
-  SDVTList VTList = DAG.getVTList(MVT::i8, MVT::Flag);
-  Data_Lo = DAG.getNode(ISD::SUBC, dl, VTList, Lo,
-                        DAG.getConstant(2, MVT::i8));
-  SDValue Ops[3] = { Hi, DAG.getConstant(0, MVT::i8), Data_Lo.getValue(1) };
-  Data_Hi = DAG.getNode(ISD::SUBE, dl, VTList, Ops, 3);
-  SDValue PCLATH = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, Data_Hi);
-  Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Data_Lo, PCLATH);
-  SDValue Call = DAG.getNode(PIC16ISD::CALLW, dl, Tys, Chain, Callee,
-                             OperFlag);
-  Chain = getChain(Call);
-  OperFlag = getOutFlag(Call);
-  SDValue SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
-                                      OperFlag);
-  Chain = getChain(SeqEnd);
-  OperFlag = getOutFlag(SeqEnd);
-
-  // Low part of the Data Address.
-  DataAddr_Lo = DAG.getNode(PIC16ISD::MTLO, dl, MVT::i8, Call, OperFlag);
-
-  // Make the second call.
-  SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand);
-  Chain = getChain(SeqStart);
-  OperFlag = getOutFlag(SeqStart); // To manage the data dependency.
-
-  // Subtract 1 from the Address to get the high part of the data address.
-  Data_Lo = DAG.getNode(ISD::SUBC, dl, VTList, Lo,
-                        DAG.getConstant(1, MVT::i8));
-  SDValue HiOps[3] = { Hi, DAG.getConstant(0, MVT::i8), Data_Lo.getValue(1) };
-  Data_Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
-  PCLATH = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, Data_Hi);
-
-  // Use the new Lo to make another CALLW.
-  Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Data_Lo, PCLATH);
-  Call = DAG.getNode(PIC16ISD::CALLW, dl, Tys, Chain, Callee, OperFlag);
-  Chain = getChain(Call);
-  OperFlag = getOutFlag(Call);
-  SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
-                              OperFlag);
-  Chain = getChain(SeqEnd);
-  OperFlag = getOutFlag(SeqEnd);
-  // Hi part of the Data Address.
-  DataAddr_Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Call, OperFlag);
-}
-
-SDValue
-PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
-                               CallingConv::ID CallConv, bool isVarArg,
-                               bool &isTailCall,
-                               const SmallVectorImpl<ISD::OutputArg> &Outs,
-                               const SmallVectorImpl<SDValue> &OutVals,
-                               const SmallVectorImpl<ISD::InputArg> &Ins,
-                               DebugLoc dl, SelectionDAG &DAG,
-                               SmallVectorImpl<SDValue> &InVals) const {
-  // The PIC16 target does not yet support tail call optimization.
-  isTailCall = false;
-
-  assert(Callee.getValueType() == MVT::i16 &&
-         "Don't know how to legalize this call node!!!");
-
-  // The flag to track if this is a direct or indirect call.
-  bool IsDirectCall = true;
-  unsigned RetVals = Ins.size();
-  unsigned NumArgs = Outs.size();
-
-  SDValue DataAddr_Lo, DataAddr_Hi;
-  if (!isa<GlobalAddressSDNode>(Callee) &&
-      !isa<ExternalSymbolSDNode>(Callee)) {
-    IsDirectCall = false;    // This is an indirect call.
-
-    // If this is an indirect call then to pass the arguments
-    // and read the return value back, we need the data address
-    // of the function being called.
-    // To get the data address, two more calls need to be made.
-
-    // Come here for indirect calls.
-    SDValue Lo, Hi;
-    // Indirect addresses. Get the hi and lo parts of the ptr.
-    GetExpandedParts(Callee, DAG, Lo, Hi);
-    // Connect the Lo and Hi parts of the callee with PIC16Connect.
-    Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Lo, Hi);
-
-    // Read the DataAddress only if we have to pass arguments or
-    // read a return value.
-    if ((RetVals > 0) || (NumArgs > 0))
-      GetDataAddress(dl, Callee, Chain, DataAddr_Lo, DataAddr_Hi, DAG);
-  }
-
-  SDValue ZeroOperand = DAG.getConstant(0, MVT::i8);
-
-  // Start the call sequence.
-  // Carrying the Constant 0 along the CALLSEQ_START
-  // because there is nothing else to carry.
-  SDValue SeqStart = DAG.getCALLSEQ_START(Chain, ZeroOperand);
-  Chain = getChain(SeqStart);
-  SDValue OperFlag = getOutFlag(SeqStart); // To manage the data dependency.
-  std::string Name;
-
-  // For any direct call, the callee will be a GlobalAddressNode or
-  // an ExternalSymbol.
-  SDValue ArgLabel, RetLabel;
-  if (IsDirectCall) {
-    // Considering the GlobalAddressNode case here.
-    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
-      const GlobalValue *GV = G->getGlobal();
-      Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i8);
-      Name = G->getGlobal()->getName();
-    } else { // Considering the ExternalSymbol case here.
-      ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Callee);
-      Callee = DAG.getTargetExternalSymbol(ES->getSymbol(), MVT::i8);
-      Name = ES->getSymbol();
-    }
-
-    // Label for argument passing.
-    const char *argFrame = ESNames::createESName(PAN::getArgsLabel(Name));
-    ArgLabel = DAG.getTargetExternalSymbol(argFrame, MVT::i8);
-
-    // Label for reading the return value.
-    const char *retName = ESNames::createESName(PAN::getRetvalLabel(Name));
-    RetLabel = DAG.getTargetExternalSymbol(retName, MVT::i8);
-  } else {
-    // Indirect call.
-    SDValue CodeAddr_Lo = Callee.getOperand(0);
-    SDValue CodeAddr_Hi = Callee.getOperand(1);
-
-    /*CodeAddr_Lo = DAG.getNode(ISD::ADD, dl, MVT::i8, CodeAddr_Lo,
-                                DAG.getConstant(2, MVT::i8));*/
-
-    // Move the Hi part into PCLATH.
-    CodeAddr_Hi = DAG.getNode(PIC16ISD::MTPCLATH, dl, MVT::i8, CodeAddr_Hi);
-    Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, CodeAddr_Lo,
-                         CodeAddr_Hi);
-  }
-
-  // Pass the arguments to the function before making the call.
-  SDValue CallArgs;
-  if (IsDirectCall) {
-    CallArgs = LowerDirectCallArguments(ArgLabel, Chain, OperFlag,
-                                        Outs, OutVals, dl, DAG);
-    Chain = getChain(CallArgs);
-    OperFlag = getOutFlag(CallArgs);
-  } else {
-    CallArgs = LowerIndirectCallArguments(Chain, OperFlag, DataAddr_Lo,
-                                          DataAddr_Hi, Outs, OutVals, Ins,
-                                          dl, DAG);
-    Chain = getChain(CallArgs);
-    OperFlag = getOutFlag(CallArgs);
-  }
-
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-  SDValue PICCall = DAG.getNode(PIC16ISD::CALL, dl, Tys, Chain, Callee,
-                                OperFlag);
-  Chain = getChain(PICCall);
-  OperFlag = getOutFlag(PICCall);
-
-
-  // Carrying the Constant 0 along the CALLSEQ_END
-  // because there is nothing else to carry.
-  SDValue SeqEnd = DAG.getCALLSEQ_END(Chain, ZeroOperand, ZeroOperand,
-                                      OperFlag);
-  Chain = getChain(SeqEnd);
-  OperFlag = getOutFlag(SeqEnd);
-
-  // Lower the return value reading after the call.
-  if (IsDirectCall)
-    return LowerDirectCallReturn(RetLabel, Chain, OperFlag,
-                                 Ins, dl, DAG, InVals);
-  else
-    return LowerIndirectCallReturn(Chain, OperFlag, DataAddr_Lo,
-                                   DataAddr_Hi, Ins, dl, DAG, InVals);
-}
-
-bool PIC16TargetLowering::isDirectLoad(const SDValue Op) const {
-  if (Op.getOpcode() == PIC16ISD::PIC16Load)
-    if (Op.getOperand(1).getOpcode() == ISD::TargetGlobalAddress
-        || Op.getOperand(1).getOpcode() == ISD::TargetExternalSymbol)
-      return true;
-  return false;
-}
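To summarize the direct-call machinery above: arguments and return values travel through per-callee static frames named after the callee, not through a conventional stack. The sequence LowerCall emits for a direct call, in schematic form (the labels follow the PAN helpers used above):

    // CALLSEQ_START (carries only the constant 0)
    //   PIC16StWF  <callee>.args + 0, 1, ...    ; store each argument byte
    //   PIC16ISD::CALL <callee>
    // CALLSEQ_END
    //   PIC16LdWF  <callee>.retval + 0, 1, ...  ; read back each return byte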
-// NeedToConvertToMemOp - Returns true if one of the operands of the
-// operation 'Op' needs to be put into memory. Also returns the
-// operand number of the operand to be converted in 'MemOp'. Remember, PIC16
-// has no instruction that can operate on two registers. Most insns take
-// one register and one memory operand (addwf) / constant (addlw).
-bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp,
-                                               SelectionDAG &DAG) const {
-  // If one of the operands is a constant, return false.
-  if (Op.getOperand(0).getOpcode() == ISD::Constant ||
-      Op.getOperand(1).getOpcode() == ISD::Constant)
-    return false;
-
-  // Return false if one of the operands is already a direct
-  // load and that operand has only one use.
-  if (isDirectLoad(Op.getOperand(0))) {
-    if (Op.getOperand(0).hasOneUse()) {
-      // The legal-and-profitable folding check uses the NodeId of DAG nodes.
-      // This NodeId is assigned by topological order; therefore, first
-      // assign the topological order and then perform the check.
-      // Note: Though this ordering is done before beginning legalization,
-      // nodes newly added during legalization have NodeId == -1 (NewNode),
-      // so proper ordering of the nodes is required before performing any
-      // check.
-      DAG.AssignTopologicalOrder();
-
-      // Direct-load operands are folded into binary operations, but before
-      // folding, verify that the folding is legal. Fold only if it is legal;
-      // otherwise convert this direct load to a separate memory operation.
-      if (SelectionDAGISel::IsLegalToFold(Op.getOperand(0),
-                                          Op.getNode(), Op.getNode(),
-                                          CodeGenOpt::Default))
-        return false;
-      else
-        MemOp = 0;
-    }
-  }
-
-  // For non-commutative operations there is no need to check the right
-  // operand, because folding the right operand could produce an incorrect
-  // operation.
-  if (!SelectionDAG::isCommutativeBinOp(Op.getOpcode()))
-    return true;
-
-  if (isDirectLoad(Op.getOperand(1))) {
-    if (Op.getOperand(1).hasOneUse()) {
-      // The legal-and-profitable folding check uses the NodeId of DAG nodes.
-      // This NodeId is assigned by topological order; therefore, first
-      // assign the topological order and then perform the check.
-      // Note: Though this ordering is done before beginning legalization,
-      // nodes newly added during legalization have NodeId == -1 (NewNode),
-      // so proper ordering of the nodes is required before performing any
-      // check.
-      DAG.AssignTopologicalOrder();
-
-      // Direct-load operands are folded into binary operations, but before
-      // folding, verify that the folding is legal. Fold only if it is legal;
-      // otherwise convert this direct load to a separate memory operation.
-      if (SelectionDAGISel::IsLegalToFold(Op.getOperand(1),
-                                          Op.getNode(), Op.getNode(),
-                                          CodeGenOpt::Default))
-        return false;
-      else
-        MemOp = 1;
-    }
-  }
-  return true;
-}
-
-// LowerBinOp - Lower a commutative binary operation that does not
-// affect the carry status flag.
-SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
-
-  // We should have handled larger operands in the type legalizer itself.
-  assert (Op.getValueType() == MVT::i8 && "illegal Op to lower");
-
-  unsigned MemOp = 1;
-  if (NeedToConvertToMemOp(Op, MemOp, DAG)) {
-    // Put one value on the stack.
-    SDValue NewVal = ConvertToMemOperand (Op.getOperand(MemOp), DAG, dl);
-
-    return DAG.getNode(Op.getOpcode(), dl, MVT::i8, Op.getOperand(MemOp ^ 1),
-                       NewVal);
-  }
-  else {
-    return Op;
-  }
-}
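A worked illustration of the constraint LowerBinOp handles: PIC16 ALU instructions take W plus either a file register (the addwf form) or a literal (the addlw form), never two registers. So with two register-resident operands A and B (hypothetical values here), one side is spilled and reloaded as a direct load that instruction selection can then fold:

    // Before: OR(A, B), neither operand a constant or a foldable direct load.
    SDValue NewVal = ConvertToMemOperand(B, DAG, dl); // spill B, reload via PIC16Load
    SDValue Res = DAG.getNode(ISD::OR, dl, MVT::i8, A, NewVal);
    // Now operand 1 is a direct load, matching the register + file-register form.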
-// LowerADD - Lower all types of ADD operations including the ones
-// that affect carry.
-SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const {
-  // We should have handled larger operands in the type legalizer itself.
-  assert (Op.getValueType() == MVT::i8 && "illegal add to lower");
-  DebugLoc dl = Op.getDebugLoc();
-  unsigned MemOp = 1;
-  if (NeedToConvertToMemOp(Op, MemOp, DAG)) {
-    // Put one value on the stack.
-    SDValue NewVal = ConvertToMemOperand (Op.getOperand(MemOp), DAG, dl);
-
-    // ADDC and ADDE produce two results.
-    SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag);
-
-    // ADDE has three operands; the last one is the carry bit.
-    if (Op.getOpcode() == ISD::ADDE)
-      return DAG.getNode(Op.getOpcode(), dl, Tys, Op.getOperand(MemOp ^ 1),
-                         NewVal, Op.getOperand(2));
-    // ADDC has two operands.
-    else if (Op.getOpcode() == ISD::ADDC)
-      return DAG.getNode(Op.getOpcode(), dl, Tys, Op.getOperand(MemOp ^ 1),
-                         NewVal);
-    // ADD it is. It produces only one result.
-    else
-      return DAG.getNode(Op.getOpcode(), dl, MVT::i8, Op.getOperand(MemOp ^ 1),
-                         NewVal);
-  }
-  else
-    return Op;
-}
-
-SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
-  // We should have handled larger operands in the type legalizer itself.
-  assert (Op.getValueType() == MVT::i8 && "illegal sub to lower");
-  unsigned MemOp = 1;
-  SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag);
-
-  // Since we don't have an instruction for X - c,
-  // we can change it to X + (-c).
-  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
-  if (C && (Op.getOpcode() == ISD::SUB))
-  {
-    return DAG.getNode(ISD::ADD,
-                       dl, MVT::i8, Op.getOperand(0),
-                       DAG.getConstant(0-(C->getZExtValue()), MVT::i8));
-  }
-
-  if (NeedToConvertToMemOp(Op, MemOp, DAG) ||
-      (isDirectLoad(Op.getOperand(1)) &&
-       (!isDirectLoad(Op.getOperand(0))) &&
-       (Op.getOperand(0).getOpcode() != ISD::Constant)))
-  {
-    // Put the first operand on the stack.
-    SDValue NewVal = ConvertToMemOperand (Op.getOperand(0), DAG, dl);
-
-    switch (Op.getOpcode()) {
-    default:
-      assert (0 && "Opcode unknown.");
-    case ISD::SUBE:
-      return DAG.getNode(Op.getOpcode(),
-                         dl, Tys, NewVal, Op.getOperand(1),
-                         Op.getOperand(2));
-      break;
-    case ISD::SUBC:
-      return DAG.getNode(Op.getOpcode(),
-                         dl, Tys, NewVal, Op.getOperand(1));
-      break;
-    case ISD::SUB:
-      return DAG.getNode(Op.getOpcode(),
-                         dl, MVT::i8, NewVal, Op.getOperand(1));
-      break;
-    }
-  }
-  else
-    return Op;
-}
-
-void PIC16TargetLowering::InitReservedFrameCount(const Function *F,
-                                                 SelectionDAG &DAG) const {
-  MachineFunction &MF = DAG.getMachineFunction();
-  PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>();
-
-  unsigned NumArgs = F->arg_size();
-
-  bool isVoidFunc = (F->getReturnType()->getTypeID() == Type::VoidTyID);
-
-  if (isVoidFunc)
-    FuncInfo->setReservedFrameCount(NumArgs);
-  else
-    FuncInfo->setReservedFrameCount(NumArgs + 1);
-}
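A quick worked case of the rewrite at the top of LowerSUB: X - c becomes X + (-c), with the negation wrapping modulo 256 in the i8 constant. One plausible reason this pays off is that PIC16's subtract-literal form computes literal - W rather than W - literal (see the comment in getPIC16Cmp below), so the ADD form maps more directly onto addlw. With a hypothetical operand X:

    // X - 3  ==>  X + 253   (0 - 3 truncated to 8 bits is 253, i.e. -3 mod 256)
    SDValue Res = DAG.getNode(ISD::ADD, dl, MVT::i8, X,
                              DAG.getConstant(0 - 3, MVT::i8));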
-// LowerFormalArguments - Argument values are loaded from
-// <fname>.args + offset. All arguments are already broken into legalized
-// types, so the offset just runs from 0 to NumArgVals - 1.
-SDValue
-PIC16TargetLowering::LowerFormalArguments(SDValue Chain,
-                                          CallingConv::ID CallConv,
-                                          bool isVarArg,
-                                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                                          DebugLoc dl,
-                                          SelectionDAG &DAG,
-                                          SmallVectorImpl<SDValue> &InVals)
-                                          const {
-  unsigned NumArgVals = Ins.size();
-
-  // Get the callee's name to create the <fname>.args label to pass args.
-  MachineFunction &MF = DAG.getMachineFunction();
-  const Function *F = MF.getFunction();
-  std::string FuncName = F->getName();
-
-  // Reset the map of FI and TmpOffset.
-  ResetTmpOffsetMap(DAG);
-  // Initialize the ReservedFrameCount.
-  InitReservedFrameCount(F, DAG);
-
-  // Create the <fname>.args external symbol.
-  const char *tmpName = ESNames::createESName(PAN::getArgsLabel(FuncName));
-  SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
-
-  // Load the arg values from the label + offset.
-  SDVTList VTs = DAG.getVTList(MVT::i8, MVT::Other);
-  SDValue BS = DAG.getConstant(1, MVT::i8);
-  for (unsigned i = 0; i < NumArgVals; ++i) {
-    SDValue Offset = DAG.getConstant(i, MVT::i8);
-    SDValue PICLoad = DAG.getNode(PIC16ISD::PIC16LdArg, dl, VTs, Chain, ES, BS,
-                                  Offset);
-    Chain = getChain(PICLoad);
-    InVals.push_back(PICLoad);
-  }
-
-  return Chain;
-}
-
-// Perform the DAGCombine of PIC16Load.
-// FIXME - Need a more elaborate comment here.
-SDValue PIC16TargetLowering::
-PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const {
-  SelectionDAG &DAG = DCI.DAG;
-  SDValue Chain = N->getOperand(0);
-  if (N->hasNUsesOfValue(0, 0)) {
-    DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), Chain);
-  }
-  return SDValue();
-}
-
-// For all functions with arguments, some STORE nodes are generated that
-// store the arguments on the FrameIndex. However, in PIC16 the arguments
-// are passed on the stack only, so these STORE nodes are redundant.
-// They will be removed in PerformStoreCombine.
-//
-// Currently this function does nothing and will be updated to remove the
-// unwanted store operations.
-SDValue PIC16TargetLowering::
-PerformStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const {
-  return SDValue(N, 0);
-  /*
-  // Storing an undef value is of no use, so remove it.
-  if (isStoringUndef(N, Chain, DAG)) {
-    return Chain; // remove the store and return the chain
-  }
-  // else everything is ok.
-  return SDValue(N, 0);
-  */
-}
-
-SDValue PIC16TargetLowering::PerformDAGCombine(SDNode *N,
-                                               DAGCombinerInfo &DCI) const {
-  switch (N->getOpcode()) {
-  case ISD::STORE:
-    return PerformStoreCombine(N, DCI);
-  case PIC16ISD::PIC16Load:
-    return PerformPIC16LoadCombine(N, DCI);
-  }
-  return SDValue();
-}
-
-static PIC16CC::CondCodes IntCCToPIC16CC(ISD::CondCode CC) {
-  switch (CC) {
-  default: llvm_unreachable("Unknown condition code!");
-  case ISD::SETNE:  return PIC16CC::NE;
-  case ISD::SETEQ:  return PIC16CC::EQ;
-  case ISD::SETGT:  return PIC16CC::GT;
-  case ISD::SETGE:  return PIC16CC::GE;
-  case ISD::SETLT:  return PIC16CC::LT;
-  case ISD::SETLE:  return PIC16CC::LE;
-  case ISD::SETULT: return PIC16CC::ULT;
-  case ISD::SETULE: return PIC16CC::ULE;
-  case ISD::SETUGE: return PIC16CC::UGE;
-  case ISD::SETUGT: return PIC16CC::UGT;
-  }
-}
-// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so,
-// set LHS/RHS to the LHS/RHS of the setcc and SPCC to the condition.
-static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
-                             ISD::CondCode CC, unsigned &SPCC) {
-  if (isa<ConstantSDNode>(RHS) &&
-      cast<ConstantSDNode>(RHS)->isNullValue() &&
-      CC == ISD::SETNE &&
-      (LHS.getOpcode() == PIC16ISD::SELECT_ICC &&
-       LHS.getOperand(3).getOpcode() == PIC16ISD::SUBCC) &&
-      isa<ConstantSDNode>(LHS.getOperand(0)) &&
-      isa<ConstantSDNode>(LHS.getOperand(1)) &&
-      cast<ConstantSDNode>(LHS.getOperand(0))->isOne() &&
-      cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) {
-    SDValue CMPCC = LHS.getOperand(3);
-    SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
-    LHS = CMPCC.getOperand(0);
-    RHS = CMPCC.getOperand(1);
-  }
-}
-
-// Returns the appropriate CMP insn and the corresponding condition code in
-// PIC16CC.
-SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS,
-                                         unsigned CC, SDValue &PIC16CC,
-                                         SelectionDAG &DAG, DebugLoc dl) const {
-  PIC16CC::CondCodes CondCode = (PIC16CC::CondCodes) CC;
-
-  // PIC16 sub is literal - W, so swap the operands and the condition if
-  // needed; e.g. a < 12 can be rewritten as 12 > a.
-  if (RHS.getOpcode() == ISD::Constant) {
-
-    SDValue Tmp = LHS;
-    LHS = RHS;
-    RHS = Tmp;
-
-    switch (CondCode) {
-    default: break;
-    case PIC16CC::LT:
-      CondCode = PIC16CC::GT;
-      break;
-    case PIC16CC::GT:
-      CondCode = PIC16CC::LT;
-      break;
-    case PIC16CC::ULT:
-      CondCode = PIC16CC::UGT;
-      break;
-    case PIC16CC::UGT:
-      CondCode = PIC16CC::ULT;
-      break;
-    case PIC16CC::GE:
-      CondCode = PIC16CC::LE;
-      break;
-    case PIC16CC::LE:
-      CondCode = PIC16CC::GE;
-      break;
-    case PIC16CC::ULE:
-      CondCode = PIC16CC::UGE;
-      break;
-    case PIC16CC::UGE:
-      CondCode = PIC16CC::ULE;
-      break;
-    }
-  }
-
-  PIC16CC = DAG.getConstant(CondCode, MVT::i8);
-
-  // For signed comparisons, bias both operands by XORing them with 128
-  // (flipping the sign bit) so that an unsigned compare orders them
-  // correctly.
-  SDValue Mask = DAG.getConstant(128, MVT::i8);
-  if (isSignedComparison(CondCode)) {
-    LHS = DAG.getNode (ISD::XOR, dl, MVT::i8, LHS, Mask);
-    RHS = DAG.getNode (ISD::XOR, dl, MVT::i8, RHS, Mask);
-  }
-
-  SDVTList VTs = DAG.getVTList (MVT::i8, MVT::Flag);
-  // We can use a subtract operation to set the condition codes, but
-  // we may need to put one operand in memory. Nothing to do if the
-  // first operand is already of a valid kind (a direct load for subwf,
-  // or a literal for sublw) and is used only by this operation.
-  if ((LHS.getOpcode() == ISD::Constant || isDirectLoad(LHS))
-      && LHS.hasOneUse())
-    return DAG.getNode(PIC16ISD::SUBCC, dl, VTs, LHS, RHS);
-
-  // Otherwise convert the first operand to a memory operand.
-  LHS = ConvertToMemOperand (LHS, DAG, dl);
-  return DAG.getNode(PIC16ISD::SUBCC, dl, VTs, LHS, RHS);
-}
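A worked check of the sign-bit flip used in getPIC16Cmp above: XORing both sides with 128 maps two's-complement order onto unsigned order, so the unsigned flags produced by SUBCC stay valid for signed predicates.

    // signed:  -1 (0xFF) < 1 (0x01), but unsigned 0xFF > 0x01 -- wrong order.
    // biased:  0xFF ^ 0x80 = 0x7F and 0x01 ^ 0x80 = 0x81; 0x7F < 0x81 -- right.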
-SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op,
-                                            SelectionDAG &DAG) const {
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
-  SDValue TrueVal = Op.getOperand(2);
-  SDValue FalseVal = Op.getOperand(3);
-  unsigned ORIGCC = ~0;
-  DebugLoc dl = Op.getDebugLoc();
-
-  // If this is a select_cc of a "setcc", and if the setcc got lowered into
-  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
-  // I.e.
-  // A setcc: lhs, rhs, cc is expanded by llvm to
-  //   select_cc: result of setcc, 0, 1, 0, setne
-  // We can think of it as:
-  //   select_cc: lhs, rhs, 1, 0, cc
-  LookThroughSetCC(LHS, RHS, CC, ORIGCC);
-  if (ORIGCC == ~0U) ORIGCC = IntCCToPIC16CC (CC);
-
-  SDValue PIC16CC;
-  SDValue Cmp = getPIC16Cmp(LHS, RHS, ORIGCC, PIC16CC, DAG, dl);
-
-  return DAG.getNode (PIC16ISD::SELECT_ICC, dl, TrueVal.getValueType(), TrueVal,
-                      FalseVal, PIC16CC, Cmp.getValue(1));
-}
-
-MachineBasicBlock *
-PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
-                                                 MachineBasicBlock *BB) const {
-  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
-  unsigned CC = (PIC16CC::CondCodes)MI->getOperand(3).getImm();
-  DebugLoc dl = MI->getDebugLoc();
-
-  // To "insert" a SELECT_CC instruction, we actually have to insert the
-  // diamond control-flow pattern. The incoming instruction knows the
-  // destination vreg to set, the condition code register to branch on, the
-  // true/false values to select between, and a branch opcode to use.
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineFunction::iterator It = BB;
-  ++It;
-
-  //  thisMBB:
-  //  ...
-  //   TrueVal = ...
-  //   [f]bCC copy1MBB
-  //   fallthrough --> copy0MBB
-  MachineBasicBlock *thisMBB = BB;
-  MachineFunction *F = BB->getParent();
-  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  BuildMI(BB, dl, TII.get(PIC16::pic16brcond)).addMBB(sinkMBB).addImm(CC);
-  F->insert(It, copy0MBB);
-  F->insert(It, sinkMBB);
-
-  // Transfer the remainder of BB and its successor edges to sinkMBB.
-  sinkMBB->splice(sinkMBB->begin(), BB,
-                  llvm::next(MachineBasicBlock::iterator(MI)),
-                  BB->end());
-  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-  // Next, add the true and fallthrough blocks as its successors.
-  BB->addSuccessor(copy0MBB);
-  BB->addSuccessor(sinkMBB);
-
-  //  copy0MBB:
-  //   %FalseValue = ...
-  //   # fallthrough to sinkMBB
-  BB = copy0MBB;
-
-  // Update machine-CFG edges.
-  BB->addSuccessor(sinkMBB);
-
-  //  sinkMBB:
-  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
-  //  ...
-  BB = sinkMBB;
-  BuildMI(*BB, BB->begin(), dl,
-          TII.get(PIC16::PHI), MI->getOperand(0).getReg())
-    .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
-    .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
-
-  MI->eraseFromParent();   // The pseudo instruction is gone now.
-  return BB;
-}
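For reference, the diamond that the custom inserter above builds around the select pseudo (on a true condition it branches straight to the join block, since the true value is already available in thisMBB):

    //      thisMBB            pic16brcond CC --> sinkMBB
    //      /      \
    // copy0MBB     \          computes %FalseValue, falls through
    //      \       /
    //      sinkMBB            PHI [%FalseValue, copy0MBB], [%TrueValue, thisMBB]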
-SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
-  SDValue Chain = Op.getOperand(0);
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
-  SDValue LHS = Op.getOperand(2);   // LHS of the condition.
-  SDValue RHS = Op.getOperand(3);   // RHS of the condition.
-  SDValue Dest = Op.getOperand(4);  // BB to jump to.
-  unsigned ORIGCC = ~0;
-  DebugLoc dl = Op.getDebugLoc();
-
-  // If this is a br_cc of a "setcc", and if the setcc got lowered into
-  // a CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
-  LookThroughSetCC(LHS, RHS, CC, ORIGCC);
-  if (ORIGCC == ~0U) ORIGCC = IntCCToPIC16CC (CC);
-
-  // Get the Compare insn and the condition code.
-  SDValue PIC16CC;
-  SDValue Cmp = getPIC16Cmp(LHS, RHS, ORIGCC, PIC16CC, DAG, dl);
-
-  return DAG.getNode(PIC16ISD::BRCOND, dl, MVT::Other, Chain, Dest, PIC16CC,
-                     Cmp.getValue(1));
-}
-
diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h
deleted file mode 100644
index d942af4..0000000
--- a/lib/Target/PIC16/PIC16ISelLowering.h
+++ /dev/null
@@ -1,253 +0,0 @@
-//===-- PIC16ISelLowering.h - PIC16 DAG Lowering Interface ------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that PIC16 uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16ISELLOWERING_H
-#define PIC16ISELLOWERING_H
-
-#include "PIC16.h"
-#include "PIC16Subtarget.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
-#include <map>
-
-namespace llvm {
-  namespace PIC16ISD {
-    enum NodeType {
-      // Start the numbering from where ISD NodeType finishes.
-      FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
-      Lo,            // Low 8 bits of GlobalAddress.
-      Hi,            // High 8 bits of GlobalAddress.
-      PIC16Load,
-      PIC16LdArg,    // A replica of PIC16Load, used to load function
-                     // arguments; it facilitates some store-removal
-                     // optimizations.
-
-      PIC16LdWF,
-      PIC16Store,
-      PIC16StWF,
-      Banksel,
-      MTLO,          // Move to the low part of FSR.
-      MTHI,          // Move to the high part of FSR.
-      MTPCLATH,      // Move to PCLATH.
-      PIC16Connect,  // General connector for PIC16 nodes.
-      BCF,
-      LSLF,          // PIC16 logical shift left.
-      LRLF,          // PIC16 logical shift right.
-      RLF,           // Rotate left through carry.
-      RRF,           // Rotate right through carry.
-      CALL,          // PIC16 Call instruction.
-      CALLW,         // PIC16 CALLW instruction.
-      SUBCC,         // Compare for equality or inequality.
-      SELECT_ICC,    // Pseudo to be caught in the scheduler and expanded to
-                     // brcond.
-      BRCOND,        // Conditional branch.
-      RET,           // Return.
-      Dummy
-    };
-
-    // Keep track of the different address spaces.
-    enum AddressSpace {
-      RAM_SPACE = 0,   // RAM address space.
-      ROM_SPACE = 1    // ROM address space number is 1.
-    };
-    enum PIC16Libcall {
-      MUL_I8 = RTLIB::UNKNOWN_LIBCALL + 1,
-      SRA_I8,
-      SLL_I8,
-      SRL_I8,
-      PIC16UnknownCall
-    };
-  }
-
-
-  //===--------------------------------------------------------------------===//
-  // TargetLowering Implementation
-  //===--------------------------------------------------------------------===//
-  class PIC16TargetLowering : public TargetLowering {
-  public:
-    explicit PIC16TargetLowering(PIC16TargetMachine &TM);
-
-    /// getTargetNodeName - This method returns the name of a target specific
-    /// DAG node.
-    virtual const char *getTargetNodeName(unsigned Opcode) const;
-    /// getSetCCResultType - Return the ISD::SETCC ValueType.
-    virtual MVT::SimpleValueType getSetCCResultType(EVT ValType) const;
-    virtual MVT::SimpleValueType getCmpLibcallReturnType() const;
-    SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG) const;
-    // Call returns.
-    SDValue
-    LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag,
-                          const SmallVectorImpl<ISD::InputArg> &Ins,
-                          DebugLoc dl, SelectionDAG &DAG,
-                          SmallVectorImpl<SDValue> &InVals) const;
-    SDValue
-    LowerIndirectCallReturn(SDValue Chain, SDValue InFlag,
-                            SDValue DataAddr_Lo, SDValue DataAddr_Hi,
-                            const SmallVectorImpl<ISD::InputArg> &Ins,
-                            DebugLoc dl, SelectionDAG &DAG,
-                            SmallVectorImpl<SDValue> &InVals) const;
-
-    // Call arguments.
-    SDValue
-    LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
-                             const SmallVectorImpl<ISD::OutputArg> &Outs,
-                             const SmallVectorImpl<SDValue> &OutVals,
-                             DebugLoc dl, SelectionDAG &DAG) const;
-
-    SDValue
-    LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
-                               SDValue DataAddr_Lo, SDValue DataAddr_Hi,
-                               const SmallVectorImpl<ISD::OutputArg> &Outs,
-                               const SmallVectorImpl<SDValue> &OutVals,
-                               const SmallVectorImpl<ISD::InputArg> &Ins,
-                               DebugLoc dl, SelectionDAG &DAG) const;
-
-    SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-    SDValue getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned OrigCC, SDValue &CC,
-                        SelectionDAG &DAG, DebugLoc dl) const;
-    virtual MachineBasicBlock *
-      EmitInstrWithCustomInserter(MachineInstr *MI,
-                                  MachineBasicBlock *MBB) const;
-
-    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-    virtual void ReplaceNodeResults(SDNode *N,
-                                    SmallVectorImpl<SDValue> &Results,
-                                    SelectionDAG &DAG) const;
-    virtual void LowerOperationWrapper(SDNode *N,
-                                       SmallVectorImpl<SDValue> &Results,
-                                       SelectionDAG &DAG) const;
-
-    virtual SDValue
-    LowerFormalArguments(SDValue Chain,
-                         CallingConv::ID CallConv,
-                         bool isVarArg,
-                         const SmallVectorImpl<ISD::InputArg> &Ins,
-                         DebugLoc dl, SelectionDAG &DAG,
-                         SmallVectorImpl<SDValue> &InVals) const;
-
-    virtual SDValue
-    LowerCall(SDValue Chain, SDValue Callee,
-              CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
-              const SmallVectorImpl<ISD::OutputArg> &Outs,
-              const SmallVectorImpl<SDValue> &OutVals,
-              const SmallVectorImpl<ISD::InputArg> &Ins,
-              DebugLoc dl, SelectionDAG &DAG,
-              SmallVectorImpl<SDValue> &InVals) const;
-
-    virtual SDValue
-    LowerReturn(SDValue Chain,
-                CallingConv::ID CallConv, bool isVarArg,
-                const SmallVectorImpl<ISD::OutputArg> &Outs,
-                const SmallVectorImpl<SDValue> &OutVals,
-                DebugLoc dl, SelectionDAG &DAG) const;
-
-    SDValue ExpandStore(SDNode *N, SelectionDAG &DAG) const;
-    SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG) const;
-    SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) const;
-    SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) const;
-    SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) const;
-
-    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-    SDValue PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-    SDValue PerformStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
FrameIndex. If any TmpOffset - // already exists for the FI then it returns the same else it creates the - // new offset and returns. - unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size, - MachineFunction &MF) const; - void ResetTmpOffsetMap(SelectionDAG &DAG) const; - void InitReservedFrameCount(const Function *F, - SelectionDAG &DAG) const; - - /// getFunctionAlignment - Return the Log2 alignment of this function. - virtual unsigned getFunctionAlignment(const Function *) const { - // FIXME: The function never seems to be aligned. - return 1; - } - protected: - std::pair<const TargetRegisterClass*, uint8_t> - findRepresentativeClass(EVT VT) const; - private: - // If the Node is a BUILD_PAIR representing a direct Address, - // then this function will return true. - bool isDirectAddress(const SDValue &Op) const; - - // If the Node is a DirectAddress in ROM_SPACE then this - // function will return true - bool isRomAddress(const SDValue &Op) const; - - // Extract the Lo and Hi component of Op. - void GetExpandedParts(SDValue Op, SelectionDAG &DAG, SDValue &Lo, - SDValue &Hi) const; - - - // Load pointer can be a direct or indirect address. In PIC16 direct - // addresses need Banksel and Indirect addresses need to be loaded to - // FSR first. Handle address specific cases here. - void LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, SDValue &Chain, - SDValue &NewPtr, unsigned &Offset, DebugLoc dl) const; - - // FrameIndex should be broken down into ExternalSymbol and FrameOffset. - void LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, SDValue &ES, - int &Offset) const; - - // For indirect calls data address of the callee frame need to be - // extracted. This function fills the arguments DataAddr_Lo and - // DataAddr_Hi with the address of the callee frame. - void GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain, - SDValue &DataAddr_Lo, SDValue &DataAddr_Hi, - SelectionDAG &DAG) const; - - // We can not have both operands of a binary operation in W. - // This function is used to put one operand on stack and generate a load. - SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, - DebugLoc dl) const; - - // This function checks if we need to put an operand of an operation on - // stack and generate a load or not. - // DAG parameter is required to access DAG information during - // analysis. - bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, - SelectionDAG &DAG) const; - - /// Subtarget - Keep a pointer to the PIC16Subtarget around so that we can - /// make the right decision when generating code for different targets. - const PIC16Subtarget *Subtarget; - - - // Extending the LIB Call framework of LLVM - // to hold the names of PIC16Libcalls. - const char *PIC16LibcallNames[PIC16ISD::PIC16UnknownCall]; - - // To set and retrieve the lib call names. - void setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, const char *Name); - const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const; - - // Make PIC16 Libcall. - SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, EVT RetVT, - const SDValue *Ops, unsigned NumOps, bool isSigned, - SelectionDAG &DAG, DebugLoc dl) const; - - // Check if operation has a direct load operand. 
- inline bool isDirectLoad(const SDValue Op) const; - }; -} // namespace llvm - -#endif // PIC16ISELLOWERING_H diff --git a/lib/Target/PIC16/PIC16InstrFormats.td b/lib/Target/PIC16/PIC16InstrFormats.td deleted file mode 100644 index e213ea8..0000000 --- a/lib/Target/PIC16/PIC16InstrFormats.td +++ /dev/null @@ -1,117 +0,0 @@ -//===- PIC16InstrFormats.td - PIC16 Instruction Formats-------*- tblgen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Describe PIC16 instructions format -// -// All the possible PIC16 fields are: -// -// opcode - operation code. -// f - 7-bit register file address. -// d - 1-bit direction specifier -// k - 8/11 bit literals -// b - 3 bits bit num specifier -// -//===----------------------------------------------------------------------===// - -// Generic PIC16 Format -// PIC16 Instructions are 14-bit wide. - -// FIXME: Add Cooper Specific Formats if any. - -class PIC16Inst<dag outs, dag ins, string asmstr, list<dag> pattern> - : Instruction { - field bits<14> Inst; - - let Namespace = "PIC16"; - dag OutOperandList = outs; - dag InOperandList = ins; - let AsmString = asmstr; - let Pattern = pattern; -} - - -//===----------------------------------------------------------------------===// -// Byte Oriented instruction class in PIC16 : <|opcode|d|f|> -// opcode = 6 bits. -// d = direction = 1 bit. -// f = file register address = 7 bits. -//===----------------------------------------------------------------------===// - -class ByteFormat<bits<6> opcode, dag outs, dag ins, string asmstr, - list<dag> pattern> - :PIC16Inst<outs, ins, asmstr, pattern> { - bits<1> d; - bits<7> f; - - let Inst{13-8} = opcode; - - let Inst{7} = d; - let Inst{6-0} = f; -} - -//===----------------------------------------------------------------------===// -// Bit Oriented instruction class in PIC16 : <|opcode|b|f|> -// opcode = 4 bits. -// b = bit specifier = 3 bits. -// f = file register address = 7 bits. -//===----------------------------------------------------------------------===// - -class BitFormat<bits<4> opcode, dag outs, dag ins, string asmstr, - list<dag> pattern> - : PIC16Inst<outs, ins, asmstr, pattern> { - bits<3> b; - bits<7> f; - - let Inst{13-10} = opcode; - - let Inst{9-7} = b; - let Inst{6-0} = f; -} - -//===----------------------------------------------------------------------===// -// Literal Format instruction class in PIC16 : <|opcode|k|> -// opcode = 6 bits -// k = literal = 8 bits -//===----------------------------------------------------------------------===// - -class LiteralFormat<bits<6> opcode, dag outs, dag ins, string asmstr, - list<dag> pattern> - : PIC16Inst<outs, ins, asmstr, pattern> { - bits<8> k; - - let Inst{13-8} = opcode; - - let Inst{7-0} = k; -} - -//===----------------------------------------------------------------------===// -// Control Format instruction class in PIC16 : <|opcode|k|> -// opcode = 3 bits. -// k = jump address = 11 bits. 
-//===----------------------------------------------------------------------===// - -class ControlFormat<bits<3> opcode, dag outs, dag ins, string asmstr, - list<dag> pattern> - : PIC16Inst<outs, ins, asmstr, pattern> { - bits<11> k; - - let Inst{13-11} = opcode; - - let Inst{10-0} = k; -} - -//===----------------------------------------------------------------------===// -// Pseudo instruction class in PIC16 -//===----------------------------------------------------------------------===// - -class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern> - : PIC16Inst<outs, ins, asmstr, pattern> { - let Inst{13-6} = 0; -} diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp deleted file mode 100644 index 81257f3..0000000 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ /dev/null @@ -1,224 +0,0 @@ -//===- PIC16InstrInfo.cpp - PIC16 Instruction Information -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the PIC16 implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#include "PIC16.h" -#include "PIC16ABINames.h" -#include "PIC16InstrInfo.h" -#include "PIC16TargetMachine.h" -#include "PIC16GenInstrInfo.inc" -#include "llvm/Function.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include <cstdio> - - -using namespace llvm; - -// FIXME: Add the subtarget support on this constructor. -PIC16InstrInfo::PIC16InstrInfo(PIC16TargetMachine &tm) - : TargetInstrInfoImpl(PIC16Insts, array_lengthof(PIC16Insts)), - TM(tm), - RegInfo(*this, *TM.getSubtargetImpl()) {} - - -/// isStoreToStackSlot - If the specified machine instruction is a direct -/// store to a stack slot, return the virtual or physical register number of -/// the source reg along with the FrameIndex of the loaded stack slot. -/// If not, return 0. This predicate must return 0 if the instruction has -/// any side effects other than storing to the stack slot. -unsigned PIC16InstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - if (MI->getOpcode() == PIC16::movwf - && MI->getOperand(0).isReg() - && MI->getOperand(1).isSymbol()) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - return 0; -} - -/// isLoadFromStackSlot - If the specified machine instruction is a direct -/// load from a stack slot, return the virtual or physical register number of -/// the dest reg along with the FrameIndex of the stack slot. -/// If not, return 0. This predicate must return 0 if the instruction has -/// any side effects other than storing to the stack slot. 
-unsigned PIC16InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - if (MI->getOpcode() == PIC16::movf - && MI->getOperand(0).isReg() - && MI->getOperand(1).isSymbol()) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - return 0; -} - - -void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - const PIC16TargetLowering *PTLI = TM.getTargetLowering(); - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - - const Function *Func = MBB.getParent()->getFunction(); - const std::string FuncName = Func->getName(); - - const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); - - // On the order of operands here: think "movwf SrcReg, tmp_slot, offset". - if (RC == PIC16::GPRRegisterClass) { - //MachineFunction &MF = *MBB.getParent(); - //MachineRegisterInfo &RI = MF.getRegInfo(); - BuildMI(MBB, I, DL, get(PIC16::movwf)) - .addReg(SrcReg, getKillRegState(isKill)) - .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent())) - .addExternalSymbol(tmpName) - .addImm(1); // Emit banksel for it. - } - else if (RC == PIC16::FSR16RegisterClass) { - // This is a 16-bit register and the frameindex given by llvm is of - // size two here. Break this index N into two zero based indexes and - // put one into the map. The second one is always obtained by adding 1 - // to the first zero based index. In fact it is going to use 3 slots - // as saving FSRs corrupts W also and hence we need to save/restore W also. - - unsigned opcode = (SrcReg == PIC16::FSR0) ? PIC16::save_fsr0 - : PIC16::save_fsr1; - BuildMI(MBB, I, DL, get(opcode)) - .addReg(SrcReg, getKillRegState(isKill)) - .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent())) - .addExternalSymbol(tmpName) - .addImm(1); // Emit banksel for it. - } - else - llvm_unreachable("Can't store this register to stack slot"); -} - -void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - const PIC16TargetLowering *PTLI = TM.getTargetLowering(); - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - - const Function *Func = MBB.getParent()->getFunction(); - const std::string FuncName = Func->getName(); - - const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); - - // On the order of operands here: think "movf FrameIndex, W". - if (RC == PIC16::GPRRegisterClass) { - //MachineFunction &MF = *MBB.getParent(); - //MachineRegisterInfo &RI = MF.getRegInfo(); - BuildMI(MBB, I, DL, get(PIC16::movf), DestReg) - .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent())) - .addExternalSymbol(tmpName) - .addImm(1); // Emit banksel for it. - } - else if (RC == PIC16::FSR16RegisterClass) { - // This is a 16-bit register and the frameindex given by llvm is of - // size two here. Break this index N into two zero based indexes and - // put one into the map. The second one is always obtained by adding 1 - // to the first zero based index. In fact it is going to use 3 slots - // as saving FSRs corrupts W also and hence we need to save/restore W also. - - unsigned opcode = (DestReg == PIC16::FSR0) ? 
PIC16::restore_fsr0
-                                              : PIC16::restore_fsr1;
-  BuildMI(MBB, I, DL, get(opcode), DestReg)
-      .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent()))
-      .addExternalSymbol(tmpName)
-      .addImm(1); // Emit banksel for it.
-  }
-  else
-    llvm_unreachable("Can't load this register from stack slot");
-}
-
-void PIC16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
-                                 MachineBasicBlock::iterator I, DebugLoc DL,
-                                 unsigned DestReg, unsigned SrcReg,
-                                 bool KillSrc) const {
-  unsigned Opc;
-  if (PIC16::FSR16RegClass.contains(DestReg, SrcReg))
-    Opc = PIC16::copy_fsr;
-  else if (PIC16::GPRRegClass.contains(DestReg, SrcReg))
-    Opc = PIC16::copy_w;
-  else
-    llvm_unreachable("Impossible reg-to-reg copy");
-
-  BuildMI(MBB, I, DL, get(Opc), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
-}
-
-/// InsertBranch - Insert a branch into the end of the specified
-/// MachineBasicBlock. The operands to this method are the same as those
-/// returned by AnalyzeBranch. This is invoked in cases where AnalyzeBranch
-/// returns success and when an unconditional branch (TBB is non-null, FBB is
-/// null, Cond is empty) needs to be inserted. It returns the number of
-/// instructions inserted.
-unsigned PIC16InstrInfo::
-InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-             MachineBasicBlock *FBB,
-             const SmallVectorImpl<MachineOperand> &Cond,
-             DebugLoc DL) const {
-  // Shouldn't be a fall through.
-  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-
-  if (FBB == 0) { // One way branch.
-    if (Cond.empty()) {
-      // Unconditional branch?
-      BuildMI(&MBB, DL, get(PIC16::br_uncond)).addMBB(TBB);
-    }
-    return 1;
-  }
-
-  // FIXME: If there are conditions specified then a conditional branch
-  // should be generated.
-  // For the time being no instruction is being generated, therefore
-  // returning 0.
-  return 0;
-}
-
-bool PIC16InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
-                                   MachineBasicBlock *&TBB,
-                                   MachineBasicBlock *&FBB,
-                                   SmallVectorImpl<MachineOperand> &Cond,
-                                   bool AllowModify) const {
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin())
-    return true;
-
-  // Get the terminator instruction.
-  --I;
-  while (I->isDebugValue()) {
-    if (I == MBB.begin())
-      return true;
-    --I;
-  }
-  // Handle unconditional branches. If the unconditional branch's target is
-  // the successor basic block then remove the unconditional branch.
-  if (I->getOpcode() == PIC16::br_uncond  && AllowModify) {
-    if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
-      TBB = 0;
-      I->eraseFromParent();
-    }
-  }
-  return true;
-}
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
deleted file mode 100644
index 661b335..0000000
--- a/lib/Target/PIC16/PIC16InstrInfo.h
+++ /dev/null
@@ -1,76 +0,0 @@
-//===- PIC16InstrInfo.h - PIC16 Instruction Information----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 implementation of the TargetInstrInfo class.
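The storeRegToStackSlot/loadRegFromStackSlot bodies above key every spill and reload off GetTmpOffsetForFI, which hands out a stable, zero-based offset in the function's temp section for each frame index (the FiTmpOffsetMap/TmpSize bookkeeping declared in PIC16MachineFunctionInfo.h further down). What follows is a minimal standalone sketch of that allocation scheme only; TmpOffsetAllocator and its members are illustrative names, not part of the deleted sources:

#include <cstdio>
#include <map>

// Hypothetical stand-in for the FI -> temp-offset bookkeeping that
// GetTmpOffsetForFI performs: each frame index gets a stable zero-based
// offset in the function's temp section, sized by its slot size.
class TmpOffsetAllocator {
  std::map<unsigned, unsigned> FiTmpOffsetMap; // FI -> assigned offset
  unsigned TmpSize = 0;                        // slots used so far
public:
  unsigned getTmpOffsetForFI(unsigned FI, unsigned SlotSize) {
    auto It = FiTmpOffsetMap.find(FI);
    if (It != FiTmpOffsetMap.end())
      return It->second;               // reuse the existing slot
    unsigned Offset = TmpSize;         // allocate a new slot at the end
    FiTmpOffsetMap[FI] = Offset;
    TmpSize += SlotSize;
    return Offset;
  }
  unsigned getTmpSize() const { return TmpSize; }
};

int main() {
  TmpOffsetAllocator A;
  std::printf("FI#4 -> %u\n", A.getTmpOffsetForFI(4, 1)); // 0
  std::printf("FI#7 -> %u\n", A.getTmpOffsetForFI(7, 3)); // 1 (FSR spill: 3 slots)
  std::printf("FI#4 -> %u\n", A.getTmpOffsetForFI(4, 1)); // 0 again, reused
  std::printf("total %u\n", A.getTmpSize());              // 4
  return 0;
}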
-// -//===----------------------------------------------------------------------===// - -#ifndef PIC16INSTRUCTIONINFO_H -#define PIC16INSTRUCTIONINFO_H - -#include "PIC16.h" -#include "PIC16RegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -namespace llvm { - - -class PIC16InstrInfo : public TargetInstrInfoImpl -{ - PIC16TargetMachine &TM; - const PIC16RegisterInfo RegInfo; -public: - explicit PIC16InstrInfo(PIC16TargetMachine &TM); - - virtual const PIC16RegisterInfo &getRegisterInfo() const { return RegInfo; } - - /// isLoadFromStackSlot - If the specified machine instruction is a direct - /// load from a stack slot, return the virtual or physical register number of - /// the destination along with the FrameIndex of the loaded stack slot. If - /// not, return 0. This predicate must return 0 if the instruction has - /// any side effects other than loading from the stack slot. - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - /// isStoreToStackSlot - If the specified machine instruction is a direct - /// store to a stack slot, return the virtual or physical register number of - /// the source reg along with the FrameIndex of the loaded stack slot. If - /// not, return 0. This predicate must return 0 if the instruction has - /// any side effects other than storing to the stack slot. - virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - virtual - unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const; - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const; - }; -} // namespace llvm - -#endif diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td deleted file mode 100644 index 86d36cb..0000000 --- a/lib/Target/PIC16/PIC16InstrInfo.td +++ /dev/null @@ -1,540 +0,0 @@ -//===- PIC16InstrInfo.td - PIC16 Instruction defs -------------*- tblgen-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the PIC16 instructions in TableGen format. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// PIC16 Specific Type Constraints. -//===----------------------------------------------------------------------===// -class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>; -class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>; - -//===----------------------------------------------------------------------===// -// PIC16 Specific Type Profiles. 
-//===----------------------------------------------------------------------===//
-
-// Generic type profiles for i8/i16 unary/binary operations.
-// Taking one i8 or i16 and producing void.
-def SDTI8VoidOp : SDTypeProfile<0, 1, [SDTCisI8<0>]>;
-def SDTI16VoidOp : SDTypeProfile<0, 1, [SDTCisI16<0>]>;
-
-// Taking one value and producing an output of the same type.
-def SDTI8UnaryOp : SDTypeProfile<1, 1, [SDTCisI8<0>, SDTCisI8<1>]>;
-def SDTI16UnaryOp : SDTypeProfile<1, 1, [SDTCisI16<0>, SDTCisI16<1>]>;
-
-// Taking two values and producing an output of the same type.
-def SDTI8BinOp : SDTypeProfile<1, 2, [SDTCisI8<0>, SDTCisI8<1>, SDTCisI8<2>]>;
-def SDTI16BinOp : SDTypeProfile<1, 2, [SDTCisI16<0>, SDTCisI16<1>,
-                                       SDTCisI16<2>]>;
-
-// Node specific type profiles.
-def SDT_PIC16Load : SDTypeProfile<1, 3, [SDTCisI8<0>, SDTCisI8<1>,
-                                         SDTCisI8<2>, SDTCisI8<3>]>;
-
-def SDT_PIC16Store : SDTypeProfile<0, 4, [SDTCisI8<0>, SDTCisI8<1>,
-                                          SDTCisI8<2>, SDTCisI8<3>]>;
-
-def SDT_PIC16Connect : SDTypeProfile<1, 2, [SDTCisI8<0>, SDTCisI8<1>,
-                                            SDTCisI8<2>]>;
-
-// PIC16ISD::CALL type profile
-def SDT_PIC16call : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
-def SDT_PIC16callw : SDTypeProfile<1, -1, [SDTCisInt<0>]>;
-
-// PIC16ISD::BRCOND
-def SDT_PIC16Brcond: SDTypeProfile<0, 2,
-                                   [SDTCisVT<0, OtherVT>, SDTCisI8<1>]>;
-
-// PIC16ISD::SELECT_ICC
-def SDT_PIC16Selecticc: SDTypeProfile<1, 3,
-                                   [SDTCisI8<0>, SDTCisI8<1>, SDTCisI8<2>,
-                                    SDTCisI8<3>]>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 addressing modes matching via DAG.
-//===----------------------------------------------------------------------===//
-def diraddr : ComplexPattern<i8, 1, "SelectDirectAddr", [], []>;
-
-//===----------------------------------------------------------------------===//
-// PIC16 Specific Node Definitions.
-//===----------------------------------------------------------------------===//
-def PIC16callseq_start : SDNode<"ISD::CALLSEQ_START", SDTI8VoidOp,
-                                [SDNPHasChain, SDNPOutFlag]>;
-def PIC16callseq_end   : SDNode<"ISD::CALLSEQ_END", SDTI8VoidOp,
-                                [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-
-// Low 8-bits of GlobalAddress.
-def PIC16Lo : SDNode<"PIC16ISD::Lo", SDTI8BinOp>;
-
-// High 8-bits of GlobalAddress.
-def PIC16Hi : SDNode<"PIC16ISD::Hi", SDTI8BinOp>;
-
-// The MTHI and MTLO nodes are used only to match them in the incoming
-// DAG for replacement by corresponding set_fsrhi, set_fsrlo instructions.
-// These nodes are not used for defining any instructions.
-def MTLO : SDNode<"PIC16ISD::MTLO", SDTI8UnaryOp>;
-def MTHI : SDNode<"PIC16ISD::MTHI", SDTI8UnaryOp>;
-def MTPCLATH : SDNode<"PIC16ISD::MTPCLATH", SDTI8UnaryOp>;
-
-// Node to generate Bank Select for a GlobalAddress.
-def Banksel : SDNode<"PIC16ISD::Banksel", SDTI8UnaryOp>;
-
-// Node to match a direct store operation.
-def PIC16Store : SDNode<"PIC16ISD::PIC16Store", SDT_PIC16Store, [SDNPHasChain]>;
-def PIC16StWF : SDNode<"PIC16ISD::PIC16StWF", SDT_PIC16Store,
-                       [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
-
-// Node to match a direct load operation.
-def PIC16Load : SDNode<"PIC16ISD::PIC16Load", SDT_PIC16Load, [SDNPHasChain]>; -def PIC16LdArg : SDNode<"PIC16ISD::PIC16LdArg", SDT_PIC16Load, [SDNPHasChain]>; -def PIC16LdWF : SDNode<"PIC16ISD::PIC16LdWF", SDT_PIC16Load, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; -def PIC16Connect: SDNode<"PIC16ISD::PIC16Connect", SDT_PIC16Connect, []>; - -// Node to match PIC16 call -def PIC16call : SDNode<"PIC16ISD::CALL", SDT_PIC16call, - [SDNPHasChain , SDNPOptInFlag, SDNPOutFlag]>; -def PIC16callw : SDNode<"PIC16ISD::CALLW", SDT_PIC16callw, - [SDNPHasChain , SDNPOptInFlag, SDNPOutFlag]>; - -// Node to match a comparison instruction. -def PIC16Subcc : SDNode<"PIC16ISD::SUBCC", SDTI8BinOp, [SDNPOutFlag]>; - -// Node to match a conditional branch. -def PIC16Brcond : SDNode<"PIC16ISD::BRCOND", SDT_PIC16Brcond, - [SDNPHasChain, SDNPInFlag]>; - -def PIC16Selecticc : SDNode<"PIC16ISD::SELECT_ICC", SDT_PIC16Selecticc, - [SDNPInFlag]>; - -def PIC16ret : SDNode<"PIC16ISD::RET", SDTNone, [SDNPHasChain]>; - -//===----------------------------------------------------------------------===// -// PIC16 Operand Definitions. -//===----------------------------------------------------------------------===// -def i8mem : Operand<i8>; -def brtarget: Operand<OtherVT>; - -// Operand for printing out a condition code. -let PrintMethod = "printCCOperand" in - def CCOp : Operand<i8>; - -include "PIC16InstrFormats.td" - -//===----------------------------------------------------------------------===// -// PIC16 Common Classes. -//===----------------------------------------------------------------------===// - -// W = W Op F : Load the value from F and do Op to W. -let Constraints = "$src = $dst", mayLoad = 1 in -class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>: - ByteFormat<OpCode, (outs GPR:$dst), - (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), - !strconcat(OpcStr, " $ptrlo + $offset, W"), - [(set GPR:$dst, (OpNode GPR:$src, (PIC16Load diraddr:$ptrlo, - (i8 imm:$ptrhi), - (i8 imm:$offset))))]>; - -// F = F Op W : Load the value from F, do op with W and store in F. -// This insn class is not marked as TwoAddress because the reg is -// being used as a source operand only. (Remember a TwoAddress insn -// needs a copy.) -let mayStore = 1 in -class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>: - ByteFormat<OpCode, (outs), - (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), - !strconcat(OpcStr, " $ptrlo + $offset, F"), - [(PIC16Store (OpNode GPR:$src, (PIC16Load diraddr:$ptrlo, - (i8 imm:$ptrhi), - (i8 imm:$offset))), - diraddr:$ptrlo, - (i8 imm:$ptrhi), (i8 imm:$offset) - )]>; - -// W = W Op L : Do Op of L with W and place result in W. -let Constraints = "$src = $dst" in -class BinOpWL<bits<6> opcode, string OpcStr, SDNode OpNode> : - LiteralFormat<opcode, (outs GPR:$dst), - (ins GPR:$src, i8imm:$literal), - !strconcat(OpcStr, " $literal"), - [(set GPR:$dst, (OpNode GPR:$src, (i8 imm:$literal)))]>; - -//===----------------------------------------------------------------------===// -// PIC16 Instructions. -//===----------------------------------------------------------------------===// - -// Pseudo-instructions. -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i8imm:$amt), - "!ADJCALLSTACKDOWN $amt", - [(PIC16callseq_start imm:$amt)]>; - -def ADJCALLSTACKUP : Pseudo<(outs), (ins i8imm:$amt), - "!ADJCALLSTACKUP $amt", - [(PIC16callseq_end imm:$amt)]>; - -//----------------------------------- -// Vaious movlw insn patterns. 
-//----------------------------------- -let isReMaterializable = 1 in { -// Move 8-bit literal to W. -def movlw : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src), - "movlw $src", - [(set GPR:$dst, (i8 imm:$src))]>; - -// Move a Lo(TGA) to W. -def movlw_lo_1 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2), - "movlw LOW(${src} + ${src2})", - [(set GPR:$dst, (PIC16Lo tglobaladdr:$src, imm:$src2 ))]>; - -// Move a Lo(TES) to W. -def movlw_lo_2 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2), - "movlw LOW(${src} + ${src2})", - [(set GPR:$dst, (PIC16Lo texternalsym:$src, imm:$src2 ))]>; - -// Move a Hi(TGA) to W. -def movlw_hi_1 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2), - "movlw HIGH(${src} + ${src2})", - [(set GPR:$dst, (PIC16Hi tglobaladdr:$src, imm:$src2))]>; - -// Move a Hi(TES) to W. -def movlw_hi_2 : BitFormat<12, (outs GPR:$dst), (ins i8imm:$src, i8imm:$src2), - "movlw HIGH(${src} + ${src2})", - [(set GPR:$dst, (PIC16Hi texternalsym:$src, imm:$src2))]>; -} - -//------------------- -// FSR setting insns. -//------------------- -// These insns are matched via a DAG replacement pattern. -def set_fsrlo: - ByteFormat<0, (outs FSR16:$fsr), - (ins GPR:$val), - "movwf ${fsr}L", - []>; - -let Constraints = "$src = $dst" in -def set_fsrhi: - ByteFormat<0, (outs FSR16:$dst), - (ins FSR16:$src, GPR:$val), - "movwf ${dst}H", - []>; - -def set_pclath: - ByteFormat<0, (outs PCLATHR:$dst), - (ins GPR:$val), - "movwf ${dst}", - [(set PCLATHR:$dst , (MTPCLATH GPR:$val))]>; - -//---------------------------- -// copyPhysReg -// copyPhysReg insns. These are dummy. They should always be deleted -// by the optimizer and never be present in the final generated code. -// if they are, then we have to write correct macros for these insns. -//---------------------------- -def copy_fsr: - Pseudo<(outs FSR16:$dst), (ins FSR16:$src), "copy_fsr $dst, $src", []>; - -def copy_w: - Pseudo<(outs GPR:$dst), (ins GPR:$src), "copy_w $dst, $src", []>; - -class SAVE_FSR<string OpcStr>: - Pseudo<(outs), - (ins FSR16:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), - !strconcat(OpcStr, " $ptrlo, $offset"), - []>; - -def save_fsr0: SAVE_FSR<"save_fsr0">; -def save_fsr1: SAVE_FSR<"save_fsr1">; - -class RESTORE_FSR<string OpcStr>: - Pseudo<(outs FSR16:$dst), - (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), - !strconcat(OpcStr, " $ptrlo, $offset"), - []>; - -def restore_fsr0: RESTORE_FSR<"restore_fsr0">; -def restore_fsr1: RESTORE_FSR<"restore_fsr1">; - -//-------------------------- -// Store to memory -//------------------------- - -// Direct store. -// Input operands are: val = W, ptrlo = GA, offset = offset, ptrhi = banksel. -let mayStore = 1 in -class MOVWF_INSN<bits<6> OpCode, SDNode OpNodeDest, SDNode Op>: - ByteFormat<0, (outs), - (ins GPR:$val, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), - "movwf ${ptrlo} + ${offset}", - [(Op GPR:$val, OpNodeDest:$ptrlo, (i8 imm:$ptrhi), - (i8 imm:$offset))]>; - -// Store W to a Global Address. -def movwf : MOVWF_INSN<0, tglobaladdr, PIC16Store>; - -// Store W to an External Symobol. -def movwf_1 : MOVWF_INSN<0, texternalsym, PIC16Store>; - -// Store with InFlag and OutFlag -// This is same as movwf_1 but has a flag. A flag is required to -// order the stores while passing the params to function. -def movwf_2 : MOVWF_INSN<0, texternalsym, PIC16StWF>; - -// Indirect store. Matched via a DAG replacement pattern. 
-def store_indirect : - ByteFormat<0, (outs), - (ins GPR:$val, FSR16:$fsr, i8imm:$offset), - "movwi $offset[$fsr]", - []>; - -//---------------------------- -// Load from memory -//---------------------------- -// Direct load. -// Input Operands are: ptrlo = GA, offset = offset, ptrhi = banksel. -// Output: dst = W -let Defs = [STATUS], mayLoad = 1 in -class MOVF_INSN<bits<6> OpCode, SDNode OpNodeSrc, SDNode Op>: - ByteFormat<0, (outs GPR:$dst), - (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), - "movf ${ptrlo} + ${offset}, W", - [(set GPR:$dst, - (Op OpNodeSrc:$ptrlo, (i8 imm:$ptrhi), - (i8 imm:$offset)))]>; - -// Load from a GA. -def movf : MOVF_INSN<0, tglobaladdr, PIC16Load>; - -// Load from an ES. -def movf_1 : MOVF_INSN<0, texternalsym, PIC16Load>; -def movf_1_1 : MOVF_INSN<0, texternalsym, PIC16LdArg>; - -// Load with InFlag and OutFlag -// This is same as movf_1 but has a flag. A flag is required to -// order the loads while copying the return value of a function. -def movf_2 : MOVF_INSN<0, texternalsym, PIC16LdWF>; - -// Indirect load. Matched via a DAG replacement pattern. -def load_indirect : - ByteFormat<0, (outs GPR:$dst), - (ins FSR16:$fsr, i8imm:$offset), - "moviw $offset[$fsr]", - []>; - -//------------------------- -// Bitwise operations patterns -//-------------------------- -// W = W op [F] -let Defs = [STATUS] in { -def OrFW : BinOpFW<0, "iorwf", or>; -def XOrFW : BinOpFW<0, "xorwf", xor>; -def AndFW : BinOpFW<0, "andwf", and>; - -// F = W op [F] -def OrWF : BinOpWF<0, "iorwf", or>; -def XOrWF : BinOpWF<0, "xorwf", xor>; -def AndWF : BinOpWF<0, "andwf", and>; - -//------------------------- -// Various add/sub patterns. -//------------------------- - -// W = W + [F] -def addfw_1: BinOpFW<0, "addwf", add>; -def addfw_2: BinOpFW<0, "addwf", addc>; - -let Uses = [STATUS] in -def addfwc: BinOpFW<0, "addwfc", adde>; // With Carry. - -// F = W + [F] -def addwf_1: BinOpWF<0, "addwf", add>; -def addwf_2: BinOpWF<0, "addwf", addc>; -let Uses = [STATUS] in -def addwfc: BinOpWF<0, "addwfc", adde>; // With Carry. -} - -// W -= [F] ; load from F and sub the value from W. -let Constraints = "$src = $dst", mayLoad = 1 in -class SUBFW<bits<6> OpCode, string OpcStr, SDNode OpNode>: - ByteFormat<OpCode, (outs GPR:$dst), - (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), - !strconcat(OpcStr, " $ptrlo + $offset, W"), - [(set GPR:$dst, (OpNode (PIC16Load diraddr:$ptrlo, - (i8 imm:$ptrhi), (i8 imm:$offset)), - GPR:$src))]>; -let Defs = [STATUS] in { -def subfw_1: SUBFW<0, "subwf", sub>; -def subfw_2: SUBFW<0, "subwf", subc>; - -let Uses = [STATUS] in -def subfwb: SUBFW<0, "subwfb", sube>; // With Borrow. - -} -let Defs = [STATUS], isTerminator = 1 in -def subfw_cc: SUBFW<0, "subwf", PIC16Subcc>; - -// [F] -= W ; -let mayStore = 1 in -class SUBWF<bits<6> OpCode, string OpcStr, SDNode OpNode>: - ByteFormat<OpCode, (outs), - (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), - !strconcat(OpcStr, " $ptrlo + $offset"), - [(PIC16Store (OpNode (PIC16Load diraddr:$ptrlo, - (i8 imm:$ptrhi), (i8 imm:$offset)), - GPR:$src), diraddr:$ptrlo, - (i8 imm:$ptrhi), (i8 imm:$offset))]>; - -let Defs = [STATUS] in { -def subwf_1: SUBWF<0, "subwf", sub>; -def subwf_2: SUBWF<0, "subwf", subc>; - -let Uses = [STATUS] in - def subwfb: SUBWF<0, "subwfb", sube>; // With Borrow. 
- -def subwf_cc: SUBWF<0, "subwf", PIC16Subcc>; -} - -// addlw -let Defs = [STATUS] in { -def addlw_1 : BinOpWL<0, "addlw", add>; -def addlw_2 : BinOpWL<0, "addlw", addc>; - -let Uses = [STATUS] in -def addlwc : BinOpWL<0, "addlwc", adde>; // With Carry. (Assembler macro). - -// bitwise operations involving a literal and w. -def andlw : BinOpWL<0, "andlw", and>; -def xorlw : BinOpWL<0, "xorlw", xor>; -def orlw : BinOpWL<0, "iorlw", or>; -} - -// sublw -// W = C - W ; sub W from literal. (Without borrow). -let Constraints = "$src = $dst" in -class SUBLW<bits<6> opcode, string OpcStr, SDNode OpNode> : - LiteralFormat<opcode, (outs GPR:$dst), - (ins GPR:$src, i8imm:$literal), - !strconcat(OpcStr, " $literal"), - [(set GPR:$dst, (OpNode (i8 imm:$literal), GPR:$src))]>; -// subwl -// W = W - C ; sub literal from W (Without borrow). -let Constraints = "$src = $dst" in -class SUBWL<bits<6> opcode, string OpcStr, SDNode OpNode> : - LiteralFormat<opcode, (outs GPR:$dst), - (ins GPR:$src, i8imm:$literal), - !strconcat(OpcStr, " $literal"), - [(set GPR:$dst, (OpNode GPR:$src, (i8 imm:$literal)))]>; - -let Defs = [STATUS] in { -def sublw_1 : SUBLW<0, "sublw", sub>; -def sublw_2 : SUBLW<0, "sublw", subc>; -def sublw_3 : SUBLW<0, "sublwb", sube>; // With borrow (Assembler macro). - -def sublw_4 : SUBWL<0, "subwl", sub>; // Assembler macro replace with addlw -def sublw_5 : SUBWL<0, "subwl", subc>; // Assembler macro replace with addlw -def sublw_6 : SUBWL<0, "subwlb", sube>; // With borrow (Assembler macro). -} -let Defs = [STATUS], isTerminator = 1 in -def sublw_cc : SUBLW<0, "sublw", PIC16Subcc>; - -// Call instruction. -let isCall = 1, - Defs = [W, FSR0, FSR1] in { - def CALL: LiteralFormat<0x1, (outs), (ins i8imm:$func), - //"call ${func} + 2", - "call ${func}", - [(PIC16call diraddr:$func)]>; -} - -let isCall = 1, - Defs = [W, FSR0, FSR1] in { - def CALL_1: LiteralFormat<0x1, (outs), (ins GPR:$func, PCLATHR:$pc), - "callw", - [(PIC16call (PIC16Connect GPR:$func, PCLATHR:$pc))]>; -} - -let isCall = 1, - Defs = [FSR0, FSR1] in { - def CALLW: LiteralFormat<0x1, (outs GPR:$dest), - (ins GPR:$func, PCLATHR:$pc), - "callw", - [(set GPR:$dest, (PIC16callw (PIC16Connect GPR:$func, PCLATHR:$pc)))]>; -} - -let Uses = [STATUS], isBranch = 1, isTerminator = 1, hasDelaySlot = 0 in -def pic16brcond: ControlFormat<0x0, (outs), (ins brtarget:$dst, CCOp:$cc), - "b$cc $dst", - [(PIC16Brcond bb:$dst, imm:$cc)]>; - -// Unconditional branch. -let isBranch = 1, isTerminator = 1, hasDelaySlot = 0 in -def br_uncond: ControlFormat<0x0, (outs), (ins brtarget:$dst), - "goto $dst", - [(br bb:$dst)]>; - -// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after -// instruction selection into a branch sequence. -let usesCustomInserter = 1 in { // Expanded after instruction selection. - def SELECT_CC_Int_ICC - : Pseudo<(outs GPR:$dst), (ins GPR:$T, GPR:$F, i8imm:$Cond), - "; SELECT_CC_Int_ICC PSEUDO!", - [(set GPR:$dst, (PIC16Selecticc GPR:$T, GPR:$F, - imm:$Cond))]>; -} - - -// Banksel. -def banksel : - Pseudo<(outs), - (ins i8mem:$ptr), - "banksel $ptr", - []>; - -def pagesel : - Pseudo<(outs), - (ins i8mem:$ptr), - "movlp $ptr", - []>; - - -// Return insn. -let isTerminator = 1, isBarrier = 1, isReturn = 1 in -def Return : - ControlFormat<0, (outs), (ins), "return", [(PIC16ret)]>; - -//===----------------------------------------------------------------------===// -// PIC16 Replacment Patterns. 
-//===----------------------------------------------------------------------===// - -// Identify an indirect store and select insns for it. -def : Pat<(PIC16Store GPR:$val, (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr), - imm:$offset), - (store_indirect GPR:$val, - (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr), - imm:$offset)>; - -def : Pat<(PIC16StWF GPR:$val, (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr), - imm:$offset), - (store_indirect GPR:$val, - (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr), - imm:$offset)>; - -// Identify an indirect load and select insns for it. -def : Pat<(PIC16Load (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr), - imm:$offset), - (load_indirect (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr), - imm:$offset)>; - -def : Pat<(PIC16LdWF (MTLO GPR:$loaddr), (MTHI GPR:$hiaddr), - imm:$offset), - (load_indirect (set_fsrhi (set_fsrlo GPR:$loaddr), GPR:$hiaddr), - imm:$offset)>; - diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.cpp b/lib/Target/PIC16/PIC16MCAsmInfo.cpp deleted file mode 100644 index 1bcc497..0000000 --- a/lib/Target/PIC16/PIC16MCAsmInfo.cpp +++ /dev/null @@ -1,59 +0,0 @@ -//===-- PIC16MCAsmInfo.cpp - PIC16 asm properties -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declarations of the PIC16MCAsmInfo properties. -// -//===----------------------------------------------------------------------===// - -#include "PIC16MCAsmInfo.h" - -// FIXME: Layering violation to get enums and static function, should be moved -// to separate headers. -#include "PIC16.h" -#include "PIC16ABINames.h" -#include "PIC16ISelLowering.h" -using namespace llvm; - -PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, StringRef TT) { - CommentString = ";"; - GlobalPrefix = PAN::getTagName(PAN::PREFIX_SYMBOL); - GlobalDirective = "\tglobal\t"; - ExternDirective = "\textern\t"; - - Data8bitsDirective = " db "; - Data16bitsDirective = " dw "; - Data32bitsDirective = " dl "; - Data64bitsDirective = NULL; - ZeroDirective = NULL; - AsciiDirective = " dt "; - AscizDirective = NULL; - - RomData8bitsDirective = " dw "; - RomData16bitsDirective = " rom_di "; - RomData32bitsDirective = " rom_dl "; - HasSetDirective = false; - - // Set it to false because we weed to generate c file name and not bc file - // name. - HasSingleParameterDotFile = false; -} - -const char *PIC16MCAsmInfo::getDataASDirective(unsigned Size, - unsigned AS) const { - if (AS != PIC16ISD::ROM_SPACE) - return 0; - - switch (Size) { - case 8: return RomData8bitsDirective; - case 16: return RomData16bitsDirective; - case 32: return RomData32bitsDirective; - default: return NULL; - } -} - diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.h b/lib/Target/PIC16/PIC16MCAsmInfo.h deleted file mode 100644 index 6e1c111..0000000 --- a/lib/Target/PIC16/PIC16MCAsmInfo.h +++ /dev/null @@ -1,35 +0,0 @@ -//=====-- PIC16MCAsmInfo.h - PIC16 asm properties -------------*- C++ -*--====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the PIC16MCAsmInfo class. 
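The getDataASDirective override above dispatches purely on (size, address space): anything outside ROM space falls back to the default data directives, and ROM-space data picks one of the rom_* strings set up in the constructor. Below is a self-contained mirror of that dispatch, assuming the ROM_SPACE value of 1 from PIC16ISelLowering.h; romDirectiveForSize is an illustrative name, not an LLVM API:

#include <cstdio>

// ROM_SPACE constant and directive strings taken from the deleted sources
// above; this free function only mirrors getDataASDirective's switch.
static const unsigned ROM_SPACE = 1;

const char *romDirectiveForSize(unsigned Size, unsigned AS) {
  if (AS != ROM_SPACE)
    return nullptr;             // RAM data uses the default directives
  switch (Size) {
  case 8:  return " dw ";       // 8-bit ROM data
  case 16: return " rom_di ";
  case 32: return " rom_dl ";
  default: return nullptr;      // no 64-bit ROM directive
  }
}

int main() {
  std::printf("%s\n", romDirectiveForSize(16, ROM_SPACE)); // " rom_di "
  return 0;
}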
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16TARGETASMINFO_H
-#define PIC16TARGETASMINFO_H
-
-#include "llvm/MC/MCAsmInfo.h"
-
-namespace llvm {
-  class Target;
-  class StringRef;
-
-  class PIC16MCAsmInfo : public MCAsmInfo {
-    const char *RomData8bitsDirective;
-    const char *RomData16bitsDirective;
-    const char *RomData32bitsDirective;
-  public:
-    PIC16MCAsmInfo(const Target &T, StringRef TT);
-
-    virtual const char *getDataASDirective(unsigned size, unsigned AS) const;
-  };
-
-} // namespace llvm
-
-#endif
diff --git a/lib/Target/PIC16/PIC16MachineFunctionInfo.h b/lib/Target/PIC16/PIC16MachineFunctionInfo.h
deleted file mode 100644
index bdf5086..0000000
--- a/lib/Target/PIC16/PIC16MachineFunctionInfo.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//====- PIC16MachineFunctionInfo.h - PIC16 machine function info -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares PIC16-specific per-machine-function information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16MACHINEFUNCTIONINFO_H
-#define PIC16MACHINEFUNCTIONINFO_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-/// PIC16MachineFunctionInfo - This class is derived from MachineFunctionInfo
-/// and contains private PIC16 target-specific information for each
-/// MachineFunction.
-class PIC16MachineFunctionInfo : public MachineFunctionInfo {
-  // The frame indexes generated for spill/reload are stack based.
-  // This map maintains zero-based indexes for these FIs.
-  std::map<unsigned, unsigned> FiTmpOffsetMap;
-  unsigned TmpSize;
-
-  // These are the frames for return value and argument passing.
-  // These FrameIndices will be expanded to the foo.frame external symbol
-  // and all others will be expanded to the foo.tmp external symbol.
-  unsigned ReservedFrameCount;
-
-public:
-  PIC16MachineFunctionInfo()
-    : TmpSize(0), ReservedFrameCount(0) {}
-
-  explicit PIC16MachineFunctionInfo(MachineFunction &MF)
-    : TmpSize(0), ReservedFrameCount(0) {}
-
-  std::map<unsigned, unsigned> &getFiTmpOffsetMap() { return FiTmpOffsetMap; }
-
-  unsigned getTmpSize() const { return TmpSize; }
-  void setTmpSize(unsigned Size) { TmpSize = Size; }
-
-  unsigned getReservedFrameCount() const { return ReservedFrameCount; }
-  void setReservedFrameCount(unsigned Count) { ReservedFrameCount = Count; }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
deleted file mode 100644
index b6aa38f..0000000
--- a/lib/Target/PIC16/PIC16MemSelOpt.cpp
+++ /dev/null
@@ -1,254 +0,0 @@
-//===-- PIC16MemSelOpt.cpp - PIC16 banksel optimizer --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the pass which optimizes the emitting of banksel
-// instructions before accessing data memory. This currently works within
-// a basic block only and keeps track of the last accessed memory bank.
-// If memory access continues to be in the same bank it just changes the
-// banksel immediate, which is a part of the insn accessing the data memory,
-// from 1 to zero. The asm printer emits a banksel only if that immediate
-// is 1.
-//
-// FIXME: this is not implemented yet. The banksel pass only works on local
-// basic blocks.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pic16-codegen"
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "PIC16InstrInfo.h"
-#include "PIC16MCAsmInfo.h"
-#include "PIC16TargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/DerivedTypes.h"
-
-using namespace llvm;
-
-namespace {
-  struct MemSelOpt : public MachineFunctionPass {
-    static char ID;
-    MemSelOpt() : MachineFunctionPass(ID) {}
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addPreservedID(MachineLoopInfoID);
-      AU.addPreservedID(MachineDominatorsID);
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-
-    virtual bool runOnMachineFunction(MachineFunction &MF);
-
-    virtual const char *getPassName() const {
-      return "PIC16 Memsel Optimizer";
-    }
-
-   bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
-   bool processInstruction(MachineInstr *MI);
-
-  private:
-    const TargetInstrInfo *TII; // Machine instruction info.
-    MachineBasicBlock *MBB;     // Current basic block
-    std::string CurBank;
-    int PageChanged;
-
-  };
-  char MemSelOpt::ID = 0;
-}
-
-FunctionPass *llvm::createPIC16MemSelOptimizerPass() {
-  return new MemSelOpt();
-}
-
-
-/// runOnMachineFunction - Loop over all of the basic blocks, minimizing the
-/// banksel/pagesel instructions that need to be emitted.
-///
-bool MemSelOpt::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getTarget().getInstrInfo();
-  bool Changed = false;
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
-       I != E; ++I) {
-    Changed |= processBasicBlock(MF, *I);
-  }
-
-  return Changed;
-}
-
-/// processBasicBlock - Loop over all of the instructions in the basic block,
-/// tracking the current bank and inserting banksel/pagesel where required.
-///
-bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
-  bool Changed = false;
-  MBB = &BB;
-
-  // Let us assume that when entering a basic block no bank is selected.
-  // Ideally we should look at the predecessors for this information.
-  CurBank="";
-  PageChanged=0;
-
-  MachineBasicBlock::iterator I;
-  for (I = BB.begin(); I != BB.end(); ++I) {
-    Changed |= processInstruction(I);
-
-    // If the page has changed, insert a pagesel before
-    // any instruction that needs one.
-    if (PageChanged == 1)
-    {
-      // Restore the page if it was changed, before leaving the basic block,
-      // because it may be required by the goto terminator or the fall thru
-      // basic block.
-      // If the terminator is return, we don't need to restore since there
-      // is no goto or fall thru basic block.
-      if ((I->getOpcode() == PIC16::sublw_3) || //macro has goto
-          (I->getOpcode() == PIC16::sublw_6) || //macro has goto
-          (I->getOpcode() == PIC16::addlwc)  || //macro has goto
-          (TII->get(I->getOpcode()).isBranch()))
-      {
-        DebugLoc dl = I->getDebugLoc();
-        BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$");
-        Changed = true;
-        PageChanged = 0;
-      }
-    }
-  }
-
-  // The basic block is over, but if we did not find any goto yet,
-  // we haven't restored the page.
-  // Restore the page if it was changed, before leaving the basic block,
-  // because it may be required by the fall thru basic block.
-  // If the terminator is return, we don't need to restore since there
-  // is no fall thru basic block.
-  if (PageChanged == 1) {
-    // Save the end pointer before we move back to the last insn.
-    MachineBasicBlock::iterator J = I;
-    I--;
-    const TargetInstrDesc &TID = TII->get(I->getOpcode());
-    if (! TID.isReturn())
-    {
-      DebugLoc dl = I->getDebugLoc();
-      BuildMI(*MBB, J, dl,
-              TII->get(PIC16::pagesel)).addExternalSymbol("$");
-      Changed = true;
-      PageChanged = 0;
-    }
-  }
-
-
-  return Changed;
-}
-
-bool MemSelOpt::processInstruction(MachineInstr *MI) {
-  bool Changed = false;
-
-  unsigned NumOperands = MI->getNumOperands();
-  if (NumOperands == 0) return false;
-
-
-  // If this insn is not going to access any memory, return.
-  const TargetInstrDesc &TID = TII->get(MI->getOpcode());
-  if (!(TID.isBranch() || TID.isCall() || TID.mayLoad() || TID.mayStore()))
-    return false;
-
-  // The first thing we should do is record whether banksel/pagesel are
-  // changed in an unknown way. This can happen via any type of call.
-  // We do it here first, before scanning for MemOp / BBOp, as the indirect
-  // call insns do not have any operands, but they still may change bank/page.
-  if (TID.isCall()) {
-    // Record that we have changed the page, so that we can restore it
-    // before the basic block ends.
-    // We need to signal that a page and bank change happened even for
-    // indirect calls.
-    PageChanged = 1;
-
-    // When a call is made, there may be banksel for variables in the callee.
-    // Hence the banksel in the caller needs to be reset.
-    CurBank = "";
-  }
-
-  // Scan for the memory address operand.
-  // FIXME: Should we use standard interfaces like memoperands_iterator,
-  // hasMemOperand() etc ?
-  int MemOpPos = -1;
-  int BBOpPos = -1;
-  for (unsigned i = 0; i < NumOperands; i++) {
-    MachineOperand Op = MI->getOperand(i);
-    if (Op.getType() ==  MachineOperand::MO_GlobalAddress ||
-        Op.getType() ==  MachineOperand::MO_ExternalSymbol) {
-      // We found one mem operand. Next one may be BS.
-      MemOpPos = i;
-    }
-    if (Op.getType() ==  MachineOperand::MO_MachineBasicBlock) {
-      // We found one BB operand. Next one may be pagesel.
-      BBOpPos = i;
-    }
-  }
-
-  // If we did not find an insn accessing memory, continue.
-  if ((MemOpPos == -1) &&
-      (BBOpPos == -1))
-    return false;
-  assert ((BBOpPos != MemOpPos) && "operand can only be of one type");
-
-
-  // If this is pagesel material, handle it first.
-  // CALL and br_ucond insns use MemOp (GA or ES) and not BBOp.
-  // Pagesel is required only for a direct call.
-  if ((MI->getOpcode() == PIC16::CALL)) {
-    // Get the BBOp.
-    MachineOperand &MemOp = MI->getOperand(MemOpPos);
-    DebugLoc dl = MI->getDebugLoc();
-    BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).addOperand(MemOp);
-
-    // CALL and br_ucond need only pagesel, so we are done.
-    return true;
-  }
-
-  // Pagesel is handled. Now, add a Banksel if needed.
-  if (MemOpPos == -1) return Changed;
-  // Get the MemOp.
-  MachineOperand &Op = MI->getOperand(MemOpPos);
-
-  // Get the section name (NewBank) for MemOp.
-  // This assumes that the section names for globals are already set by
-  // AsmPrinter->doInitialization.
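The rest of processInstruction (continued below) compares the section of the accessed operand against CurBank and emits a banksel only when the bank changes, with calls resetting the tracked bank as recorded above. Here is a toy, self-contained model of that state machine, using illustrative types rather than the pass's real interface:

#include <cstdio>
#include <string>
#include <vector>

// Toy model of the banksel minimization above: walk a basic block's memory
// accesses, emit "banksel" only when the bank (section) changes, and reset
// the tracked bank at call boundaries. All names are illustrative.
struct Access { std::string Bank; bool IsCall; };

unsigned emitBanksels(const std::vector<Access> &BB) {
  std::string CurBank; // unknown on block entry
  unsigned Emitted = 0;
  for (const Access &A : BB) {
    if (A.IsCall) { CurBank.clear(); continue; } // callee may switch banks
    if (A.Bank != CurBank) {
      std::printf("banksel %s\n", A.Bank.c_str());
      ++Emitted;
      CurBank = A.Bank;
    } // same bank: the banksel immediate stays 0, nothing emitted
  }
  return Emitted;
}

int main() {
  std::vector<Access> BB = {
    {"fdata.foo.0", false}, {"fdata.foo.0", false}, // second access: no banksel
    {"", true},                                     // call resets CurBank
    {"fdata.foo.0", false},                         // banksel needed again
  };
  std::printf("emitted %u banksels\n", emitBanksels(BB)); // 2
  return 0;
}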
- std::string NewBank = CurBank; - bool hasExternalLinkage = false; - if (Op.getType() == MachineOperand::MO_GlobalAddress && - Op.getGlobal()->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) { - if (Op.getGlobal()->hasExternalLinkage()) - hasExternalLinkage= true; - NewBank = Op.getGlobal()->getSection(); - } else if (Op.getType() == MachineOperand::MO_ExternalSymbol) { - // External Symbol is generated for temp data and arguments. They are - // in fpdata.<functionname>.# section. - std::string Sym = Op.getSymbolName(); - NewBank = PAN::getSectionNameForSym(Sym); - } - - // If the section is shared section, do not emit banksel. - if (NewBank == PAN::getSharedUDataSectionName()) - return Changed; - - // If the previous and new section names are same, we don't need to - // emit banksel. - if (NewBank.compare(CurBank) != 0 || hasExternalLinkage) { - DebugLoc dl = MI->getDebugLoc(); - BuildMI(*MBB, MI, dl, TII->get(PIC16::banksel)). - addOperand(Op); - Changed = true; - CurBank = NewBank; - } - - return Changed; -} - diff --git a/lib/Target/PIC16/PIC16Passes/Makefile b/lib/Target/PIC16/PIC16Passes/Makefile deleted file mode 100644 index 9684b8d..0000000 --- a/lib/Target/PIC16/PIC16Passes/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/PIC16/PIC16Passes/Makefile -----------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -TARGET = PIC16 -LIBRARYNAME = LLVMpic16passes -BUILD_ARCHIVE = 1 - -include $(LEVEL)/Makefile.common - diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp deleted file mode 100644 index 56f0211..0000000 --- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp +++ /dev/null @@ -1,299 +0,0 @@ -//===-- PIC16Cloner.cpp - PIC16 LLVM Cloner for shared functions -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains code to clone all functions that are shared between -// the main line code (ML) and interrupt line code (IL). It clones all such -// shared functions and their automatic global vars by adding the .IL suffix. -// -// This pass is supposed to be run on the linked .bc module. -// It traveses the module call graph twice. Once starting from the main function -// and marking each reached function as "ML". Again, starting from the ISR -// and cloning any reachable function that was marked as "ML". After cloning -// the function, it remaps all the call sites in IL functions to call the -// cloned functions. 
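Per the file header above, the cloner makes two passes over the call graph: one from main() marking every reachable function "ML", and one from the ISR cloning anything already marked, since such a function is shared between both lines. Below is a compact sketch of the two traversals over a toy, acyclic call graph; the Fn struct and helper names are illustrative, not the LLVM CallGraph API:

#include <cstdio>
#include <string>
#include <vector>

struct Fn {
  std::string Name;
  std::string Section;   // "" | "ML" | "IL"
  std::vector<Fn *> Callees;
  bool Cloned;
};

// First walk: everything reachable from main is main-line ("ML").
void markML(Fn *F) {
  F->Section = "ML";
  for (Fn *C : F->Callees)
    if (C->Section != "ML")   // skip already-marked nodes
      markML(C);
}

// Second walk: from the ISR, clone any callee already marked "ML".
void cloneShared(Fn *F) {
  for (Fn *C : F->Callees) {
    if (C->Section == "ML" && !C->Cloned) {
      std::printf("clone %s -> %s.IL\n", C->Name.c_str(), C->Name.c_str());
      C->Cloned = true;       // stands in for cloneFunction + remapAllSites
    }
    cloneShared(C);           // assumes an acyclic toy graph
  }
}

int main() {
  Fn Shared{"util", "", {}, false};
  Fn Main{"main", "", {&Shared}, false};
  Fn Isr{"isr", "", {&Shared}, false};
  markML(&Main);       // marks main and util as "ML"
  cloneShared(&Isr);   // prints: clone util -> util.IL
  return 0;
}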
-//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Pass.h" -#include "llvm/Module.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "PIC16Cloner.h" -#include "../PIC16ABINames.h" -#include <vector> - -using namespace llvm; -using std::vector; -using std::string; -using std::map; - -namespace llvm { - char PIC16Cloner::ID = 0; - - ModulePass *createPIC16ClonerPass() { return new PIC16Cloner(); } -} - -// We currently intend to run these passes in opt, which does not have any -// diagnostic support. So use these functions for now. In future -// we will probably write our own driver tool. -// -void PIC16Cloner::reportError(string ErrorString) { - errs() << "ERROR : " << ErrorString << "\n"; - exit(1); -} - -void PIC16Cloner:: -reportError (string ErrorString, vector<string> &Values) { - unsigned ValCount = Values.size(); - string TargetString; - for (unsigned i=0; i<ValCount; ++i) { - TargetString = "%"; - TargetString += ((char)i + '0'); - ErrorString.replace(ErrorString.find(TargetString), TargetString.length(), - Values[i]); - } - errs() << "ERROR : " << ErrorString << "\n"; - exit(1); -} - - -// Entry point -// -bool PIC16Cloner::runOnModule(Module &M) { - CallGraph &CG = getAnalysis<CallGraph>(); - - // Search for the "main" and "ISR" functions. - CallGraphNode *mainCGN = NULL, *isrCGN = NULL; - for (CallGraph::iterator it = CG.begin() ; it != CG.end(); it++) - { - // External calling node doesn't have any function associated with it. - if (! it->first) - continue; - - if (it->first->getName().str() == "main") { - mainCGN = it->second; - } - - if (PAN::isISR(it->first->getSection())) { - isrCGN = it->second; - } - - // Don't search further if we've found both. - if (mainCGN && isrCGN) - break; - } - - // We have nothing to do if any of the main or ISR is missing. - if (! mainCGN || ! isrCGN) return false; - - // Time for some diagnostics. - // See if the main itself is interrupt function then report an error. - if (PAN::isISR(mainCGN->getFunction()->getSection())) { - reportError("Function 'main' can't be interrupt function"); - } - - - // Mark all reachable functions from main as ML. - markCallGraph(mainCGN, "ML"); - - // And then all the functions reachable from ISR will be cloned. - cloneSharedFunctions(isrCGN); - - return true; -} - -// Mark all reachable functions from the given node, with the given mark. -// -void PIC16Cloner::markCallGraph(CallGraphNode *CGN, string StringMark) { - // Mark the top node first. - Function *thisF = CGN->getFunction(); - - thisF->setSection(StringMark); - - // Mark all the called functions - for(CallGraphNode::iterator cgn_it = CGN->begin(); - cgn_it != CGN->end(); ++cgn_it) { - Function *CalledF = cgn_it->second->getFunction(); - - // If calling an external function then CallGraphNode - // will not be associated with any function. - if (! CalledF) - continue; - - // Issue diagnostic if interrupt function is being called. - if (PAN::isISR(CalledF->getSection())) { - vector<string> Values; - Values.push_back(CalledF->getName().str()); - reportError("Interrupt function (%0) can't be called", Values); - } - - // Has already been mark - if (CalledF->getSection().find(StringMark) != string::npos) { - // Should we do anything here? - } else { - // Mark now - CalledF->setSection(StringMark); - } - - // Before going any further mark all the called function by current - // function. 
- markCallGraph(cgn_it->second ,StringMark); - } // end of loop of all called functions. -} - - -// For PIC16, automatic variables of a function are emitted as globals. -// Clone the auto variables of a function and put them in VMap, -// this VMap will be used while -// Cloning the code of function itself. -// -void PIC16Cloner::CloneAutos(Function *F) { - // We'll need to update module's globals list as well. So keep a reference - // handy. - Module *M = F->getParent(); - Module::GlobalListType &Globals = M->getGlobalList(); - - // Clear the leftovers in VMap by any previous cloning. - VMap.clear(); - - // Find the auto globls for this function and clone them, and put them - // in VMap. - std::string FnName = F->getName().str(); - std::string VarName, ClonedVarName; - for (Module::global_iterator I = M->global_begin(), E = M->global_end(); - I != E; ++I) { - VarName = I->getName().str(); - if (PAN::isLocalToFunc(FnName, VarName)) { - // Auto variable for current function found. Clone it. - const GlobalVariable *GV = I; - - const Type *InitTy = GV->getInitializer()->getType(); - GlobalVariable *ClonedGV = - new GlobalVariable(InitTy, false, GV->getLinkage(), - GV->getInitializer()); - ClonedGV->setName(PAN::getCloneVarName(FnName, VarName)); - // Add these new globals to module's globals list. - Globals.push_back(ClonedGV); - - // Update VMap. - VMap[GV] = ClonedGV; - } - } -} - - -// Clone all functions that are reachable from ISR and are already -// marked as ML. -// -void PIC16Cloner::cloneSharedFunctions(CallGraphNode *CGN) { - - // Check all the called functions from ISR. - for(CallGraphNode::iterator cgn_it = CGN->begin(); - cgn_it != CGN->end(); ++cgn_it) { - Function *CalledF = cgn_it->second->getFunction(); - - // If calling an external function then CallGraphNode - // will not be associated with any function. - if (!CalledF) - continue; - - // Issue diagnostic if interrupt function is being called. - if (PAN::isISR(CalledF->getSection())) { - vector<string> Values; - Values.push_back(CalledF->getName().str()); - reportError("Interrupt function (%0) can't be called", Values); - } - - if (CalledF->getSection().find("ML") != string::npos) { - // Function is alternatively marked. It should be a shared one. - // Create IL copy. Passing called function as first argument - // and the caller as the second argument. - - // Before making IL copy, first ensure that this function has a - // body. If the function does have a body. It can't be cloned. - // Such a case may occur when the function has been declarated - // in the C source code but its body exists in assembly file. - if (!CalledF->isDeclaration()) { - Function *cf = cloneFunction(CalledF); - remapAllSites(CGN->getFunction(), CalledF, cf); - } else { - // It is called only from ISR. Still mark it as we need this info - // in code gen while calling intrinsics.Function is not marked. - CalledF->setSection("IL"); - } - } - // Before going any further clone all the shared function reachaable - // by current function. - cloneSharedFunctions(cgn_it->second); - } // end of loop of all called functions. -} - -// Clone the given function and return it. -// Note: it uses the VMap member of the class, which is already populated -// by cloneAutos by the time we reach here. -// FIXME: Should we just pass VMap's ref as a parameter here? rather -// than keeping the VMap as a member. -Function * -PIC16Cloner::cloneFunction(Function *OrgF) { - Function *ClonedF; - - // See if we already cloned it. Return that. 
-// Clone the given function and return the clone.
-// Note: it uses the VMap member of the class, which is already populated
-// by CloneAutos by the time we reach here.
-// FIXME: Should we just pass a reference to VMap as a parameter here,
-// rather than keeping it as a member?
-Function *
-PIC16Cloner::cloneFunction(Function *OrgF) {
-  Function *ClonedF;
-
-  // See if we already cloned it; if so, return that.
-  cloned_map_iterator cm_it = ClonedFunctionMap.find(OrgF);
-  if (cm_it != ClonedFunctionMap.end()) {
-    ClonedF = cm_it->second;
-    return ClonedF;
-  }
-
-  // The clone does not exist yet.
-  // First clone the autos and populate VMap.
-  CloneAutos(OrgF);
-
-  // Now create the clone.
-  ClonedF = CloneFunction(OrgF, VMap, /*ModuleLevelChanges=*/false);
-
-  // The new function is for the interrupt line, so it should have its
-  // name suffixed with IL and its section attribute set to IL.
-  ClonedF->setName(PAN::getCloneFnName(OrgF->getName()));
-  ClonedF->setSection("IL");
-
-  // Add the newly created function to the module.
-  OrgF->getParent()->getFunctionList().push_back(ClonedF);
-
-  // Update the ClonedFunctionMap to record this cloning activity.
-  ClonedFunctionMap[OrgF] = ClonedF;
-
-  return ClonedF;
-}
-
-
-// Remap the call sites of shared functions that are on the interrupt line:
-// change each IL call site of a shared function to its clone.
-//
-void PIC16Cloner::
-remapAllSites(Function *Caller, Function *OrgF, Function *Clone) {
-  // First find the caller to update. If the caller itself was cloned,
-  // then update the cloned caller. Otherwise update the original.
-  cloned_map_iterator cm_it = ClonedFunctionMap.find(Caller);
-  if (cm_it != ClonedFunctionMap.end())
-    Caller = cm_it->second;
-
-  // For lack of a better call-site-finding mechanism, iterate over all
-  // instructions to find the uses of the original function.
-  for (Function::iterator BI = Caller->begin(); BI != Caller->end(); ++BI) {
-    BasicBlock &BB = *BI;
-    for (BasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) {
-      if (II->getNumOperands() > 0 && II->getOperand(0) == OrgF)
-        II->setOperand(0, Clone);
-    }
-  }
-}
-
-
-
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
deleted file mode 100644
index e7d67ce..0000000
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
+++ /dev/null
@@ -1,83 +0,0 @@
-//===-- PIC16Cloner.h - PIC16 LLVM Cloner for shared functions --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of a cloner class that clones all
-// functions shared between the main line code (ML) and the interrupt line
-// code (IL).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PIC16CLONER_H
-#define PIC16CLONER_H
-
-#include "llvm/ADT/ValueMap.h"
-
-using namespace llvm;
-using std::vector;
-using std::string;
-using std::map;
-
-namespace llvm {
-  // Forward declarations.
-  class Value;
-  class Function;
-  class Module;
-  class ModulePass;
-  class CallGraph;
-  class CallGraphNode;
-  class AnalysisUsage;
-
-  class PIC16Cloner : public ModulePass {
-  public:
-    static char ID; // Class identification
-    PIC16Cloner() : ModulePass(ID) {}
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<CallGraph>();
-    }
-    virtual bool runOnModule(Module &M);
-
-  private: // Functions
-    // Mark reachable functions for the MainLine or InterruptLine.
-    void markCallGraph(CallGraphNode *CGN, string StringMark);
-
-    // Clone the auto variables of the specified function.
-    void CloneAutos(Function *F);
-
-    // Clone the body of a function.
-    Function *cloneFunction(Function *F);
-
-    // Clone all shared functions.
-    void cloneSharedFunctions(CallGraphNode *isrCGN);
-
-    // Remap all call sites of a shared function to its clone.
-    void remapAllSites(Function *Caller, Function *OrgF, Function *Clone);
-
-    // Error reporting for PIC16Pass.
-    void reportError(string ErrorString, vector<string> &Values);
-    void reportError(string ErrorString);
-
-  private: // Data
-    // Records whether the interrupt function has already been found.
-    // If more than one interrupt function is found, an error should
-    // be reported.
-    bool foundISR;
-
-    // This ValueMap maps the auto variables of the original function to
-    // the corresponding cloned auto variables of the cloned function.
-    // This value map is passed during the function cloning so that all
-    // uses of auto variables are updated properly.
-    ValueMap<const Value*, Value*> VMap;
-
-    // Map of already cloned functions.
-    map<Function *, Function *> ClonedFunctionMap;
-    typedef map<Function *, Function *>::iterator cloned_map_iterator;
-  };
-} // End of llvm namespace
-
-#endif
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
deleted file mode 100644
index 0f8928a..0000000
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-//===-- PIC16Overlay.cpp - Implementation for PIC16 Frame Overlay --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 Frame Overlay implementation.
-//
-//===----------------------------------------------------------------------===//
-
-
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Pass.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/Value.h"
-#include "llvm/Support/CallSite.h" // for CallSite in MarkIndirectlyCalledFunctions
-#include "PIC16Overlay.h"
-#include "llvm/Function.h"
-#include <cstdlib>
-#include <sstream>
-using namespace llvm;
-
-namespace llvm {
-  char PIC16Overlay::ID = 0;
-  ModulePass *createPIC16OverlayPass() { return new PIC16Overlay(); }
-}
-
-void PIC16Overlay::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-  AU.addRequired<CallGraph>();
-}
-
-void PIC16Overlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) {
-  // Do not set any color for the external calling node.
-  if (Depth != 0 && CGN->getFunction()) {
-    unsigned Color = getColor(CGN->getFunction());
-
-    // Handle indirectly called functions.
-    if (Color >= PIC16OVERLAY::StartIndirectCallColor ||
-        Depth >= PIC16OVERLAY::StartIndirectCallColor) {
-      // All functions called from an indirectly called function are given
-      // a unique color.
-      if (Color < PIC16OVERLAY::StartIndirectCallColor &&
-          Depth >= PIC16OVERLAY::StartIndirectCallColor)
-        setColor(CGN->getFunction(), Depth);
-
-      for (unsigned int i = 0; i < CGN->size(); i++)
-        DFSTraverse((*CGN)[i], ++IndirectCallColor);
-      return;
-    }
-    // Just return if the node already has a color greater than the current
-    // depth. A node must be colored with the maximum depth it is seen at.
-    if (Color >= Depth)
-      return;
-
-    Depth = ModifyDepthForInterrupt(CGN, Depth);
-    setColor(CGN->getFunction(), Depth);
-  }
-
-  // Color all children of this node with color Depth+1.
-  for (unsigned int i = 0; i < CGN->size(); i++)
-    DFSTraverse((*CGN)[i], Depth + 1);
-}
-
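(For illustration, the coloring rule DFSTraverse implements: each function ends up colored with the maximum call depth at which it can be live, so two functions that may be active at the same time never share an overlay. A self-contained toy version with a hypothetical Node type, not part of the original patch; it assumes an acyclic graph and leaves out the interrupt and indirect-call adjustments.)

    #include <cassert>
    #include <vector>

    // Toy call-graph node with an overlay color.
    struct Node {
      unsigned Color;
      std::vector<Node*> Callees;
      Node() : Color(0) {}
    };

    // Each node's color becomes the maximum depth at which the DFS ever
    // reaches it; revisiting at a shallower depth is a no-op.
    void dfsColor(Node *N, unsigned Depth) {
      if (Depth != 0) {
        if (N->Color >= Depth)
          return;              // already colored at this depth or deeper
        N->Color = Depth;
      }
      for (unsigned i = 0; i < N->Callees.size(); ++i)
        dfsColor(N->Callees[i], Depth + 1);
    }

    int main() {
      // root calls A and B; both call C, and B also calls A.
      Node Root, A, B, C;
      Root.Callees.push_back(&A);
      Root.Callees.push_back(&B);
      A.Callees.push_back(&C);
      B.Callees.push_back(&C);
      B.Callees.push_back(&A);
      dfsColor(&Root, 0);
      // A is live under B, so it gets depth 2; C under that gets depth 3.
      assert(A.Color == 2 && B.Color == 1 && C.Color == 3);
    }
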
-unsigned PIC16Overlay::ModifyDepthForInterrupt(CallGraphNode *CGN,
-                                               unsigned Depth) {
-  Function *Fn = CGN->getFunction();
-
-  // Return the original Depth if the function or its section does not exist.
-  if (!Fn || !Fn->hasSection())
-    return Depth;
-
-  // Return the original Depth if this function is not marked as interrupt.
-  if (Fn->getSection().find("interrupt") == string::npos)
-    return Depth;
-
-  Depth = Depth + InterruptDepth;
-  return Depth;
-}
-
-void PIC16Overlay::setColor(Function *Fn, unsigned Color) {
-  std::string Section = "";
-  if (Fn->hasSection())
-    Section = Fn->getSection();
-
-  size_t Pos = Section.find(OverlayStr);
-
-  // Convert Color to a string.
-  std::stringstream ss;
-  ss << Color;
-  std::string ColorString = ss.str();
-
-  // If a color is already set, then replace it with the new value. Else
-  // append the Color string to the section.
-  if (Pos != std::string::npos) {
-    Pos += OverlayStr.length();
-    size_t OldColorStart = Pos;
-    unsigned OldColorLength = 0;
-    while (Pos < Section.length() &&
-           Section.at(Pos) >= '0' && Section.at(Pos) <= '9') {
-      OldColorLength++;
-      Pos++;
-    }
-    // Replace the old color, which starts just after "Overlay=", with the
-    // new one.
-    Section.replace(OldColorStart, OldColorLength, ColorString);
-  }
-  else {
-    // Append the Color information to the section string.
-    if (Fn->hasSection())
-      Section.append(" ");
-    Section.append(OverlayStr + ColorString);
-  }
-  Fn->setSection(Section);
-}
-
-unsigned PIC16Overlay::getColor(Function *Fn) {
-  int Color = 0;
-  if (!Fn->hasSection())
-    return 0;
-
-  std::string Section = Fn->getSection();
-  size_t Pos = Section.find(OverlayStr);
-
-  // Return 0 if the Color is not set.
-  if (Pos == std::string::npos)
-    return 0;
-
-  // Set Pos to just after "Overlay=".
-  Pos += OverlayStr.length();
-  std::string ColorString = "";
-
-  // Find the string representing the Color. A Color can only consist of
-  // digits.
-  while (Pos < Section.length() &&
-         Section.at(Pos) >= '0' && Section.at(Pos) <= '9') {
-    ColorString.append(1, Section.at(Pos));
-    Pos++;
-  }
-  Color = atoi(ColorString.c_str());
-
-  return Color;
-}
-
-bool PIC16Overlay::runOnModule(Module &M) {
-  CallGraph &CG = getAnalysis<CallGraph>();
-  CallGraphNode *ECN = CG.getExternalCallingNode();
-
-  MarkIndirectlyCalledFunctions(M);
-  // Since the External Calling Node is the base function, do a depth-first
-  // traversal of the CallGraph with ECN as the root. Each node will be
-  // marked with a color that is max(color(callers)) + 1.
-  if (ECN) {
-    DFSTraverse(ECN, 0);
-  }
-  return false;
-}
-
-void PIC16Overlay::MarkIndirectlyCalledFunctions(Module &M) {
-  // If a use of a function is not a call instruction, then this function
-  // might be called indirectly. In that case give it a unique color.
-  for (Module::iterator MI = M.begin(), E = M.end(); MI != E; ++MI) {
-    for (Value::use_iterator I = MI->use_begin(), E = MI->use_end(); I != E;
-         ++I) {
-      User *U = *I;
-      if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
-          || !CallSite(cast<Instruction>(U)).isCallee(I)) {
-        setColor(MI, ++IndirectCallColor);
-        break;
-      }
-    }
-  }
-}
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
deleted file mode 100644
index 2f611e6..0000000
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- PIC16Overlay.h - Interface for PIC16 Frame Overlay ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PIC16 Overlay infrastructure.
-// -//===----------------------------------------------------------------------===// - -#ifndef PIC16FRAMEOVERLAY_H -#define PIC16FRAMEOVERLAY_H - - -using std::string; -using namespace llvm; - -namespace llvm { - // Forward declarations. - class Function; - class Module; - class ModulePass; - class AnalysisUsage; - class CallGraphNode; - class CallGraph; - - namespace PIC16OVERLAY { - enum OverlayConsts { - StartInterruptColor = 200, - StartIndirectCallColor = 300 - }; - } - class PIC16Overlay : public ModulePass { - std::string OverlayStr; - unsigned InterruptDepth; - unsigned IndirectCallColor; - public: - static char ID; // Class identification - PIC16Overlay() : ModulePass(ID) { - OverlayStr = "Overlay="; - InterruptDepth = PIC16OVERLAY::StartInterruptColor; - IndirectCallColor = PIC16OVERLAY::StartIndirectCallColor; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool runOnModule(Module &M); - - private: - unsigned getColor(Function *Fn); - void setColor(Function *Fn, unsigned Color); - unsigned ModifyDepthForInterrupt(CallGraphNode *CGN, unsigned Depth); - void MarkIndirectlyCalledFunctions(Module &M); - void DFSTraverse(CallGraphNode *CGN, unsigned Depth); - }; -} // End of namespace - -#endif diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp deleted file mode 100644 index 76de47f..0000000 --- a/lib/Target/PIC16/PIC16RegisterInfo.cpp +++ /dev/null @@ -1,84 +0,0 @@ -//===- PIC16RegisterInfo.cpp - PIC16 Register Information -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the PIC16 implementation of the TargetRegisterInfo class. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pic16-reg-info" - -#include "PIC16.h" -#include "PIC16RegisterInfo.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -PIC16RegisterInfo::PIC16RegisterInfo(const TargetInstrInfo &tii, - const PIC16Subtarget &st) - : PIC16GenRegisterInfo(PIC16::ADJCALLSTACKDOWN, PIC16::ADJCALLSTACKUP), - TII(tii), - ST(st) {} - -#include "PIC16GenRegisterInfo.inc" - -/// PIC16 Callee Saved Registers -const unsigned* PIC16RegisterInfo:: -getCalleeSavedRegs(const MachineFunction *MF) const { - static const unsigned CalleeSavedRegs[] = { 0 }; - return CalleeSavedRegs; -} - -BitVector PIC16RegisterInfo::getReservedRegs(const MachineFunction &MF) const { - BitVector Reserved(getNumRegs()); - return Reserved; -} - -bool PIC16RegisterInfo::hasFP(const MachineFunction &MF) const { - return false; -} - -void PIC16RegisterInfo:: -eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS) const -{ /* NOT YET IMPLEMENTED */ } - -void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const -{ /* NOT YET IMPLEMENTED */ } - -void PIC16RegisterInfo:: -emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const -{ /* NOT YET IMPLEMENTED */ } - -int PIC16RegisterInfo:: -getDwarfRegNum(unsigned RegNum, bool isEH) const { - llvm_unreachable("Not keeping track of debug information yet!!"); - return -1; -} - -unsigned PIC16RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - llvm_unreachable("PIC16 Does not have any frame register"); - return 0; -} - -unsigned PIC16RegisterInfo::getRARegister() const { - llvm_unreachable("PIC16 Does not have any return address register"); - return 0; -} - -// This function eliminates ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void PIC16RegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - // Simply discard ADJCALLSTACKDOWN, - // ADJCALLSTACKUP instructions. - MBB.erase(I); -} - diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h deleted file mode 100644 index 20052b0..0000000 --- a/lib/Target/PIC16/PIC16RegisterInfo.h +++ /dev/null @@ -1,64 +0,0 @@ -//===- PIC16RegisterInfo.h - PIC16 Register Information Impl ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the PIC16 implementation of the TargetRegisterInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef PIC16REGISTERINFO_H -#define PIC16REGISTERINFO_H - -#include "PIC16GenRegisterInfo.h.inc" -#include "llvm/Target/TargetRegisterInfo.h" - -namespace llvm { - -// Forward Declarations. 
- class PIC16Subtarget; - class TargetInstrInfo; - -class PIC16RegisterInfo : public PIC16GenRegisterInfo { - private: - const TargetInstrInfo &TII; - const PIC16Subtarget &ST; - - public: - PIC16RegisterInfo(const TargetInstrInfo &tii, - const PIC16Subtarget &st); - - - //------------------------------------------------------ - // Pure virtual functions from TargetRegisterInfo - //------------------------------------------------------ - - // PIC16 callee saved registers - virtual const unsigned* - getCalleeSavedRegs(const MachineFunction *MF = 0) const; - - virtual BitVector getReservedRegs(const MachineFunction &MF) const; - virtual bool hasFP(const MachineFunction &MF) const; - - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, RegScavenger *RS=NULL) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - - virtual void emitPrologue(MachineFunction &MF) const; - virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const; - virtual unsigned getFrameRegister(const MachineFunction &MF) const; - virtual unsigned getRARegister() const; - -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/PIC16/PIC16RegisterInfo.td b/lib/Target/PIC16/PIC16RegisterInfo.td deleted file mode 100644 index 2959d91..0000000 --- a/lib/Target/PIC16/PIC16RegisterInfo.td +++ /dev/null @@ -1,33 +0,0 @@ -//===- PIC16RegisterInfo.td - PIC16 Register defs ------------*- tblgen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Declarations that describe the PIC16 register file -//===----------------------------------------------------------------------===// - -class PIC16Reg<string n> : Register<n> { - let Namespace = "PIC16"; -} - -// PIC16 Registers. -def W : PIC16Reg<"W">; -def FSR0 : PIC16Reg<"FSR0">; -def FSR1 : PIC16Reg<"FSR1">; -def BS : PIC16Reg<"BS">; -def PCLATH : PIC16Reg<"PCLATH">; - -def STATUS : PIC16Reg<"STATUS">; - -// PIC16 Register classes. -def GPR : RegisterClass<"PIC16", [i8], 8, [W]>; -def FSR16 : RegisterClass<"PIC16", [i16], 8, [FSR0, FSR1]>; -def BSR : RegisterClass<"PIC16", [i8], 8, [BS]>; -def PCLATHR : RegisterClass<"PIC16", [i8], 8, [PCLATH]>; -def STATUSR : RegisterClass<"PIC16", [i8], 8, [STATUS]>; - diff --git a/lib/Target/PIC16/PIC16Section.cpp b/lib/Target/PIC16/PIC16Section.cpp deleted file mode 100644 index 2505b11..0000000 --- a/lib/Target/PIC16/PIC16Section.cpp +++ /dev/null @@ -1,104 +0,0 @@ -//===-- PIC16Section.cpp - PIC16 Section ----------- --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "PIC16.h" -#include "PIC16ABINames.h" -#include "PIC16Section.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - - -// This is the only way to create a PIC16Section. Sections created here -// do not need to be explicitly deleted as they are managed by auto_ptrs. 
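(Aside, not part of the original patch: `new (Ctx) PIC16Section(...)` in Create below is placement new into the MCContext's allocator, so sections are reclaimed wholesale with the context rather than deleted one by one, and their destructors are never run, which is the leak the FIXME in PIC16Section.h notes. A self-contained toy of the same pattern; the Arena type here is hypothetical, not the LLVM API.)

    #include <cstddef>
    #include <new>
    #include <vector>

    // Toy arena: storage is handed out from owned blocks and reclaimed all
    // at once in the destructor. Objects placed in it are never deleted one
    // by one and their destructors never run. (Alignment is ignored here.)
    class Arena {
      std::vector<char*> Blocks;
    public:
      void *Allocate(std::size_t Size) {
        char *P = new char[Size];
        Blocks.push_back(P);
        return P;
      }
      ~Arena() {
        for (std::size_t i = 0; i < Blocks.size(); ++i)
          delete[] Blocks[i];
      }
    };

    // Placement form used as `new (A) T(...)`.
    void *operator new(std::size_t Size, Arena &A) { return A.Allocate(Size); }

    struct Section {
      const char *Name;
      explicit Section(const char *N) : Name(N) {}
    };

    int main() {
      Arena A;
      Section *S = new (A) Section("udata.0"); // lives until A is destroyed
      return S->Name[0] == 'u' ? 0 : 1;
    }
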
-PIC16Section *PIC16Section::Create(StringRef Name, PIC16SectionType Ty,
-                                   StringRef Address, int Color,
-                                   MCContext &Ctx) {
-
-  /// Determine the internal SectionKind info.
-  /// Users of the PIC16Section class should not need to know the internal
-  /// SectionKind. They should work only with PIC16SectionType.
-  ///
-  /// PIC16 terminology for section kinds is as below.
-  /// UDATA - BSS
-  /// IDATA - initialized data (equiv to Metadata)
-  /// ROMDATA - ReadOnly.
-  /// UDATA_OVR - Sections that can be overlaid. A section of this type is
-  ///             used to contain function autos and frame. We can think of
-  ///             it as the equivalent of llvm ThreadBSS.
-  /// UDATA_SHR - Shared RAM. Memory area that is mapped to all banks.
-
-  SectionKind K;
-  switch (Ty) {
-    default: llvm_unreachable ("cannot create unknown section type");
-    case UDATA_OVR: {
-      K = SectionKind::getThreadBSS();
-      break;
-    }
-    case UDATA_SHR:
-    case UDATA: {
-      K = SectionKind::getBSS();
-      break;
-    }
-    case ROMDATA:
-    case IDATA: {
-      K = SectionKind::getMetadata();
-      break;
-    }
-    case CODE: {
-      K = SectionKind::getText();
-      break;
-    }
-  }
-
-  // Copy the strings into context-allocated memory so they get freed when
-  // the context is destroyed.
-  char *NameCopy = static_cast<char*>(Ctx.Allocate(Name.size(), 1));
-  memcpy(NameCopy, Name.data(), Name.size());
-  char *AddressCopy = static_cast<char*>(Ctx.Allocate(Address.size(), 1));
-  memcpy(AddressCopy, Address.data(), Address.size());
-
-  // Create the Section.
-  PIC16Section *S =
-    new (Ctx) PIC16Section(StringRef(NameCopy, Name.size()), K,
-                           StringRef(AddressCopy, Address.size()), Color);
-  S->T = Ty;
-  return S;
-}
-
-// A generic way to print all types of sections.
-void PIC16Section::PrintSwitchToSection(const MCAsmInfo &MAI,
-                                        raw_ostream &OS) const {
-
-  // If the section is overlaid (i.e. it has a color), print the overlay
-  // name for it. Otherwise print its normal name.
-  if (Color != -1)
-    OS << PAN::getOverlayName(getName(), Color) << '\t';
-  else
-    OS << getName() << '\t';
-
-  // Print the type.
-  switch (getType()) {
-    default : llvm_unreachable ("unknown section type");
-    case UDATA: OS << "UDATA"; break;
-    case IDATA: OS << "IDATA"; break;
-    case ROMDATA: OS << "ROMDATA"; break;
-    case UDATA_SHR: OS << "UDATA_SHR"; break;
-    case UDATA_OVR: OS << "UDATA_OVR"; break;
-    case CODE: OS << "CODE"; break;
-  }
-
-  OS << '\t';
-
-  // Print the Address.
-  OS << Address;
-
-  OS << '\n';
-}
diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h
deleted file mode 100644
index 5b33b51..0000000
--- a/lib/Target/PIC16/PIC16Section.h
+++ /dev/null
@@ -1,99 +0,0 @@
-//===- PIC16Section.h - PIC16-specific section representation -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the PIC16Section class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_PIC16SECTION_H
-#define LLVM_PIC16SECTION_H
-
-#include "llvm/MC/MCSection.h"
-#include "llvm/GlobalVariable.h"
-#include <vector>
-
-namespace llvm {
-  /// PIC16Section - Represents a physical section in PIC16 COFF.
-  /// Contains data objects.
-  ///
-  class PIC16Section : public MCSection {
-    /// PIC16 sections do not really use the SectionKind class to
-    /// distinguish between the various types of sections. PIC16 maintains
-    /// its own Section Type info. See the PIC16SectionType enum in PIC16.h
-    /// for the various section types.
-    PIC16SectionType T;
-
-    /// Name of the section to uniquely identify it.
-    StringRef Name;
-
-    /// User can specify an address at which a section should be placed.
-    /// An empty string here means the user hasn't specified one.
-    StringRef Address;
-
-    /// Overlay information - Sections with the same color can be overlaid
-    /// on one another.
-    int Color;
-
-    /// Total size of all data objects contained here.
-    unsigned Size;
-
-    PIC16Section(StringRef name, SectionKind K, StringRef addr, int color)
-      : MCSection(SV_PIC16, K), Name(name), Address(addr),
-        Color(color), Size(0) {
-    }
-
-  public:
-    /// Return the name of the section.
-    StringRef getName() const { return Name; }
-
-    /// Return the Address of the section.
-    StringRef getAddress() const { return Address; }
-
-    /// Return the Color of the section.
-    int getColor() const { return Color; }
-    void setColor(int color) { Color = color; }
-
-    /// Return the size of the section.
-    unsigned getSize() const { return Size; }
-    void setSize(unsigned size) { Size = size; }
-
-    /// Contained data objects.
-    // FIXME: This vector is leaked because sections are allocated with a
-    // BumpPtrAllocator.
-    std::vector<const GlobalVariable *> Items;
-
-    /// Check the section type.
-    bool isUDATA_Type() const { return T == UDATA; }
-    bool isIDATA_Type() const { return T == IDATA; }
-    bool isROMDATA_Type() const { return T == ROMDATA; }
-    bool isUDATA_OVR_Type() const { return T == UDATA_OVR; }
-    bool isUDATA_SHR_Type() const { return T == UDATA_SHR; }
-    bool isCODE_Type() const { return T == CODE; }
-
-    PIC16SectionType getType() const { return T; }
-
-    /// This is the only way to create a section.
-    static PIC16Section *Create(StringRef Name, PIC16SectionType Ty,
-                                StringRef Address, int Color,
-                                MCContext &Ctx);
-
-    /// Override this, as PIC16 has its own way of printing the switch
-    /// to a section.
-    virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
-                                      raw_ostream &OS) const;
-
-    static bool classof(const MCSection *S) {
-      return S->getVariant() == SV_PIC16;
-    }
-    static bool classof(const PIC16Section *) { return true; }
-  };
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
deleted file mode 100644
index 995955a..0000000
--- a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- PIC16SelectionDAGInfo.cpp - PIC16 SelectionDAG Info ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PIC16SelectionDAGInfo class.
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pic16-selectiondag-info" -#include "PIC16TargetMachine.h" -using namespace llvm; - -PIC16SelectionDAGInfo::PIC16SelectionDAGInfo(const PIC16TargetMachine &TM) - : TargetSelectionDAGInfo(TM) { -} - -PIC16SelectionDAGInfo::~PIC16SelectionDAGInfo() { -} diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.h b/lib/Target/PIC16/PIC16SelectionDAGInfo.h deleted file mode 100644 index c67fd8b..0000000 --- a/lib/Target/PIC16/PIC16SelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- PIC16SelectionDAGInfo.h - PIC16 SelectionDAG Info -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the PIC16 subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef PIC16SELECTIONDAGINFO_H -#define PIC16SELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class PIC16TargetMachine; - -class PIC16SelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit PIC16SelectionDAGInfo(const PIC16TargetMachine &TM); - ~PIC16SelectionDAGInfo(); -}; - -} - -#endif diff --git a/lib/Target/PIC16/PIC16Subtarget.cpp b/lib/Target/PIC16/PIC16Subtarget.cpp deleted file mode 100644 index 33fc3fb..0000000 --- a/lib/Target/PIC16/PIC16Subtarget.cpp +++ /dev/null @@ -1,27 +0,0 @@ -//===- PIC16Subtarget.cpp - PIC16 Subtarget Information -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the PIC16 specific subclass of TargetSubtarget. -// -//===----------------------------------------------------------------------===// - -#include "PIC16Subtarget.h" -#include "PIC16GenSubtarget.inc" - -using namespace llvm; - -PIC16Subtarget::PIC16Subtarget(const std::string &TT, const std::string &FS, - bool Cooper) - :IsCooper(Cooper) -{ - std::string CPU = "generic"; - - // Parse features string. - ParseSubtargetFeatures(FS, CPU); -} diff --git a/lib/Target/PIC16/PIC16Subtarget.h b/lib/Target/PIC16/PIC16Subtarget.h deleted file mode 100644 index 81e3783..0000000 --- a/lib/Target/PIC16/PIC16Subtarget.h +++ /dev/null @@ -1,44 +0,0 @@ -//=====-- PIC16Subtarget.h - Define Subtarget for the PIC16 ---*- C++ -*--====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the PIC16 specific subclass of TargetSubtarget. -// -//===----------------------------------------------------------------------===// - -#ifndef PIC16SUBTARGET_H -#define PIC16SUBTARGET_H - -#include "llvm/Target/TargetSubtarget.h" - -#include <string> - -namespace llvm { - -class PIC16Subtarget : public TargetSubtarget { - - // IsCooper - Target ISA is Cooper. - bool IsCooper; - -public: - /// This constructor initializes the data members to match that - /// of the specified triple. 
- ///
- PIC16Subtarget(const std::string &TT, const std::string &FS, bool Cooper);
-
-  /// isCooper - Returns true if the target ISA is Cooper.
-  bool isCooper() const { return IsCooper; }
-
-  /// ParseSubtargetFeatures - Parses the features string, setting the
-  /// specified subtarget options. The definition of this function is
-  /// auto-generated by tblgen.
-  std::string ParseSubtargetFeatures(const std::string &FS,
-                                     const std::string &CPU);
-};
-} // End llvm namespace
-
-#endif  // PIC16SUBTARGET_H
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
deleted file mode 100644
index 82b69be..0000000
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- PIC16TargetMachine.cpp - Define TargetMachine for PIC16 -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Top-level implementation for the PIC16 target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PIC16.h"
-#include "PIC16MCAsmInfo.h"
-#include "PIC16TargetMachine.h"
-#include "llvm/PassManager.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetRegistry.h"
-
-using namespace llvm;
-
-extern "C" void LLVMInitializePIC16Target() {
-  // Register the target. Currently the codegen works for the
-  // enhanced PIC16 mid-range.
-  RegisterTargetMachine<PIC16TargetMachine> X(ThePIC16Target);
-  RegisterAsmInfo<PIC16MCAsmInfo> A(ThePIC16Target);
-}
-
-
-// PIC16TargetMachine - Enhanced PIC16 mid-range machine. May also represent
-// a traditional machine if 'Trad' is true.
-PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT,
-                                       const std::string &FS, bool Trad)
-: LLVMTargetMachine(T, TT),
-  Subtarget(TT, FS, Trad),
-  DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8"),
-  InstrInfo(*this), TLInfo(*this), TSInfo(*this),
-  FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { }
-
-
-bool PIC16TargetMachine::addInstSelector(PassManagerBase &PM,
-                                         CodeGenOpt::Level OptLevel) {
-  // Install an instruction selector.
-  PM.add(createPIC16ISelDag(*this));
-  return false;
-}
-
-bool PIC16TargetMachine::addPreEmitPass(PassManagerBase &PM,
-                                        CodeGenOpt::Level OptLevel) {
-  PM.add(createPIC16MemSelOptimizerPass());
-  return true; // -print-machineinstr should print after this.
-}
-
-
-// -//===----------------------------------------------------------------------===// - - -#ifndef PIC16_TARGETMACHINE_H -#define PIC16_TARGETMACHINE_H - -#include "PIC16InstrInfo.h" -#include "PIC16ISelLowering.h" -#include "PIC16SelectionDAGInfo.h" -#include "PIC16RegisterInfo.h" -#include "PIC16Subtarget.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - -/// PIC16TargetMachine -/// -class PIC16TargetMachine : public LLVMTargetMachine { - PIC16Subtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment - PIC16InstrInfo InstrInfo; - PIC16TargetLowering TLInfo; - PIC16SelectionDAGInfo TSInfo; - - // PIC16 does not have any call stack frame, therefore not having - // any PIC16 specific FrameInfo class. - TargetFrameInfo FrameInfo; - -public: - PIC16TargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool Cooper = false); - - virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } - virtual const PIC16InstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetData *getTargetData() const { return &DataLayout;} - virtual const PIC16Subtarget *getSubtargetImpl() const { return &Subtarget; } - - virtual const PIC16RegisterInfo *getRegisterInfo() const { - return &(InstrInfo.getRegisterInfo()); - } - - virtual const PIC16TargetLowering *getTargetLowering() const { - return &TLInfo; - } - - virtual const PIC16SelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; - } - - virtual bool addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); -}; // PIC16TargetMachine. - -} // end namespace llvm - -#endif diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp deleted file mode 100644 index ff0f971..0000000 --- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp +++ /dev/null @@ -1,384 +0,0 @@ -//===-- PIC16TargetObjectFile.cpp - PIC16 object files --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "PIC16TargetObjectFile.h" -#include "PIC16TargetMachine.h" -#include "PIC16Section.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/MC/MCSection.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - - -PIC16TargetObjectFile::PIC16TargetObjectFile() { -} - -PIC16TargetObjectFile::~PIC16TargetObjectFile() { -} - -/// Find a pic16 section. Return null if not found. Do not create one. -PIC16Section *PIC16TargetObjectFile:: -findPIC16Section(const std::string &Name) const { - /// Return if we have an already existing one. - PIC16Section *Entry = SectionsByName[Name]; - if (Entry) - return Entry; - - return NULL; -} - - -/// Find a pic16 section. If not found, create one. -PIC16Section *PIC16TargetObjectFile:: -getPIC16Section(const std::string &Name, PIC16SectionType Ty, - const std::string &Address, int Color) const { - - /// Return if we have an already existing one. - PIC16Section *&Entry = SectionsByName[Name]; - if (Entry) - return Entry; - - - Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext()); - return Entry; -} - -/// Find a standard pic16 data section. 
If not found, create one and keep
-/// track of it by adding it to the appropriate std section list.
-PIC16Section *PIC16TargetObjectFile::
-getPIC16DataSection(const std::string &Name, PIC16SectionType Ty,
-                    const std::string &Address, int Color) const {
-
-  /// Return it if we have an already existing one.
-  PIC16Section *&Entry = SectionsByName[Name];
-  if (Entry)
-    return Entry;
-
-
-  /// Else create a new one and add it to the appropriate section list.
-  Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext());
-
-  switch (Ty) {
-    default: llvm_unreachable ("unknown standard section type.");
-    case UDATA: UDATASections_.push_back(Entry); break;
-    case IDATA: IDATASections_.push_back(Entry); break;
-    case ROMDATA: ROMDATASection_ = Entry; break;
-    case UDATA_SHR: SHAREDUDATASection_ = Entry; break;
-  }
-
-  return Entry;
-}
-
-
-/// Find a standard pic16 autos section. If not found, create one and keep
-/// track of it by adding it to the appropriate std section list.
-PIC16Section *PIC16TargetObjectFile::
-getPIC16AutoSection(const std::string &Name, PIC16SectionType Ty,
-                    const std::string &Address, int Color) const {
-
-  /// Return it if we have an already existing one.
-  PIC16Section *&Entry = SectionsByName[Name];
-  if (Entry)
-    return Entry;
-
-
-  /// Else create a new one and add it to the appropriate section list.
-  Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext());
-
-  assert (Ty == UDATA_OVR && "incorrect section type for autos");
-  AUTOSections_.push_back(Entry);
-
-  return Entry;
-}
-
-/// Find a pic16 user section. If not found, create one and keep
-/// track of it by adding it to the appropriate std section list.
-PIC16Section *PIC16TargetObjectFile::
-getPIC16UserSection(const std::string &Name, PIC16SectionType Ty,
-                    const std::string &Address, int Color) const {
-
-  /// Return it if we have an already existing one.
-  PIC16Section *&Entry = SectionsByName[Name];
-  if (Entry)
-    return Entry;
-
-
-  /// Else create a new one and add it to the appropriate section list.
-  Entry = PIC16Section::Create(Name, Ty, Address, Color, getContext());
-
-  USERSections_.push_back(Entry);
-
-  return Entry;
-}
-
-/// Do some standard initialization.
-void PIC16TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &tm){
-  TargetLoweringObjectFile::Initialize(Ctx, tm);
-  TM = &tm;
-
-  ROMDATASection_ = NULL;
-  SHAREDUDATASection_ = NULL;
-}
-
-/// allocateUDATA - Allocate an uninitialized global to an existing or new
-/// UDATA section and return that section.
-const MCSection *
-PIC16TargetObjectFile::allocateUDATA(const GlobalVariable *GV) const {
-  assert(GV->hasInitializer() && "This global doesn't need space");
-  const Constant *C = GV->getInitializer();
-  assert(C->isNullValue() && "Uninitialized global has a non-zero initializer");
-
-  // Find how much space this global needs.
-  const TargetData *TD = TM->getTargetData();
-  const Type *Ty = C->getType();
-  unsigned ValSize = TD->getTypeAllocSize(Ty);
-
-  // Go through all UDATA sections and assign this variable
-  // to the first available section having enough space.
-  PIC16Section *Found = NULL;
-  for (unsigned i = 0; i < UDATASections_.size(); i++) {
-    if (DataBankSize - UDATASections_[i]->getSize() >= ValSize) {
-      Found = UDATASections_[i];
-      break;
-    }
-  }
-
-  // No UDATA section spacious enough was found. Create a new one.
-  if (!Found) {
-    std::string name = PAN::getUdataSectionName(UDATASections_.size());
-    Found = getPIC16DataSection(name.c_str(), UDATA);
-  }
-
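(The loop above is plain first-fit packing over 80-byte data banks. A standalone toy of that behavior, not part of the original patch and with hypothetical names; like the pass, it does not special-case a single global larger than a bank.)

    #include <cassert>
    #include <vector>

    // Each bank (section) holds at most DataBankSize bytes; a global goes
    // into the first bank with room, and a new bank opens when none fits.
    enum { DataBankSize = 80 };

    unsigned placeGlobal(std::vector<unsigned> &BankUsed, unsigned ValSize) {
      for (unsigned i = 0; i < BankUsed.size(); ++i)
        if (DataBankSize - BankUsed[i] >= ValSize) {
          BankUsed[i] += ValSize;
          return i;
        }
      BankUsed.push_back(ValSize);          // open a fresh udata.<n> section
      return BankUsed.size() - 1;
    }

    int main() {
      std::vector<unsigned> Banks;
      assert(placeGlobal(Banks, 60) == 0);  // fits in bank 0
      assert(placeGlobal(Banks, 30) == 1);  // 60+30 > 80, so a new bank
      assert(placeGlobal(Banks, 20) == 0);  // 20 still fits in bank 0
    }
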
-  // Insert the GV into this UDATA section.
-  Found->Items.push_back(GV);
-  Found->setSize(Found->getSize() + ValSize);
-  return Found;
-}
-
-/// allocateIDATA - Allocate an initialized global into an existing
-/// or new IDATA section and return that section.
-const MCSection *
-PIC16TargetObjectFile::allocateIDATA(const GlobalVariable *GV) const {
-  assert(GV->hasInitializer() && "This global doesn't need space");
-  const Constant *C = GV->getInitializer();
-  assert(!C->isNullValue() && "Initialized global has a zero initializer");
-  assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE &&
-         "can allocate initialized RAM data only");
-
-  // Find how much space this global needs.
-  const TargetData *TD = TM->getTargetData();
-  const Type *Ty = C->getType();
-  unsigned ValSize = TD->getTypeAllocSize(Ty);
-
-  // Go through all IDATA sections and assign this variable
-  // to the first available section having enough space.
-  PIC16Section *Found = NULL;
-  for (unsigned i = 0; i < IDATASections_.size(); i++) {
-    if (DataBankSize - IDATASections_[i]->getSize() >= ValSize) {
-      Found = IDATASections_[i];
-      break;
-    }
-  }
-
-  // No IDATA section spacious enough was found. Create a new one.
-  if (!Found) {
-    std::string name = PAN::getIdataSectionName(IDATASections_.size());
-    Found = getPIC16DataSection(name.c_str(), IDATA);
-  }
-
-  // Insert the GV into this IDATA section.
-  Found->Items.push_back(GV);
-  Found->setSize(Found->getSize() + ValSize);
-  return Found;
-}
-
-// Allocate a program memory variable into the ROMDATA section.
-const MCSection *
-PIC16TargetObjectFile::allocateROMDATA(const GlobalVariable *GV) const {
-
-  std::string name = PAN::getRomdataSectionName();
-  PIC16Section *S = getPIC16DataSection(name.c_str(), ROMDATA);
-
-  S->Items.push_back(GV);
-  return S;
-}
-
-// Get the section for an automatic variable of a function.
-// For PIC16 these are emitted as globals with mangled names.
-const MCSection *
-PIC16TargetObjectFile::allocateAUTO(const GlobalVariable *GV) const {
-
-  const std::string name = PAN::getSectionNameForSym(GV->getName());
-  PIC16Section *S = getPIC16AutoSection(name.c_str());
-
-  S->Items.push_back(GV);
-  return S;
-}
-
-
-// Override the default implementation to put the true globals into
-// multiple data sections if required.
-const MCSection *
-PIC16TargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV1,
-                                              SectionKind Kind,
-                                              Mangler *Mang,
-                                              const TargetMachine &TM) const {
-  // We select the section based on the initializer here, so it really
-  // has to be a GlobalVariable.
-  const GlobalVariable *GV = dyn_cast<GlobalVariable>(GV1);
-  if (!GV)
-    return TargetLoweringObjectFile::SelectSectionForGlobal(GV1, Kind, Mang, TM);
-
-  assert(GV->hasInitializer() && "A def without initializer?");
-
-  // First, if this is an automatic variable for a function, get the section
-  // name for it and return.
-  std::string name = GV->getName();
-  if (PAN::isLocalName(name))
-    return allocateAUTO(GV);
-
-  // See if this is an uninitialized global.
-  const Constant *C = GV->getInitializer();
-  if (C->isNullValue())
-    return allocateUDATA(GV);
-
-  // If this is initialized data in RAM, put it in the correct IDATA section.
-  if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE)
-    return allocateIDATA(GV);
-
-  // This is initialized data in ROM; put it in the read-only section.
-  if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE)
-    return allocateROMDATA(GV);
-
-  // Else let the default implementation take care of it.
-  return TargetLoweringObjectFile::SelectSectionForGlobal(GV, Kind, Mang, TM);
-}
-
-
-
-
-/// getExplicitSectionGlobal - Allow the target to completely override
-/// the section assignment of a global.
-const MCSection *PIC16TargetObjectFile::
-getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
-                         Mangler *Mang, const TargetMachine &TM) const {
-  assert(GV->hasSection());
-
-  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
-    std::string SectName = GVar->getSection();
-    // If an address for the variable is specified, get the address and
-    // create a section there.
-    // FIXME: move this attribute checking into PAN.
-    std::string AddrStr = "Address=";
-    if (SectName.compare(0, AddrStr.length(), AddrStr) == 0) {
-      std::string SectAddr = SectName.substr(AddrStr.length());
-      if (SectAddr.compare("NEAR") == 0)
-        return allocateSHARED(GVar, Mang);
-      else
-        return allocateAtGivenAddress(GVar, SectAddr);
-    }
-
-    // Create the section specified with the section attribute.
-    return allocateInGivenSection(GVar);
-  }
-
-  return getPIC16DataSection(GV->getSection().c_str(), UDATA);
-}
-
-const MCSection *
-PIC16TargetObjectFile::allocateSHARED(const GlobalVariable *GV,
-                                      Mangler *Mang) const {
-  // Make sure that this is an uninitialized global.
-  assert(GV->hasInitializer() && "This global doesn't need space");
-  if (!GV->getInitializer()->isNullValue()) {
-    // FIXME: Generate a warning in this case that the near qualifier
-    // will be ignored.
-    return SelectSectionForGlobal(GV, SectionKind::getDataRel(), Mang, *TM);
-  }
-  std::string Name = PAN::getSharedUDataSectionName();
-
-  PIC16Section *SharedUDataSect = getPIC16DataSection(Name.c_str(), UDATA_SHR);
-  // Insert the GV into the shared section.
-  SharedUDataSect->Items.push_back(GV);
-  return SharedUDataSect;
-}
-
-
-// Interface used by the AsmPrinter to get a code section for a function.
-const PIC16Section *
-PIC16TargetObjectFile::SectionForCode(const std::string &FnName,
-                                      bool isISR) const {
-  const std::string &sec_name = PAN::getCodeSectionName(FnName);
-  // If it is an ISR, its code section starts at a specific address.
-  if (isISR)
-    return getPIC16Section(sec_name, CODE, PAN::getISRAddr());
-  return getPIC16Section(sec_name, CODE);
-}
-
-// Interface used by the AsmPrinter to get a frame section for a function.
-const PIC16Section *
-PIC16TargetObjectFile::SectionForFrame(const std::string &FnName) const {
-  const std::string &sec_name = PAN::getFrameSectionName(FnName);
-  return getPIC16Section(sec_name, UDATA_OVR);
-}
-
-// Allocate a global var in an existing or new section of the given name.
-const MCSection *
-PIC16TargetObjectFile::allocateInGivenSection(const GlobalVariable *GV) const {
-  // Determine the type of section that we need to create.
-  PIC16SectionType SecTy;
-
-  // See if this is an uninitialized global.
-  const Constant *C = GV->getInitializer();
-  if (C->isNullValue())
-    SecTy = UDATA;
-  // If this is initialized data in RAM, put it in the correct IDATA section.
-  else if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE)
-    SecTy = IDATA;
-  // This is initialized data in ROM; put it in the read-only section.
-  else if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE)
-    SecTy = ROMDATA;
-  else
-    llvm_unreachable ("Could not determine section type for global");
-
-  PIC16Section *S = getPIC16UserSection(GV->getSection().c_str(), SecTy);
-  S->Items.push_back(GV);
-  return S;
-}
-
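(getExplicitSectionGlobal above keys off the section string itself: a plain name selects a user section, while an "Address=" prefix requests either the shared bank, spelled "NEAR", or an absolute address. A tiny standalone sketch of that dispatch, not part of the original patch and with hypothetical names.)

    #include <cassert>
    #include <string>

    enum Placement { PlainSection, SharedBank, AbsoluteAddress };

    // Classify a section attribute string; on an "Address=" match, Addr
    // receives the text after the prefix.
    Placement classify(const std::string &SectName, std::string &Addr) {
      const std::string AddrStr = "Address=";
      if (SectName.compare(0, AddrStr.length(), AddrStr) != 0)
        return PlainSection;
      Addr = SectName.substr(AddrStr.length());
      return Addr == "NEAR" ? SharedBank : AbsoluteAddress;
    }

    int main() {
      std::string A;
      assert(classify("udata.usr", A) == PlainSection);
      assert(classify("Address=NEAR", A) == SharedBank);
      assert(classify("Address=0x20", A) == AbsoluteAddress && A == "0x20");
    }
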
-// Allocate a global var in a new absolute section at the given address.
-const MCSection *
-PIC16TargetObjectFile::allocateAtGivenAddress(const GlobalVariable *GV,
-                                              const std::string &Addr) const {
-  // Determine the type of section that we need to create.
-  PIC16SectionType SecTy;
-
-  // See if this is an uninitialized global.
-  const Constant *C = GV->getInitializer();
-  if (C->isNullValue())
-    SecTy = UDATA;
-  // If this is initialized data in RAM, put it in the correct IDATA section.
-  else if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE)
-    SecTy = IDATA;
-  // This is initialized data in ROM; put it in the read-only section.
-  else if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE)
-    SecTy = ROMDATA;
-  else
-    llvm_unreachable ("Could not determine section type for global");
-
-  std::string Prefix = GV->getNameStr() + "." + Addr + ".";
-  std::string SName = PAN::getUserSectionName(Prefix);
-  PIC16Section *S = getPIC16UserSection(SName.c_str(), SecTy, Addr.c_str());
-  S->Items.push_back(GV);
-  return S;
-}
-
-
diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h
deleted file mode 100644
index b1eb9f9..0000000
--- a/lib/Target/PIC16/PIC16TargetObjectFile.h
+++ /dev/null
@@ -1,168 +0,0 @@
-//===-- PIC16TargetObjectFile.h - PIC16 Object Info -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_PIC16_TARGETOBJECTFILE_H
-#define LLVM_TARGET_PIC16_TARGETOBJECTFILE_H
-
-#include "PIC16.h"
-#include "PIC16ABINames.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/ADT/StringMap.h"
-#include <vector>
-#include <string>
-
-namespace llvm {
-  class GlobalVariable;
-  class Module;
-  class PIC16TargetMachine;
-  class PIC16Section;
-
-  enum { DataBankSize = 80 };
-
-  /// PIC16 splits the global data into multiple udata and idata sections.
-  /// Each udata and idata section needs to contain a list of the globals it
-  /// holds, in order to avoid scanning over all the global values again and
-  /// printing only those that match the current section.
-  /// Keeping values inside the sections makes printing a section much easier.
-  ///
-  /// FIXME: MOVE ALL THIS STUFF TO PIC16Section.
-  ///
-
-  /// PIC16TargetObjectFile - PIC16 Object file. Contains data and code
-  /// sections.
-  // A PIC16 object file has two types of sections.
-  // 1. Standard Sections
-  //    1.1 un-initialized global data
-  //    1.2 initialized global data
-  //    1.3 program memory data
-  //    1.4 local variables of functions.
-  // 2. User defined sections
-  //    2.1 Objects placed in a specific section. (By the _Section() macro)
-  //    2.2 Objects placed at a specific address. (By the _Address() macro)
-  class PIC16TargetObjectFile : public TargetLoweringObjectFile {
-    /// SectionsByName - Bindings of names to allocated sections.
-    mutable StringMap<PIC16Section*> SectionsByName;
-
-    const TargetMachine *TM;
-
-    /// Lists of sections.
-    /// Standard Data Sections.
-    mutable std::vector<PIC16Section *> UDATASections_;
-    mutable std::vector<PIC16Section *> IDATASections_;
-    mutable PIC16Section * ROMDATASection_;
-    mutable PIC16Section * SHAREDUDATASection_;
-
-    /// Standard Auto Sections.
-    mutable std::vector<PIC16Section *> AUTOSections_;
-
-    /// User specified sections.
-    mutable std::vector<PIC16Section *> USERSections_;
-
-
-    /// Find or create a PIC16 Section, without adding it to any
-    /// section list.
-    PIC16Section *getPIC16Section(const std::string &Name,
-                                   PIC16SectionType Ty,
-                                   const std::string &Address = "",
-                                   int Color = -1) const;
-
-    /// Convenience functions. These wrappers also take care of adding
-    /// the newly created section to the appropriate sections list.
-
-    /// Find or create a PIC16 Standard Data Section.
-    PIC16Section *getPIC16DataSection(const std::string &Name,
-                                       PIC16SectionType Ty,
-                                       const std::string &Address = "",
-                                       int Color = -1) const;
-
-    /// Find or create a PIC16 Standard Auto Section.
-    PIC16Section *getPIC16AutoSection(const std::string &Name,
-                                       PIC16SectionType Ty = UDATA_OVR,
-                                       const std::string &Address = "",
-                                       int Color = -1) const;
-
-    /// Find or create a PIC16 User Section.
-    PIC16Section *getPIC16UserSection(const std::string &Name,
-                                       PIC16SectionType Ty,
-                                       const std::string &Address = "",
-                                       int Color = -1) const;
-
-    /// Allocate uninitialized data to a standard UDATA section.
-    const MCSection *allocateUDATA(const GlobalVariable *GV) const;
-
-    /// Allocate initialized data to a standard IDATA section.
-    const MCSection *allocateIDATA(const GlobalVariable *GV) const;
-
-    /// Allocate ROM data to the standard ROMDATA section.
-    const MCSection *allocateROMDATA(const GlobalVariable *GV) const;
-
-    /// Allocate an AUTO variable to an AUTO section.
-    const MCSection *allocateAUTO(const GlobalVariable *GV) const;
-
-    /// Allocate DATA in a user specified section.
-    const MCSection *allocateInGivenSection(const GlobalVariable *GV) const;
-
-    /// Allocate DATA at a user specified address.
-    const MCSection *allocateAtGivenAddress(const GlobalVariable *GV,
-                                            const std::string &Addr) const;
-
-    /// Allocate a shared variable to the SHARED section.
-    const MCSection *allocateSHARED(const GlobalVariable *GV,
-                                    Mangler *Mang) const;
-
-  public:
-    PIC16TargetObjectFile();
-    ~PIC16TargetObjectFile();
-    void Initialize(MCContext &Ctx, const TargetMachine &TM);
-
-    /// Return the section with the given Name. Null if not found.
-    PIC16Section *findPIC16Section(const std::string &Name) const;
-
-    /// Override section allocations for user specified sections.
-    virtual const MCSection *
-    getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
-                             Mangler *Mang, const TargetMachine &TM) const;
-
-    /// Select sections for Data and Auto variables (globals).
-    virtual const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
-                                                    SectionKind Kind,
-                                                    Mangler *Mang,
-                                                    const TargetMachine&) const;
-
-
-    /// Return a code section for a function.
-    const PIC16Section *SectionForCode(const std::string &FnName,
-                                       bool isISR) const;
-
-    /// Return a frame section for a function.
-    const PIC16Section *SectionForFrame(const std::string &FnName) const;
-
-    /// Accessors for the various section lists.
- const std::vector<PIC16Section *> &UDATASections() const { - return UDATASections_; - } - const std::vector<PIC16Section *> &IDATASections() const { - return IDATASections_; - } - const PIC16Section *ROMDATASection() const { - return ROMDATASection_; - } - const PIC16Section *SHAREDUDATASection() const { - return SHAREDUDATASection_; - } - const std::vector<PIC16Section *> &AUTOSections() const { - return AUTOSections_; - } - const std::vector<PIC16Section *> &USERSections() const { - return USERSections_; - } - }; -} // end namespace llvm - -#endif diff --git a/lib/Target/PIC16/TargetInfo/CMakeLists.txt b/lib/Target/PIC16/TargetInfo/CMakeLists.txt deleted file mode 100644 index bfc6ff4..0000000 --- a/lib/Target/PIC16/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMPIC16Info - PIC16TargetInfo.cpp - ) - -add_dependencies(LLVMPIC16Info PIC16Table_gen) diff --git a/lib/Target/PIC16/TargetInfo/Makefile b/lib/Target/PIC16/TargetInfo/Makefile deleted file mode 100644 index 76609f6..0000000 --- a/lib/Target/PIC16/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/PIC16/TargetInfo/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMPIC16Info - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp deleted file mode 100644 index f1bdb12..0000000 --- a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp +++ /dev/null @@ -1,22 +0,0 @@ -//===-- PIC16TargetInfo.cpp - PIC16 Target Implementation -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "PIC16.h" -#include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" -using namespace llvm; - -Target llvm::ThePIC16Target, llvm::TheCooperTarget; - -extern "C" void LLVMInitializePIC16TargetInfo() { - RegisterTarget<Triple::pic16> X(ThePIC16Target, "pic16", - "PIC16 14-bit [experimental]"); - - RegisterTarget<> Y(TheCooperTarget, "cooper", "PIC16 Cooper [experimental]"); -} diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt new file mode 100644 index 0000000..331266d --- /dev/null +++ b/lib/Target/PTX/CMakeLists.txt @@ -0,0 +1,26 @@ +set(LLVM_TARGET_DEFINITIONS PTX.td) + +tablegen(PTXGenAsmWriter.inc -gen-asm-writer) +tablegen(PTXGenDAGISel.inc -gen-dag-isel) +tablegen(PTXGenInstrInfo.inc -gen-instr-desc) +tablegen(PTXGenInstrNames.inc -gen-instr-enums) +tablegen(PTXGenRegisterInfo.inc -gen-register-desc) +tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(PTXGenRegisterNames.inc -gen-register-enums) +tablegen(PTXGenSubtarget.inc -gen-subtarget) + +add_llvm_target(PTXCodeGen + PTXAsmPrinter.cpp + PTXISelDAGToDAG.cpp + PTXISelLowering.cpp + PTXInstrInfo.cpp + PTXFrameLowering.cpp + PTXMCAsmInfo.cpp + PTXMCAsmStreamer.cpp + PTXMFInfoExtract.cpp + PTXRegisterInfo.cpp + PTXSubtarget.cpp + PTXTargetMachine.cpp + ) + +add_subdirectory(TargetInfo) diff --git a/lib/Target/PTX/Makefile b/lib/Target/PTX/Makefile new file mode 100644 index 0000000..2c40d69 --- /dev/null +++ b/lib/Target/PTX/Makefile @@ -0,0 +1,26 @@ +##===- lib/Target/PTX/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMPTXCodeGen +TARGET = PTX + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = PTXGenAsmWriter.inc \ + PTXGenDAGISel.inc \ + PTXGenInstrInfo.inc \ + PTXGenInstrNames.inc \ + PTXGenRegisterInfo.inc \ + PTXGenRegisterInfo.h.inc \ + PTXGenRegisterNames.inc \ + PTXGenSubtarget.inc + +DIRS = TargetInfo + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h new file mode 100644 index 0000000..19385ba --- /dev/null +++ b/lib/Target/PTX/PTX.h @@ -0,0 +1,49 @@ +//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// PTX back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef PTX_H +#define PTX_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class PTXTargetMachine; + class FunctionPass; + + namespace PTX { + enum StateSpace { + GLOBAL = 0, // default to global state space + CONSTANT = 1, + LOCAL = 2, + PARAMETER = 3, + SHARED = 4 + }; + } // namespace PTX + + FunctionPass *createPTXISelDag(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel); + + FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel); + + extern Target ThePTXTarget; +} // namespace llvm; + +// Defines symbolic names for PTX registers. +#include "PTXGenRegisterNames.inc" + +// Defines symbolic names for the PTX instructions. +#include "PTXGenInstrNames.inc" + +#endif // PTX_H diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td new file mode 100644 index 0000000..8b1a1b1 --- /dev/null +++ b/lib/Target/PTX/PTX.td @@ -0,0 +1,54 @@ +//===- PTX.td - Describe the PTX Target Machine ---------------*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the PTX target. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Subtarget Features. +//===----------------------------------------------------------------------===// + +def FeatureSM20 : SubtargetFeature<"sm20", "is_sm20", "true", + "Enable sm_20 target architecture">; + +//===----------------------------------------------------------------------===// +// PTX supported processors. +//===----------------------------------------------------------------------===// + +class Proc<string Name, list<SubtargetFeature> Features> + : Processor<Name, NoItineraries, Features>; + +def : Proc<"generic", []>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "PTXRegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "PTXInstrInfo.td" + +def PTXInstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Target Declaration +//===----------------------------------------------------------------------===// + +def PTX : Target { + let InstructionSet = PTXInstrInfo; +} diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp new file mode 100644 index 0000000..a605997 --- /dev/null +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -0,0 +1,347 @@ +//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to PTX assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-asm-printer" + +#include "PTX.h" +#include "PTXMachineFunctionInfo.h" +#include "PTXTargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<std::string> +OptPTXVersion("ptx-version", cl::desc("Set PTX version"), cl::init("1.4")); + +static cl::opt<std::string> +OptPTXTarget("ptx-target", cl::desc("Set GPU target (comma-separated list)"), + cl::init("sm_10")); + +namespace { +class PTXAsmPrinter : public AsmPrinter { +public: + explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) {} + + const char *getPassName() const { return "PTX Assembly Printer"; } + + bool doFinalization(Module &M); + + virtual void EmitStartOfAsmFile(Module &M); + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd() { OutStreamer.EmitRawText(Twine("}")); } + + virtual void EmitInstruction(const MachineInstr *MI); + + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); + void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, + const char *Modifier = 0); + void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, + const char *Modifier = 0); + + // autogen'd. 
+  void printInstruction(const MachineInstr *MI, raw_ostream &OS);
+  static const char *getRegisterName(unsigned RegNo);
+
+private:
+  void EmitVariableDeclaration(const GlobalVariable *gv);
+  void EmitFunctionDeclaration();
+}; // class PTXAsmPrinter
+} // namespace
+
+static const char PARAM_PREFIX[] = "__param_";
+
+static const char *getRegisterTypeName(unsigned RegNo) {
+#define TEST_REGCLS(cls, clsstr) \
+  if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
+  TEST_REGCLS(RRegs32, s32);
+  TEST_REGCLS(Preds, pred);
+#undef TEST_REGCLS
+
+  llvm_unreachable("Not in any register class!");
+  return NULL;
+}
+
+static const char *getInstructionTypeName(const MachineInstr *MI) {
+  for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.getType() == MachineOperand::MO_Register)
+      return getRegisterTypeName(MO.getReg());
+  }
+
+  llvm_unreachable("No reg operand found in instruction!");
+  return NULL;
+}
+
+static const char *getStateSpaceName(unsigned addressSpace) {
+  switch (addressSpace) {
+  default: llvm_unreachable("Unknown state space");
+  case PTX::GLOBAL: return "global";
+  case PTX::CONSTANT: return "const";
+  case PTX::LOCAL: return "local";
+  case PTX::PARAMETER: return "param";
+  case PTX::SHARED: return "shared";
+  }
+  return NULL;
+}
+
+bool PTXAsmPrinter::doFinalization(Module &M) {
+  // XXX Temporarily remove global variables so that doFinalization() will not
+  // emit them again (global variables are emitted at the beginning).
+
+  Module::GlobalListType &global_list = M.getGlobalList();
+  int i, n = global_list.size();
+  GlobalVariable **gv_array = new GlobalVariable* [n];
+
+  // first, back up the GlobalVariables in gv_array
+  i = 0;
+  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
+       I != E; ++I)
+    gv_array[i++] = &*I;
+
+  // second, empty global_list
+  while (!global_list.empty())
+    global_list.remove(global_list.begin());
+
+  // call doFinalization
+  bool ret = AsmPrinter::doFinalization(M);
+
+  // now restore the global variables
+  for (i = 0; i < n; i ++)
+    global_list.insert(global_list.end(), gv_array[i]);
+
+  delete[] gv_array;
+  return ret;
+}
+
+void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
+{
+  OutStreamer.EmitRawText(Twine("\t.version " + OptPTXVersion));
+  OutStreamer.EmitRawText(Twine("\t.target " + OptPTXTarget));
+  OutStreamer.AddBlankLine();
+
+  // declare global variables
+  for (Module::const_global_iterator i = M.global_begin(), e = M.global_end();
+       i != e; ++i)
+    EmitVariableDeclaration(i);
+}
+
+bool PTXAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  SetupMachineFunction(MF);
+  EmitFunctionDeclaration();
+  EmitFunctionBody();
+  return false;
+}
+
+void PTXAsmPrinter::EmitFunctionBodyStart() {
+  OutStreamer.EmitRawText(Twine("{"));
+
+  const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+
+  // Print local variable definitions
+  for (PTXMachineFunctionInfo::reg_iterator
+       i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); i != e; ++ i) {
+    unsigned reg = *i;
+
+    std::string def = "\t.reg .";
+    def += getRegisterTypeName(reg);
+    def += ' ';
+    def += getRegisterName(reg);
+    def += ';';
+    OutStreamer.EmitRawText(Twine(def));
+  }
+}
+
+void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  std::string str;
+  str.reserve(64);
+
+  // Write instruction to str
+  raw_string_ostream OS(str);
+  printInstruction(MI, OS);
+  OS << ';';
+  OS.flush();
+
+  // Replace "%type" if found
+  size_t pos;
+  if ((pos = str.find("%type")) !=
std::string::npos) + str.replace(pos, /*strlen("%type")==*/5, getInstructionTypeName(MI)); + + StringRef strref = StringRef(str); + OutStreamer.EmitRawText(strref); +} + +void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &OS) { + const MachineOperand &MO = MI->getOperand(opNum); + + switch (MO.getType()) { + default: + llvm_unreachable("<unknown operand type>"); + break; + case MachineOperand::MO_GlobalAddress: + OS << *Mang->getSymbol(MO.getGlobal()); + break; + case MachineOperand::MO_Immediate: + OS << (int) MO.getImm(); + break; + case MachineOperand::MO_Register: + OS << getRegisterName(MO.getReg()); + break; + } +} + +void PTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, + raw_ostream &OS, const char *Modifier) { + printOperand(MI, opNum, OS); + + if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) + return; // don't print "+0" + + OS << "+"; + printOperand(MI, opNum+1, OS); +} + +void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum, + raw_ostream &OS, const char *Modifier) { + OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1; +} + +void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(gv)) + return; + + MCSymbol *gvsym = Mang->getSymbol(gv); + + assert(gvsym->isUndefined() && "Cannot define a symbol twice!"); + + std::string decl; + + // check if it is defined in some other translation unit + if (gv->isDeclaration()) + decl += ".extern "; + + // state space: e.g., .global + decl += "."; + decl += getStateSpaceName(gv->getType()->getAddressSpace()); + decl += " "; + + // alignment (optional) + unsigned alignment = gv->getAlignment(); + if (alignment != 0) { + decl += ".align "; + decl += utostr(Log2_32(gv->getAlignment())); + decl += " "; + } + + // TODO: add types + decl += ".s32 "; + + decl += gvsym->getName(); + + if (ArrayType::classof(gv->getType()) || PointerType::classof(gv->getType())) + decl += "[]"; + + decl += ";"; + + OutStreamer.EmitRawText(Twine(decl)); + + OutStreamer.AddBlankLine(); +} + +void PTXAsmPrinter::EmitFunctionDeclaration() { + // The function label could have already been emitted if two symbols end up + // conflicting due to asm renaming. Detect this and emit an error. + if (!CurrentFnSym->isUndefined()) { + report_fatal_error("'" + Twine(CurrentFnSym->getName()) + + "' label emitted multiple times to assembly file"); + return; + } + + const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + const bool isKernel = MFI->isKernel(); + unsigned reg; + + std::string decl = isKernel ? ".entry" : ".func"; + + // Print return register + reg = MFI->retReg(); + if (!isKernel && reg != PTX::NoRegister) { + decl += " (.reg ."; // FIXME: could it return in .param space? 
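+    // For a device function with a 32-bit return register, the clause built
+    // here prints as, e.g., ".func (.reg .s32 r0)"; illustrative only, since
+    // the actual name comes from getRegisterName().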
+ decl += getRegisterTypeName(reg); + decl += " "; + decl += getRegisterName(reg); + decl += ")"; + } + + // Print function name + decl += " "; + decl += CurrentFnSym->getName().str(); + + // Print parameter list + if (!MFI->argRegEmpty()) { + decl += " ("; + if (isKernel) { + for (int i = 0, e = MFI->getNumArg(); i != e; ++i) { + if (i != 0) + decl += ", "; + decl += ".param .s32 "; // TODO: add types + decl += PARAM_PREFIX; + decl += utostr(i + 1); + } + } else { + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; i != e; ++i) { + reg = *i; + assert(reg != PTX::NoRegister && "Not a valid register!"); + if (i != b) + decl += ", "; + decl += ".reg ."; + decl += getRegisterTypeName(reg); + decl += " "; + decl += getRegisterName(reg); + } + } + decl += ")"; + } + + OutStreamer.EmitRawText(Twine(decl)); +} + +#include "PTXGenAsmWriter.inc" + +// Force static initialization. +extern "C" void LLVMInitializePTXAsmPrinter() { + RegisterAsmPrinter<PTXAsmPrinter> X(ThePTXTarget); +} diff --git a/lib/Target/PTX/PTXFrameLowering.cpp b/lib/Target/PTX/PTXFrameLowering.cpp new file mode 100644 index 0000000..b621b9d --- /dev/null +++ b/lib/Target/PTX/PTXFrameLowering.cpp @@ -0,0 +1,24 @@ +//=======- PTXFrameLowering.cpp - PTX Frame Information -------*- C++ -*-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PTX implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "PTXFrameLowering.h" +#include "llvm/CodeGen/MachineFunction.h" + +using namespace llvm; + +void PTXFrameLowering::emitPrologue(MachineFunction &MF) const { +} + +void PTXFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { +} diff --git a/lib/Target/PTX/PTXFrameLowering.h b/lib/Target/PTX/PTXFrameLowering.h new file mode 100644 index 0000000..574ae7a --- /dev/null +++ b/lib/Target/PTX/PTXFrameLowering.h @@ -0,0 +1,43 @@ +//===--- PTXFrameLowering.h - Define frame lowering for PTX --*- C++ -*----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_FRAMEINFO_H +#define PTX_FRAMEINFO_H + +#include "PTX.h" +#include "PTXSubtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + class PTXSubtarget; + +class PTXFrameLowering : public TargetFrameLowering { +protected: + const PTXSubtarget &STI; + +public: + explicit PTXFrameLowering(const PTXSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. 
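+  /// For PTX both are currently no-ops (see PTXFrameLowering.cpp); there is
+  /// no physical stack frame to set up or tear down.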
+ void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool hasFP(const MachineFunction &MF) const { return false; } +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp new file mode 100644 index 0000000..efb0e8b --- /dev/null +++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -0,0 +1,151 @@ +//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the PTX target. +// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/DerivedTypes.h" + +using namespace llvm; + +namespace { +// PTXDAGToDAGISel - PTX specific code to select PTX machine +// instructions for SelectionDAG operations. +class PTXDAGToDAGISel : public SelectionDAGISel { + public: + PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); + + virtual const char *getPassName() const { + return "PTX DAG->DAG Pattern Instruction Selection"; + } + + SDNode *Select(SDNode *Node); + + // Complex Pattern Selectors. + bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2); + bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset); + + // Include the pieces auto'gened from the target description +#include "PTXGenDAGISel.inc" + + private: + SDNode *SelectREAD_PARAM(SDNode *Node); + + bool isImm(const SDValue &operand); + bool SelectImm(const SDValue &operand, SDValue &imm); +}; // class PTXDAGToDAGISel +} // namespace + +// createPTXISelDag - This pass converts a legalized DAG into a +// PTX-specific DAG, ready for instruction scheduling +FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new PTXDAGToDAGISel(TM, OptLevel); +} + +PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(TM, OptLevel) {} + +SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { + if (Node->getOpcode() == PTXISD::READ_PARAM) + return SelectREAD_PARAM(Node); + else + return SelectCode(Node); +} + +SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) { + SDValue index = Node->getOperand(1); + DebugLoc dl = Node->getDebugLoc(); + + if (index.getOpcode() != ISD::TargetConstant) + llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant"); + + return PTXInstrInfo:: + GetPTXMachineNode(CurDAG, PTX::LDpi, dl, MVT::i32, index); +} + +// Match memory operand of the form [reg+reg] +bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) { + if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 || + isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1))) + return false; + + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +// Match memory operand of the form [reg], [imm+reg], and [reg+imm] +bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() != ISD::ADD) { + // let SelectADDRii handle the [imm] case + if (isImm(Addr)) + return false; + // it is [reg] + Base = Addr; + Offset = 
CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + if (Addr.getNumOperands() < 2) + return false; + + // let SelectADDRii handle the [imm+imm] case + if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1))) + return false; + + // try [reg+imm] and [imm+reg] + for (int i = 0; i < 2; i ++) + if (SelectImm(Addr.getOperand(1-i), Offset)) { + Base = Addr.getOperand(i); + return true; + } + + // neither [reg+imm] nor [imm+reg] + return false; +} + +// Match memory operand of the form [imm+imm] and [imm] +bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base, + SDValue &Offset) { + // is [imm+imm]? + if (Addr.getOpcode() == ISD::ADD) { + return SelectImm(Addr.getOperand(0), Base) && + SelectImm(Addr.getOperand(1), Offset); + } + + // is [imm]? + if (SelectImm(Addr, Base)) { + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + return false; +} + +bool PTXDAGToDAGISel::isImm(const SDValue &operand) { + return ConstantSDNode::classof(operand.getNode()); +} + +bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) { + SDNode *node = operand.getNode(); + if (!ConstantSDNode::classof(node)) + return false; + + ConstantSDNode *CN = cast<ConstantSDNode>(node); + imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), MVT::i32); + return true; +} diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp new file mode 100644 index 0000000..e6d4490 --- /dev/null +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -0,0 +1,210 @@ +//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTXTargetLowering class. +// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXISelLowering.h" +#include "PTXMachineFunctionInfo.h" +#include "PTXRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +using namespace llvm; + +PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) + : TargetLowering(TM, new TargetLoweringObjectFileELF()) { + // Set up the register classes. 
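+  // Only two register classes exist in this backend: 1-bit predicates
+  // (Preds) and 32-bit scalar registers (RRegs32).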
+ addRegisterClass(MVT::i1, PTX::PredsRegisterClass); + addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass); + + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + + // Customize translation of memory addresses + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + + // Compute derived properties from the register classes + computeRegisterProperties(); +} + +SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: llvm_unreachable("Unimplemented operand"); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + } +} + +const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: + llvm_unreachable("Unknown opcode"); + case PTXISD::READ_PARAM: + return "PTXISD::READ_PARAM"; + case PTXISD::EXIT: + return "PTXISD::EXIT"; + case PTXISD::RET: + return "PTXISD::RET"; + } +} + +//===----------------------------------------------------------------------===// +// Custom Lower Operation +//===----------------------------------------------------------------------===// + +SDValue PTXTargetLowering:: +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { + EVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + return DAG.getTargetGlobalAddress(GV, dl, PtrVT); +} + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +namespace { +struct argmap_entry { + MVT::SimpleValueType VT; + TargetRegisterClass *RC; + TargetRegisterClass::iterator loc; + + argmap_entry(MVT::SimpleValueType _VT, TargetRegisterClass *_RC) + : VT(_VT), RC(_RC), loc(_RC->begin()) {} + + void reset() { loc = RC->begin(); } + bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; } +} argmap[] = { + argmap_entry(MVT::i1, PTX::PredsRegisterClass), + argmap_entry(MVT::i32, PTX::RRegs32RegisterClass) +}; +} // end anonymous namespace + +SDValue PTXTargetLowering:: + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + if (isVarArg) llvm_unreachable("PTX does not support varargs"); + + MachineFunction &MF = DAG.getMachineFunction(); + PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + + switch (CallConv) { + default: + llvm_unreachable("Unsupported calling convention"); + break; + case CallingConv::PTX_Kernel: + MFI->setKernel(true); + break; + case CallingConv::PTX_Device: + MFI->setKernel(false); + break; + } + + // Make sure we don't add argument registers twice + if (MFI->isDoneAddArg()) + llvm_unreachable("cannot add argument registers twice"); + + // Reset argmap before allocation + for (struct argmap_entry *i = argmap, *e = argmap + array_lengthof(argmap); + i != e; ++ i) + i->reset(); + + for (int i = 0, e = Ins.size(); i != e; ++ i) { + MVT::SimpleValueType VT = Ins[i].VT.SimpleTy; + + struct argmap_entry *entry = std::find(argmap, + argmap + array_lengthof(argmap), VT); + if (entry == argmap + array_lengthof(argmap)) + llvm_unreachable("Type of argument is not supported"); + + if (MFI->isKernel() && entry->RC == PTX::PredsRegisterClass) + llvm_unreachable("cannot pass preds to kernel"); + + MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); + + unsigned preg = 
*++(entry->loc); // allocate start from register 1 + unsigned vreg = RegInfo.createVirtualRegister(entry->RC); + RegInfo.addLiveIn(preg, vreg); + + MFI->addArgReg(preg); + + SDValue inval; + if (MFI->isKernel()) + inval = DAG.getNode(PTXISD::READ_PARAM, dl, VT, Chain, + DAG.getTargetConstant(i, MVT::i32)); + else + inval = DAG.getCopyFromReg(Chain, dl, vreg, VT); + InVals.push_back(inval); + } + + MFI->doneAddArg(); + + return Chain; +} + +SDValue PTXTargetLowering:: + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, + SelectionDAG &DAG) const { + if (isVarArg) llvm_unreachable("PTX does not support varargs"); + + switch (CallConv) { + default: + llvm_unreachable("Unsupported calling convention."); + case CallingConv::PTX_Kernel: + assert(Outs.size() == 0 && "Kernel must return void."); + return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain); + case CallingConv::PTX_Device: + assert(Outs.size() <= 1 && "Can at most return one value."); + break; + } + + // PTX_Device + + // return void + if (Outs.size() == 0) + return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); + + assert(Outs[0].VT == MVT::i32 && "Can return only basic types"); + + SDValue Flag; + unsigned reg = PTX::R0; + + MachineFunction &MF = DAG.getMachineFunction(); + PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + MFI->setRetReg(reg); + + // If this is the first return lowered for this function, add the regs to the + // liveout set for the function + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) + DAG.getMachineFunction().getRegInfo().addLiveOut(reg); + + // Copy the result values into the output registers + Chain = DAG.getCopyToReg(Chain, dl, reg, OutVals[0], Flag); + + // Guarantee that all emitted copies are stuck together, + // avoiding something bad + Flag = Chain.getValue(1); + + return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); +} diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h new file mode 100644 index 0000000..b03a9f6 --- /dev/null +++ b/lib/Target/PTX/PTXISelLowering.h @@ -0,0 +1,67 @@ +//==-- PTXISelLowering.h - PTX DAG Lowering Interface ------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that PTX uses to lower LLVM code into a +// selection DAG. 
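+// It also declares the PTXISD custom node opcodes (READ_PARAM, EXIT, RET)
+// produced during lowering.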
+// +//===----------------------------------------------------------------------===// + +#ifndef PTX_ISEL_LOWERING_H +#define PTX_ISEL_LOWERING_H + +#include "llvm/Target/TargetLowering.h" + +namespace llvm { +class PTXSubtarget; +class PTXTargetMachine; + +namespace PTXISD { + enum NodeType { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + READ_PARAM, + EXIT, + RET + }; +} // namespace PTXISD + +class PTXTargetLowering : public TargetLowering { + public: + explicit PTXTargetLowering(TargetMachine &TM); + + virtual const char *getTargetNodeName(unsigned Opcode) const; + + virtual unsigned getFunctionAlignment(const Function *F) const { + return 2; } + + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, + SelectionDAG &DAG) const; + + private: + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; +}; // class PTXTargetLowering +} // namespace llvm + +#endif // PTX_ISEL_LOWERING_H diff --git a/lib/Target/PTX/PTXInstrFormats.td b/lib/Target/PTX/PTXInstrFormats.td new file mode 100644 index 0000000..e4e0999 --- /dev/null +++ b/lib/Target/PTX/PTXInstrFormats.td @@ -0,0 +1,24 @@ +//===- PTXInstrFormats.td - PTX Instruction Formats ----------*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// PTX Predicate operand, default to (0, 0) = (zero-reg, always). +// Leave PrintMethod empty; predicate printing is defined elsewhere. +def pred : PredicateOperand<OtherVT, (ops Preds, i32imm), + (ops (i1 zero_reg), (i32 0))>; + +let Namespace = "PTX" in { + class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern> + : Instruction { + dag OutOperandList = oops; + dag InOperandList = !con(iops, (ins pred:$_p)); + let AsmString = asmstr; // Predicate printing is defined elsewhere. + let Pattern = pattern; + let isPredicable = 1; + } +} diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp new file mode 100644 index 0000000..805759b --- /dev/null +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -0,0 +1,87 @@ +//===- PTXInstrInfo.cpp - PTX Instruction Information ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PTX implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXInstrInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +using namespace llvm; + +#include "PTXGenInstrInfo.inc" + +PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM) + : TargetInstrInfoImpl(PTXInsts, array_lengthof(PTXInsts)), + RI(_TM, *this), TM(_TM) {} + +static const struct map_entry { + const TargetRegisterClass *cls; + const int opcode; +} map[] = { + { &PTX::RRegs32RegClass, PTX::MOVrr }, + { &PTX::PredsRegClass, PTX::MOVpp } +}; + +void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DstReg, unsigned SrcReg, + bool KillSrc) const { + for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) + if (PTX::RRegs32RegClass.contains(DstReg, SrcReg)) { + BuildMI(MBB, I, DL, + get(PTX::MOVrr), DstReg).addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + llvm_unreachable("Impossible reg-to-reg copy"); +} + +bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, unsigned SrcReg, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { + if (DstRC != SrcRC) + return false; + + for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) + if (DstRC == map[i].cls) { + MachineInstr *MI = BuildMI(MBB, I, DL, get(map[i].opcode), + DstReg).addReg(SrcReg); + if (MI->findFirstPredOperandIdx() == -1) { + MI->addOperand(MachineOperand::CreateReg(0, false)); + MI->addOperand(MachineOperand::CreateImm(/*IsInv=*/0)); + } + return true; + } + + return false; +} + +bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const { + switch (MI.getOpcode()) { + default: + return false; + case PTX::MOVpp: + case PTX::MOVrr: + assert(MI.getNumOperands() >= 2 && + MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && + "Invalid register-register move instruction"); + SrcSubIdx = DstSubIdx = 0; // No sub-registers + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } +} diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h new file mode 100644 index 0000000..e7f00f0 --- /dev/null +++ b/lib/Target/PTX/PTXInstrInfo.h @@ -0,0 +1,75 @@ +//===- PTXInstrInfo.h - PTX Instruction Information -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PTX implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef PTX_INSTR_INFO_H +#define PTX_INSTR_INFO_H + +#include "PTXRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetInstrInfo.h" + +namespace llvm { +class PTXTargetMachine; + +class PTXInstrInfo : public TargetInstrInfoImpl { + private: + const PTXRegisterInfo RI; + PTXTargetMachine &TM; + + public: + explicit PTXInstrInfo(PTXTargetMachine &_TM); + + virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; } + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DstReg, unsigned SrcReg, + bool KillSrc) const; + + virtual bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, unsigned SrcReg, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; + + virtual bool isMoveInstr(const MachineInstr& MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + + // static helper routines + + static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, + DebugLoc dl, EVT VT, + SDValue Op1) { + SDValue pred_reg = DAG->getRegister(0, MVT::i1); + SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32); + SDValue ops[] = { Op1, pred_reg, pred_imm }; + return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); + } + + static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, + DebugLoc dl, EVT VT, + SDValue Op1, + SDValue Op2) { + SDValue pred_reg = DAG->getRegister(0, MVT::i1); + SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32); + SDValue ops[] = { Op1, Op2, pred_reg, pred_imm }; + return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); + } + + }; // class PTXInstrInfo +} // namespace llvm + +#endif // PTX_INSTR_INFO_H diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td new file mode 100644 index 0000000..9a74778 --- /dev/null +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -0,0 +1,257 @@ +//===- PTXInstrInfo.td - PTX Instruction defs -----------------*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the PTX instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// + +include "PTXInstrFormats.td" + +//===----------------------------------------------------------------------===// +// Instruction Pattern Stuff +//===----------------------------------------------------------------------===// + +def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::GLOBAL; + return false; +}]>; + +def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::CONSTANT; + return false; +}]>; + +def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::LOCAL; + return false; +}]>; + +def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::PARAMETER; + return false; +}]>; + +def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::SHARED; + return false; +}]>; + +def store_global + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::GLOBAL; + return false; +}]>; + +def store_local + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::LOCAL; + return false; +}]>; + +def store_parameter + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::PARAMETER; + return false; +}]>; + +def store_shared + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::SHARED; + return false; +}]>; + +// Addressing modes. 
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
+def ADDRii : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+
+// Address operands
+def MEMri : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops RRegs32, i32imm);
+}
+def MEMii : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops i32imm, i32imm);
+}
+def MEMpi : Operand<i32> {
+  let PrintMethod = "printParamOperand";
+  let MIOperandInfo = (ops i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// PTX Specific Node Definitions
+//===----------------------------------------------------------------------===//
+
+// PTX allows generic 3-reg shifts like shl r0, r1, r2
+def PTXshl : SDNode<"ISD::SHL", SDTIntBinOp>;
+def PTXsrl : SDNode<"ISD::SRL", SDTIntBinOp>;
+def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>;
+
+def PTXexit
+  : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
+def PTXret
+  : SDNode<"PTXISD::RET", SDTNone, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+multiclass INT3<string opcstr, SDNode opnode> {
+  def rr : InstPTX<(outs RRegs32:$d),
+                   (ins RRegs32:$a, RRegs32:$b),
+                   !strconcat(opcstr, ".%type\t$d, $a, $b"),
+                   [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
+  def ri : InstPTX<(outs RRegs32:$d),
+                   (ins RRegs32:$a, i32imm:$b),
+                   !strconcat(opcstr, ".%type\t$d, $a, $b"),
+                   [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
+}
+
+// No %type directive; non-commutable.
+multiclass INT3ntnc<string opcstr, SDNode opnode> {
+  def rr : InstPTX<(outs RRegs32:$d),
+                   (ins RRegs32:$a, RRegs32:$b),
+                   !strconcat(opcstr, "\t$d, $a, $b"),
+                   [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
+  def ri : InstPTX<(outs RRegs32:$d),
+                   (ins RRegs32:$a, i32imm:$b),
+                   !strconcat(opcstr, "\t$d, $a, $b"),
+                   [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
+  def ir : InstPTX<(outs RRegs32:$d),
+                   (ins i32imm:$a, RRegs32:$b),
+                   !strconcat(opcstr, "\t$d, $a, $b"),
+                   [(set RRegs32:$d, (opnode imm:$a, RRegs32:$b))]>;
+}
+
+multiclass PTX_LD<string opstr, RegisterClass RC, PatFrag pat_load> {
+  def rr : InstPTX<(outs RC:$d),
+                   (ins MEMri:$a),
+                   !strconcat(opstr, ".%type\t$d, [$a]"),
+                   [(set RC:$d, (pat_load ADDRrr:$a))]>;
+  def ri : InstPTX<(outs RC:$d),
+                   (ins MEMri:$a),
+                   !strconcat(opstr, ".%type\t$d, [$a]"),
+                   [(set RC:$d, (pat_load ADDRri:$a))]>;
+  def ii : InstPTX<(outs RC:$d),
+                   (ins MEMii:$a),
+                   !strconcat(opstr, ".%type\t$d, [$a]"),
+                   [(set RC:$d, (pat_load ADDRii:$a))]>;
+}
+
+multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
+  def rr : InstPTX<(outs),
+                   (ins RC:$d, MEMri:$a),
+                   !strconcat(opstr, ".%type\t[$a], $d"),
+                   [(pat_store RC:$d, ADDRrr:$a)]>;
+  def ri : InstPTX<(outs),
+                   (ins RC:$d, MEMri:$a),
+                   !strconcat(opstr, ".%type\t[$a], $d"),
+                   [(pat_store RC:$d, ADDRri:$a)]>;
+  def ii : InstPTX<(outs),
+                   (ins RC:$d, MEMii:$a),
+                   !strconcat(opstr, ".%type\t[$a], $d"),
+                   [(pat_store RC:$d, ADDRii:$a)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+///===- Integer Arithmetic Instructions -----------------------------------===//
+
+defm ADD : INT3<"add", add>;
+defm SUB : INT3<"sub", sub>;
+
+///===- Logic and Shift Instructions 
--------------------------------------===// + +defm SHL : INT3ntnc<"shl.b32", PTXshl>; +defm SRL : INT3ntnc<"shr.u32", PTXsrl>; +defm SRA : INT3ntnc<"shr.s32", PTXsra>; + +///===- Data Movement and Conversion Instructions -------------------------===// + +let neverHasSideEffects = 1 in { + // rely on isMoveInstr to separate MOVpp, MOVrr, etc. + def MOVpp + : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>; + def MOVrr + : InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>; +} + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def MOVpi + : InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a", + [(set Preds:$d, imm:$a)]>; + def MOVri + : InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a", + [(set RRegs32:$d, imm:$a)]>; +} + +defm LDg : PTX_LD<"ld.global", RRegs32, load_global>; +defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>; +defm LDl : PTX_LD<"ld.local", RRegs32, load_local>; +defm LDp : PTX_LD<"ld.param", RRegs32, load_parameter>; +defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>; + +def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a), + "ld.param.%type\t$d, [$a]", []>; + +defm STg : PTX_ST<"st.global", RRegs32, store_global>; +defm STl : PTX_ST<"st.local", RRegs32, store_local>; +// Store to parameter state space requires PTX 2.0 or higher? +// defm STp : PTX_ST<"st.param", RRegs32, store_parameter>; +defm STs : PTX_ST<"st.shared", RRegs32, store_shared>; + +///===- Control Flow Instructions -----------------------------------------===// + +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>; + def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; +} diff --git a/lib/Target/PTX/PTXMCAsmInfo.cpp b/lib/Target/PTX/PTXMCAsmInfo.cpp new file mode 100644 index 0000000..b670abd --- /dev/null +++ b/lib/Target/PTX/PTXMCAsmInfo.cpp @@ -0,0 +1,30 @@ +//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the PTXMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "PTXMCAsmInfo.h" + +using namespace llvm; + +PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) { + CommentString = "//"; + + PrivateGlobalPrefix = "$L__"; + + AllowPeriodsInName = false; + + HasSetDirective = false; + + HasDotTypeDotSizeDirective = false; + + HasSingleParameterDotFile = false; +} diff --git a/lib/Target/PTX/PTXMCAsmInfo.h b/lib/Target/PTX/PTXMCAsmInfo.h new file mode 100644 index 0000000..03f5d66 --- /dev/null +++ b/lib/Target/PTX/PTXMCAsmInfo.h @@ -0,0 +1,28 @@ +//=====-- PTXMCAsmInfo.h - PTX asm properties -----------------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the PTXMCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef PTX_MCASM_INFO_H +#define PTX_MCASM_INFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + class StringRef; + + struct PTXMCAsmInfo : public MCAsmInfo { + explicit PTXMCAsmInfo(const Target &T, const StringRef &TT); + }; +} // namespace llvm + +#endif // PTX_MCASM_INFO_H diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp new file mode 100644 index 0000000..0886ba8 --- /dev/null +++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp @@ -0,0 +1,542 @@ +//===- lib/Target/PTX/PTXMCAsmStreamer.cpp - PTX Text Assembly Output -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmInfo.h" + +using namespace llvm; + +namespace { +class PTXMCAsmStreamer : public MCStreamer { + formatted_raw_ostream &OS; + const MCAsmInfo &MAI; + OwningPtr<MCInstPrinter> InstPrinter; + OwningPtr<MCCodeEmitter> Emitter; + + SmallString<128> CommentToEmit; + raw_svector_ostream CommentStream; + + unsigned IsVerboseAsm : 1; + unsigned ShowInst : 1; + +public: + PTXMCAsmStreamer(MCContext &Context, + formatted_raw_ostream &os, + bool isVerboseAsm, bool useLoc, + MCInstPrinter *printer, + MCCodeEmitter *emitter, + bool showInst) + : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), + InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit), + IsVerboseAsm(isVerboseAsm), + ShowInst(showInst) { + if (InstPrinter && IsVerboseAsm) + InstPrinter->setCommentStream(CommentStream); + } + + ~PTXMCAsmStreamer() {} + + inline void EmitEOL() { + // If we don't have any comments, just emit a \n. + if (!IsVerboseAsm) { + OS << '\n'; + return; + } + EmitCommentsAndEOL(); + } + void EmitCommentsAndEOL(); + + /// isVerboseAsm - Return true if this streamer supports verbose assembly at + /// all. + virtual bool isVerboseAsm() const { return IsVerboseAsm; } + + /// hasRawTextSupport - We support EmitRawText. + virtual bool hasRawTextSupport() const { return true; } + + /// AddComment - Add a comment that can be emitted to the generated .s + /// file if applicable as a QoI issue to make the output of the compiler + /// more readable. This only affects the MCAsmStreamer, and only when + /// verbose assembly output is enabled. + virtual void AddComment(const Twine &T); + + /// AddEncodingComment - Add a comment showing the encoding of an instruction. + virtual void AddEncodingComment(const MCInst &Inst); + + /// GetCommentOS - Return a raw_ostream that comments can be written to. + /// Unlike AddComment, you are required to terminate comments with \n if you + /// use this method. + virtual raw_ostream &GetCommentOS() { + if (!IsVerboseAsm) + return nulls(); // Discard comments unless in verbose asm mode. 
+ return CommentStream; + } + + /// AddBlankLine - Emit a blank line to a .s file to pretty it up. + virtual void AddBlankLine() { + EmitEOL(); + } + + /// @name MCStreamer Interface + /// @{ + + virtual void ChangeSection(const MCSection *Section); + virtual void InitSections() {} + + virtual void EmitLabel(MCSymbol *Symbol); + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + + virtual void EmitThumbFunc(MCSymbol *Func); + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); + + virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, + const MCSymbol *LastLabel, + const MCSymbol *Label); + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol); + virtual void EmitCOFFSymbolStorageClass(int StorageClass); + virtual void EmitCOFFSymbolType(int Type); + virtual void EndCOFFSymbolDef(); + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + + /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol. + /// + /// @param Symbol - The common symbol to emit. + /// @param Size - The size of the common symbol. + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0); + + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0); + + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + bool isPCRel, unsigned AddrSpace); + virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); + virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); + virtual void EmitGPRel32Value(const MCExpr *Value); + + + virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace); + + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0); + + virtual void EmitFileDirective(StringRef Filename); + virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename); + + virtual void EmitInstruction(const MCInst &Inst); + + /// EmitRawText - If this file is backed by an assembly streamer, this dumps + /// the specified string in the output .s file. This capability is + /// indicated by the hasRawTextSupport() predicate. + virtual void EmitRawText(StringRef String); + + virtual void Finish(); + + /// @} + +}; // class PTXMCAsmStreamer + +} + +/// TODO: Add appropriate implementation of Emit*() methods when needed + +void PTXMCAsmStreamer::AddComment(const Twine &T) { + if (!IsVerboseAsm) return; + + // Make sure that CommentStream is flushed. + CommentStream.flush(); + + T.toVector(CommentToEmit); + // Each comment goes on its own line. + CommentToEmit.push_back('\n'); + + // Tell the comment stream that the vector changed underneath it. 
+ CommentStream.resync(); +} + +void PTXMCAsmStreamer::EmitCommentsAndEOL() { + if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) { + OS << '\n'; + return; + } + + CommentStream.flush(); + StringRef Comments = CommentToEmit.str(); + + assert(Comments.back() == '\n' && + "Comment array not newline terminated"); + do { + // Emit a line of comments. + OS.PadToColumn(MAI.getCommentColumn()); + size_t Position = Comments.find('\n'); + OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n'; + + Comments = Comments.substr(Position+1); + } while (!Comments.empty()); + + CommentToEmit.clear(); + // Tell the comment stream that the vector changed underneath it. + CommentStream.resync(); +} + +static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { + assert(Bytes && "Invalid size!"); + return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8)); +} + +void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) { + assert(Section && "Cannot switch to a null section!"); +} + +void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); + assert(getCurrentSection() && "Cannot emit before setting section!"); + + OS << *Symbol << MAI.getLabelSuffix(); + EmitEOL(); + Symbol->setSection(*getCurrentSection()); +} + +void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {} + +void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {} + +void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + OS << *Symbol << " = " << *Value; + EmitEOL(); + + // FIXME: Lift context changes into super class. + Symbol->setVariableValue(Value); +} + +void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias, + const MCSymbol *Symbol) { + OS << ".weakref " << *Alias << ", " << *Symbol; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, + const MCSymbol *LastLabel, + const MCSymbol *Label) { + report_fatal_error("Unimplemented."); +} + +void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) {} + +void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} + +void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {} + +void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {} + +void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {} + +void PTXMCAsmStreamer::EndCOFFSymbolDef() {} + +void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} + +void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} + +void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {} + +void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size, unsigned ByteAlignment) {} + +void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, + MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) {} + +static inline char toOctal(int X) { return (X&7)+'0'; } + +static void PrintQuotedString(StringRef Data, raw_ostream &OS) { + OS << '"'; + + for (unsigned i = 0, e = Data.size(); i != e; ++i) { + unsigned char C = Data[i]; + if (C == '"' || C == '\\') { + OS << '\\' << (char)C; + continue; + } + + if (isprint((unsigned char)C)) { + OS << (char)C; + continue; + } + + switch (C) { + case '\b': OS << "\\b"; break; + case '\f': OS << "\\f"; break; + case '\n': OS << "\\n"; break; + case '\r': OS << "\\r"; break; + 
case '\t': OS << "\\t"; break; + default: + OS << '\\'; + OS << toOctal(C >> 6); + OS << toOctal(C >> 3); + OS << toOctal(C >> 0); + break; + } + } + + OS << '"'; +} + +void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + assert(getCurrentSection() && "Cannot emit contents before setting section!"); + if (Data.empty()) return; + + if (Data.size() == 1) { + OS << MAI.getData8bitsDirective(AddrSpace); + OS << (unsigned)(unsigned char)Data[0]; + EmitEOL(); + return; + } + + // If the data ends with 0 and the target supports .asciz, use it, otherwise + // use .ascii + if (MAI.getAscizDirective() && Data.back() == 0) { + OS << MAI.getAscizDirective(); + Data = Data.substr(0, Data.size()-1); + } else { + OS << MAI.getAsciiDirective(); + } + + OS << ' '; + PrintQuotedString(Data, OS); + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, + bool isPCRel, unsigned AddrSpace) { + assert(getCurrentSection() && "Cannot emit contents before setting section!"); + assert(!isPCRel && "Cannot emit pc relative relocations!"); + const char *Directive = 0; + switch (Size) { + default: break; + case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break; + case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break; + case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break; + case 8: + Directive = MAI.getData64bitsDirective(AddrSpace); + // If the target doesn't support 64-bit data, emit as two 32-bit halves. + if (Directive) break; + int64_t IntValue; + if (!Value->EvaluateAsAbsolute(IntValue)) + report_fatal_error("Don't know how to emit this value."); + if (getContext().getTargetAsmInfo().isLittleEndian()) { + EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace); + EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace); + } else { + EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace); + EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace); + } + return; + } + + assert(Directive && "Invalid size for machine code value!"); + OS << Directive << *Value; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value, + unsigned AddrSpace) { + assert(MAI.hasLEB128() && "Cannot print a .uleb"); + OS << ".uleb128 " << *Value; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value, + unsigned AddrSpace) { + assert(MAI.hasLEB128() && "Cannot print a .sleb"); + OS << ".sleb128 " << *Value; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { + assert(MAI.getGPRel32Directive() != 0); + OS << MAI.getGPRel32Directive() << *Value; + EmitEOL(); +} + + +/// EmitFill - Emit NumBytes bytes worth of the value specified by +/// FillValue. This implements directives such as '.space'. +void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace) { + if (NumBytes == 0) return; + + if (AddrSpace == 0) + if (const char *ZeroDirective = MAI.getZeroDirective()) { + OS << ZeroDirective << NumBytes; + if (FillValue != 0) + OS << ',' << (int)FillValue; + EmitEOL(); + return; + } + + // Emit a byte at a time. + MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace); +} + +void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) { + // Some assemblers don't support non-power of two alignments, so we always + // emit alignments as a power of two if possible. 
+ if (isPowerOf2_32(ByteAlignment)) { + switch (ValueSize) { + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << MAI.getAlignDirective(); break; + // FIXME: use MAI for this! + case 2: OS << ".p2alignw "; break; + case 4: OS << ".p2alignl "; break; + case 8: llvm_unreachable("Unsupported alignment size!"); + } + + if (MAI.getAlignmentIsInBytes()) + OS << ByteAlignment; + else + OS << Log2_32(ByteAlignment); + + if (Value || MaxBytesToEmit) { + OS << ", 0x"; + OS.write_hex(truncateToSize(Value, ValueSize)); + + if (MaxBytesToEmit) + OS << ", " << MaxBytesToEmit; + } + EmitEOL(); + return; + } + + // Non-power of two alignment. This is not widely supported by assemblers. + // FIXME: Parameterize this based on MAI. + switch (ValueSize) { + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << ".balign"; break; + case 2: OS << ".balignw"; break; + case 4: OS << ".balignl"; break; + case 8: llvm_unreachable("Unsupported alignment size!"); + } + + OS << ' ' << ByteAlignment; + OS << ", " << truncateToSize(Value, ValueSize); + if (MaxBytesToEmit) + OS << ", " << MaxBytesToEmit; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) {} + +void PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) {} + + +void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) { + assert(MAI.hasSingleParameterDotFile()); + OS << "\t.file\t"; + PrintQuotedString(Filename, OS); + EmitEOL(); +} + +// FIXME: should we inherit from MCAsmStreamer? +bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, + StringRef Filename){ + OS << "\t.file\t" << FileNo << ' '; + PrintQuotedString(Filename, OS); + EmitEOL(); + return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename); +} + +void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {} + +void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) { + assert(getCurrentSection() && "Cannot emit contents before setting section!"); + + // Show the encoding in a comment if we have a code emitter. + if (Emitter) + AddEncodingComment(Inst); + + // Show the MCInst if enabled. + if (ShowInst) { + Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n "); + GetCommentOS() << "\n"; + } + + // If we have an AsmPrinter, use that to print, otherwise print the MCInst. + if (InstPrinter) + InstPrinter->printInst(&Inst, OS); + else + Inst.print(OS, &MAI); + EmitEOL(); +} + +/// EmitRawText - If this file is backed by an assembly streamer, this dumps +/// the specified string in the output .s file. This capability is +/// indicated by the hasRawTextSupport() predicate. 
+void PTXMCAsmStreamer::EmitRawText(StringRef String) {
+  if (!String.empty() && String.back() == '\n')
+    String = String.substr(0, String.size()-1);
+  OS << String;
+  EmitEOL();
+}
+
+void PTXMCAsmStreamer::Finish() {}
+
+namespace llvm {
+  MCStreamer *createPTXAsmStreamer(MCContext &Context,
+                                   formatted_raw_ostream &OS,
+                                   bool isVerboseAsm, bool useLoc,
+                                   MCInstPrinter *IP,
+                                   MCCodeEmitter *CE, TargetAsmBackend *TAB,
+                                   bool ShowInst) {
+    return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
+                                IP, CE, ShowInst);
+  }
+}
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
new file mode 100644
index 0000000..b37c740
--- /dev/null
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -0,0 +1,96 @@
+//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an information extractor for PTX machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-mf-info-extract"
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "PTXMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+// NOTE: PTXMFInfoExtract must be run after register allocation!
+
+namespace llvm {
+  /// PTXMFInfoExtract - PTX-specific code to extract PTX machine
+  /// function information for PTXAsmPrinter.
+  ///
+  class PTXMFInfoExtract : public MachineFunctionPass {
+    private:
+      static char ID;
+
+    public:
+      PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
+        : MachineFunctionPass(ID) {}
+
+      virtual bool runOnMachineFunction(MachineFunction &MF);
+
+      virtual const char *getPassName() const {
+        return "PTX Machine Function Info Extractor";
+      }
+  }; // class PTXMFInfoExtract
+} // namespace llvm
+
+using namespace llvm;
+
+char PTXMFInfoExtract::ID = 0;
+
+bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
+  PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n");
+
+  unsigned retreg = MFI->retReg();
+
+  DEBUG(dbgs()
+        << "PTX::NoRegister == " << PTX::NoRegister << "\n"
+        << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n");
+
+  DEBUG(for (unsigned reg = PTX::NoRegister + 1;
+             reg < PTX::NUM_TARGET_REGS; ++reg)
+          if (MRI.isPhysRegUsed(reg))
+            dbgs() << "Used Reg: " << reg << "\n";);
+
+  // FIXME: This is a slow linear scan.
+  for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
+    if (MRI.isPhysRegUsed(reg) &&
+        reg != retreg &&
+        (MFI->isKernel() || !MFI->isArgReg(reg)))
+      MFI->addLocalVarReg(reg);
+
+  // Notify MachineFunctionInfo that we are done adding local var regs.
+  MFI->doneAddLocalVar();
+
+  DEBUG(dbgs() << "Return Reg: " << retreg << "\n");
+
+  DEBUG(for (PTXMachineFunctionInfo::reg_iterator
+             i = MFI->argRegBegin(), e = MFI->argRegEnd();
+             i != e; ++i)
+          dbgs() << "Arg Reg: " << *i << "\n";);
+
+  DEBUG(for (PTXMachineFunctionInfo::reg_iterator
+             i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
+             i != e; ++i)
+          dbgs() << "Local Var Reg: " << *i <<
"\n";); + + return false; +} + +FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new PTXMFInfoExtract(TM, OptLevel); +} diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h new file mode 100644 index 0000000..56d044b --- /dev/null +++ b/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -0,0 +1,79 @@ +//===- PTXMachineFuctionInfo.h - PTX machine function info -------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares PTX-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_MACHINE_FUNCTION_INFO_H +#define PTX_MACHINE_FUNCTION_INFO_H + +#include "PTX.h" +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { +/// PTXMachineFunctionInfo - This class is derived from MachineFunction and +/// contains private PTX target-specific information for each MachineFunction. +/// +class PTXMachineFunctionInfo : public MachineFunctionInfo { +private: + bool is_kernel; + std::vector<unsigned> reg_arg, reg_local_var; + unsigned reg_ret; + bool _isDoneAddArg; + +public: + PTXMachineFunctionInfo(MachineFunction &MF) + : is_kernel(false), reg_ret(PTX::NoRegister), _isDoneAddArg(false) { + reg_arg.reserve(8); + reg_local_var.reserve(32); + } + + void setKernel(bool _is_kernel=true) { is_kernel = _is_kernel; } + + void addArgReg(unsigned reg) { reg_arg.push_back(reg); } + void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); } + void setRetReg(unsigned reg) { reg_ret = reg; } + + void doneAddArg(void) { + std::sort(reg_arg.begin(), reg_arg.end()); + _isDoneAddArg = true; + } + void doneAddLocalVar(void) { + std::sort(reg_local_var.begin(), reg_local_var.end()); + } + + bool isDoneAddArg(void) { return _isDoneAddArg; } + + bool isKernel() const { return is_kernel; } + + typedef std::vector<unsigned>::const_iterator reg_iterator; + + bool argRegEmpty() const { return reg_arg.empty(); } + int getNumArg() const { return reg_arg.size(); } + reg_iterator argRegBegin() const { return reg_arg.begin(); } + reg_iterator argRegEnd() const { return reg_arg.end(); } + + bool localVarRegEmpty() const { return reg_local_var.empty(); } + reg_iterator localVarRegBegin() const { return reg_local_var.begin(); } + reg_iterator localVarRegEnd() const { return reg_local_var.end(); } + + unsigned retReg() const { return reg_ret; } + + bool isArgReg(unsigned reg) const { + return std::binary_search(reg_arg.begin(), reg_arg.end(), reg); + } + + bool isLocalVarReg(unsigned reg) const { + return std::binary_search(reg_local_var.begin(), reg_local_var.end(), reg); + } +}; // class PTXMachineFunctionInfo +} // namespace llvm + +#endif // PTX_MACHINE_FUNCTION_INFO_H diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp new file mode 100644 index 0000000..0f3e7bc --- /dev/null +++ b/lib/Target/PTX/PTXRegisterInfo.cpp @@ -0,0 +1,19 @@ +//===- PTXRegisterInfo.cpp - PTX Register Information ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXRegisterInfo.h"
+
+using namespace llvm;
+
+#include "PTXGenRegisterInfo.inc"
diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h
new file mode 100644
index 0000000..67e130f
--- /dev/null
+++ b/lib/Target/PTX/PTXRegisterInfo.h
@@ -0,0 +1,63 @@
+//===- PTXRegisterInfo.h - PTX Register Information Impl --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_REGISTER_INFO_H
+#define PTX_REGISTER_INFO_H
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/BitVector.h"
+
+#include "PTXGenRegisterInfo.h.inc"
+
+namespace llvm {
+class PTXTargetMachine;
+class MachineFunction;
+
+struct PTXRegisterInfo : public PTXGenRegisterInfo {
+  PTXRegisterInfo(PTXTargetMachine &TM,
+                  const TargetInstrInfo &TII) {}
+
+  virtual const unsigned
+  *getCalleeSavedRegs(const MachineFunction *MF = 0) const {
+    static const unsigned CalleeSavedRegs[] = { 0 };
+    return CalleeSavedRegs; // save nothing
+  }
+
+  virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+    BitVector Reserved(getNumRegs());
+    return Reserved; // reserve no regs
+  }
+
+  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+                                   int SPAdj,
+                                   RegScavenger *RS = NULL) const {
+    llvm_unreachable("PTX does not support general function calls");
+  }
+
+  virtual unsigned getFrameRegister(const MachineFunction &MF) const {
+    llvm_unreachable("PTX does not have a frame register");
+    return 0;
+  }
+
+  virtual unsigned getRARegister() const {
+    llvm_unreachable("PTX does not have a return address register");
+    return 0;
+  }
+
+  virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const {
+    return PTXGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+  }
+}; // struct PTXRegisterInfo
+} // namespace llvm
+
+#endif // PTX_REGISTER_INFO_H
diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td
new file mode 100644
index 0000000..22e2b34
--- /dev/null
+++ b/lib/Target/PTX/PTXRegisterInfo.td
@@ -0,0 +1,102 @@
+//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the PTX register file +//===----------------------------------------------------------------------===// + +class PTXReg<string n> : Register<n> { + let Namespace = "PTX"; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// + +def P0 : PTXReg<"p0">; +def P1 : PTXReg<"p1">; +def P2 : PTXReg<"p2">; +def P3 : PTXReg<"p3">; +def P4 : PTXReg<"p4">; +def P5 : PTXReg<"p5">; +def P6 : PTXReg<"p6">; +def P7 : PTXReg<"p7">; +def P8 : PTXReg<"p8">; +def P9 : PTXReg<"p9">; +def P10 : PTXReg<"p10">; +def P11 : PTXReg<"p11">; +def P12 : PTXReg<"p12">; +def P13 : PTXReg<"p13">; +def P14 : PTXReg<"p14">; +def P15 : PTXReg<"p15">; +def P16 : PTXReg<"p16">; +def P17 : PTXReg<"p17">; +def P18 : PTXReg<"p18">; +def P19 : PTXReg<"p19">; +def P20 : PTXReg<"p20">; +def P21 : PTXReg<"p21">; +def P22 : PTXReg<"p22">; +def P23 : PTXReg<"p23">; +def P24 : PTXReg<"p24">; +def P25 : PTXReg<"p25">; +def P26 : PTXReg<"p26">; +def P27 : PTXReg<"p27">; +def P28 : PTXReg<"p28">; +def P29 : PTXReg<"p29">; +def P30 : PTXReg<"p30">; +def P31 : PTXReg<"p31">; + +def R0 : PTXReg<"r0">; +def R1 : PTXReg<"r1">; +def R2 : PTXReg<"r2">; +def R3 : PTXReg<"r3">; +def R4 : PTXReg<"r4">; +def R5 : PTXReg<"r5">; +def R6 : PTXReg<"r6">; +def R7 : PTXReg<"r7">; +def R8 : PTXReg<"r8">; +def R9 : PTXReg<"r9">; +def R10 : PTXReg<"r10">; +def R11 : PTXReg<"r11">; +def R12 : PTXReg<"r12">; +def R13 : PTXReg<"r13">; +def R14 : PTXReg<"r14">; +def R15 : PTXReg<"r15">; +def R16 : PTXReg<"r16">; +def R17 : PTXReg<"r17">; +def R18 : PTXReg<"r18">; +def R19 : PTXReg<"r19">; +def R20 : PTXReg<"r20">; +def R21 : PTXReg<"r21">; +def R22 : PTXReg<"r22">; +def R23 : PTXReg<"r23">; +def R24 : PTXReg<"r24">; +def R25 : PTXReg<"r25">; +def R26 : PTXReg<"r26">; +def R27 : PTXReg<"r27">; +def R28 : PTXReg<"r28">; +def R29 : PTXReg<"r29">; +def R30 : PTXReg<"r30">; +def R31 : PTXReg<"r31">; + +//===----------------------------------------------------------------------===// +// Register classes +//===----------------------------------------------------------------------===// + +def Preds : RegisterClass<"PTX", [i1], 8, + [P0, P1, P2, P3, P4, P5, P6, P7, + P8, P9, P10, P11, P12, P13, P14, P15, + P16, P17, P18, P19, P20, P21, P22, P23, + P24, P25, P26, P27, P28, P29, P30, P31]>; + +def RRegs32 : RegisterClass<"PTX", [i32], 32, + [R0, R1, R2, R3, R4, R5, R6, R7, + R8, R9, R10, R11, R12, R13, R14, R15, + R16, R17, R18, R19, R20, R21, R22, R23, + R24, R25, R26, R27, R28, R29, R30, R31]>; diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp new file mode 100644 index 0000000..00e2c88 --- /dev/null +++ b/lib/Target/PTX/PTXSubtarget.cpp @@ -0,0 +1,23 @@ +//===- PTXSubtarget.cpp - PTX Subtarget Information ---------------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTX specific subclass of TargetSubtarget. 
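+// A note on the register definitions above: tablegen expands
+// PTXRegisterInfo.td into the PTXGenRegisterInfo* files as the enums
+// PTX::P0-PTX::P31 and PTX::R0-PTX::R31, bracketed by PTX::NoRegister and
+// PTX::NUM_TARGET_REGS. That is what lets PTXMFInfoExtract scan every
+// physical register, e.g.:
+//
+//   for (unsigned Reg = PTX::NoRegister + 1; Reg < PTX::NUM_TARGET_REGS; ++Reg)
+//     if (MRI.isPhysRegUsed(Reg)) { /* ... */ }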
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXSubtarget.h"
+
+using namespace llvm;
+
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS) {
+  std::string TARGET = "sm_20";
+  // TODO: call ParseSubtargetFeatures(FS, TARGET);
+}
+
+#include "PTXGenSubtarget.inc"
diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h
new file mode 100644
index 0000000..7fd85f8
--- /dev/null
+++ b/lib/Target/PTX/PTXSubtarget.h
@@ -0,0 +1,32 @@
+//====-- PTXSubtarget.h - Define Subtarget for the PTX ---------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_SUBTARGET_H
+#define PTX_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+
+namespace llvm {
+  class PTXSubtarget : public TargetSubtarget {
+    private:
+      bool is_sm20;
+
+    public:
+      PTXSubtarget(const std::string &TT, const std::string &FS);
+
+      std::string ParseSubtargetFeatures(const std::string &FS,
+                                         const std::string &CPU);
+  }; // class PTXSubtarget
+} // namespace llvm
+
+#endif // PTX_SUBTARGET_H
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
new file mode 100644
index 0000000..b263813
--- /dev/null
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -0,0 +1,60 @@
+//===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXMCAsmInfo.h"
+#include "PTXTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace llvm {
+  MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+                                   bool isVerboseAsm, bool useLoc,
+                                   MCInstPrinter *InstPrint,
+                                   MCCodeEmitter *CE,
+                                   TargetAsmBackend *TAB,
+                                   bool ShowInst);
+}
+
+extern "C" void LLVMInitializePTXTarget() {
+  RegisterTargetMachine<PTXTargetMachine> X(ThePTXTarget);
+  RegisterAsmInfo<PTXMCAsmInfo> Y(ThePTXTarget);
+  TargetRegistry::RegisterAsmStreamer(ThePTXTarget, createPTXAsmStreamer);
+}
+
+// DataLayout and FrameLowering are filled with dummy data
+PTXTargetMachine::PTXTargetMachine(const Target &T,
+                                   const std::string &TT,
+                                   const std::string &FS)
+  : LLVMTargetMachine(T, TT),
+    DataLayout("e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"),
+    FrameLowering(Subtarget),
+    InstrInfo(*this),
+    TLInfo(*this),
+    Subtarget(TT, FS) {
+}
+
+bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
+                                       CodeGenOpt::Level OptLevel) {
+  PM.add(createPTXISelDag(*this, OptLevel));
+  return false;
+}
+
+bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
+                                       CodeGenOpt::Level OptLevel) {
+  // PTXMFInfoExtract must be run after register allocation!
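+  // The pass reads MachineRegisterInfo::isPhysRegUsed(), which only
+  // reflects the final physical register usage once the register
+  // allocator has rewritten virtual registers, hence the post-RA hook.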
+ PM.add(createPTXMFInfoExtract(*this, OptLevel)); + return false; +} diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h new file mode 100644 index 0000000..728e36f --- /dev/null +++ b/lib/Target/PTX/PTXTargetMachine.h @@ -0,0 +1,60 @@ +//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the PTX specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_TARGET_MACHINE_H +#define PTX_TARGET_MACHINE_H + +#include "PTXISelLowering.h" +#include "PTXInstrInfo.h" +#include "PTXFrameLowering.h" +#include "PTXSubtarget.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class PTXTargetMachine : public LLVMTargetMachine { + private: + const TargetData DataLayout; + PTXFrameLowering FrameLowering; + PTXInstrInfo InstrInfo; + PTXTargetLowering TLInfo; + PTXSubtarget Subtarget; + + public: + PTXTargetMachine(const Target &T, const std::string &TT, + const std::string &FS); + + virtual const TargetData *getTargetData() const { return &DataLayout; } + + virtual const TargetFrameLowering *getFrameLowering() const { + return &FrameLowering; + } + + virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const TargetRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); } + + virtual const PTXTargetLowering *getTargetLowering() const { + return &TLInfo; } + + virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; } + + virtual bool addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); + virtual bool addPostRegAlloc(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); +}; // class PTXTargetMachine +} // namespace llvm + +#endif // PTX_TARGET_MACHINE_H diff --git a/lib/Target/PTX/TargetInfo/CMakeLists.txt b/lib/Target/PTX/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000..4b09cf5 --- /dev/null +++ b/lib/Target/PTX/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMPTXInfo + PTXTargetInfo.cpp + ) + +add_dependencies(LLVMPTXInfo PTXCodeGenTable_gen) diff --git a/lib/Target/PTX/TargetInfo/Makefile b/lib/Target/PTX/TargetInfo/Makefile new file mode 100644 index 0000000..8619785 --- /dev/null +++ b/lib/Target/PTX/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMPTXInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp new file mode 100644 index 0000000..a577d77 --- /dev/null +++ b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp @@ -0,0 +1,21 @@ +//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" + +using namespace llvm; + +Target llvm::ThePTXTarget; + +extern "C" void LLVMInitializePTXTargetInfo() { + // see llvm/ADT/Triple.h + RegisterTarget<Triple::ptx> X(ThePTXTarget, "ptx", "PTX"); +} diff --git a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 42cd486..0000000 --- a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMPowerPCAsmPrinter - PPCAsmPrinter.cpp - ) -add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen) diff --git a/lib/Target/PowerPC/AsmPrinter/Makefile b/lib/Target/PowerPC/AsmPrinter/Makefile deleted file mode 100644 index bd5dce1..0000000 --- a/lib/Target/PowerPC/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/PowerPC/AsmPrinter/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMPowerPCAsmPrinter - -# Hack: we need to include 'main' PowerPC target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp deleted file mode 100644 index c1a5663..0000000 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ /dev/null @@ -1,922 +0,0 @@ -//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to PowerPC assembly language. This printer is -// the output mechanism used by `llc'. 
-// -// Documentation at http://developer.apple.com/documentation/DeveloperTools/ -// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asmprinter" -#include "PPC.h" -#include "PPCPredicates.h" -#include "PPCTargetMachine.h" -#include "PPCSubtarget.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/ADT/SmallString.h" -using namespace llvm; - -namespace { - class PPCAsmPrinter : public AsmPrinter { - protected: - DenseMap<MCSymbol*, MCSymbol*> TOC; - const PPCSubtarget &Subtarget; - uint64_t LabelID; - public: - explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), - Subtarget(TM.getSubtarget<PPCSubtarget>()), LabelID(0) {} - - virtual const char *getPassName() const { - return "PowerPC Assembly Printer"; - } - - PPCTargetMachine &getTM() { - return static_cast<PPCTargetMachine&>(TM); - } - - unsigned enumRegToMachineReg(unsigned enumReg) { - switch (enumReg) { - default: llvm_unreachable("Unhandled register!"); - case PPC::CR0: return 0; - case PPC::CR1: return 1; - case PPC::CR2: return 2; - case PPC::CR3: return 3; - case PPC::CR4: return 4; - case PPC::CR5: return 5; - case PPC::CR6: return 6; - case PPC::CR7: return 7; - } - llvm_unreachable(0); - } - - /// printInstruction - This method is automatically generated by tablegen - /// from the instruction set description. This method returns true if the - /// machine instruction was sufficiently described to print it, otherwise it - /// returns false. - void printInstruction(const MachineInstr *MI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - - - virtual void EmitInstruction(const MachineInstr *MI); - void printOp(const MachineOperand &MO, raw_ostream &O); - - /// stripRegisterPrefix - This method strips the character prefix from a - /// register name so that only the number is left. Used by for linux asm. - const char *stripRegisterPrefix(const char *RegName) { - switch (RegName[0]) { - case 'r': - case 'f': - case 'v': return RegName + 1; - case 'c': if (RegName[1] == 'r') return RegName + 2; - } - - return RegName; - } - - /// printRegister - Print register according to target requirements. - /// - void printRegister(const MachineOperand &MO, bool R0AsZero, raw_ostream &O){ - unsigned RegNo = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??"); - - // If we should use 0 for R0. 
- if (R0AsZero && RegNo == PPC::R0) { - O << "0"; - return; - } - - const char *RegName = getRegisterName(RegNo); - // Linux assembler (Others?) does not take register mnemonics. - // FIXME - What about special registers used in mfspr/mtspr? - if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName); - O << RegName; - } - - void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNo); - if (MO.isReg()) { - printRegister(MO, false, O); - } else if (MO.isImm()) { - O << MO.getImm(); - } else { - printOp(MO, O); - } - } - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - - - void printS5ImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - char value = MI->getOperand(OpNo).getImm(); - value = (value << (32-5)) >> (32-5); - O << (int)value; - } - void printU5ImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - unsigned char value = MI->getOperand(OpNo).getImm(); - assert(value <= 31 && "Invalid u5imm argument!"); - O << (unsigned int)value; - } - void printU6ImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - unsigned char value = MI->getOperand(OpNo).getImm(); - assert(value <= 63 && "Invalid u6imm argument!"); - O << (unsigned int)value; - } - void printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - O << (short)MI->getOperand(OpNo).getImm(); - } - void printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - O << (unsigned short)MI->getOperand(OpNo).getImm(); - } - void printS16X4ImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) { - O << (short)(MI->getOperand(OpNo).getImm()*4); - } else { - O << "lo16("; - printOp(MI->getOperand(OpNo), O); - if (TM.getRelocationModel() == Reloc::PIC_) - O << "-\"L" << getFunctionNumber() << "$pb\")"; - else - O << ')'; - } - } - void printBranchOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - // Branches can take an immediate operand. This is used by the branch - // selection pass to print $+8, an eight byte displacement from the PC. - if (MI->getOperand(OpNo).isImm()) { - O << "$+" << MI->getOperand(OpNo).getImm()*4; - } else { - printOp(MI->getOperand(OpNo), O); - } - } - void printCallOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNo); - if (TM.getRelocationModel() != Reloc::Static) { - if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - if (GV->isDeclaration() || GV->isWeakForLinker()) { - // Dynamically-resolved functions need a stub for the function. 
- MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); - if (StubSym.getPointer() == 0) - StubSym = MachineModuleInfoImpl:: - StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage()); - O << *Sym; - return; - } - } - if (MO.isSymbol()) { - SmallString<128> TempNameStr; - TempNameStr += StringRef(MO.getSymbolName()); - TempNameStr += StringRef("$stub"); - - MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); - if (StubSym.getPointer() == 0) - StubSym = MachineModuleInfoImpl:: - StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true); - O << *Sym; - return; - } - } - - printOp(MI->getOperand(OpNo), O); - } - void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - O << (int)MI->getOperand(OpNo).getImm()*4; - } - void printPICLabel(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - O << "\"L" << getFunctionNumber() << "$pb\"\n"; - O << "\"L" << getFunctionNumber() << "$pb\":"; - } - void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) { - printS16ImmOperand(MI, OpNo, O); - } else { - if (Subtarget.isDarwin()) O << "ha16("; - printOp(MI->getOperand(OpNo), O); - if (TM.getRelocationModel() == Reloc::PIC_) - O << "-\"L" << getFunctionNumber() << "$pb\""; - if (Subtarget.isDarwin()) - O << ')'; - else - O << "@ha"; - } - } - void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) { - printS16ImmOperand(MI, OpNo, O); - } else { - if (Subtarget.isDarwin()) O << "lo16("; - printOp(MI->getOperand(OpNo), O); - if (TM.getRelocationModel() == Reloc::PIC_) - O << "-\"L" << getFunctionNumber() << "$pb\""; - if (Subtarget.isDarwin()) - O << ')'; - else - O << "@l"; - } - } - void printcrbitm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - unsigned CCReg = MI->getOperand(OpNo).getReg(); - unsigned RegNo = enumRegToMachineReg(CCReg); - O << (0x80 >> RegNo); - } - // The new addressing mode printers. - void printMemRegImm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - printSymbolLo(MI, OpNo, O); - O << '('; - if (MI->getOperand(OpNo+1).isReg() && - MI->getOperand(OpNo+1).getReg() == PPC::R0) - O << "0"; - else - printOperand(MI, OpNo+1, O); - O << ')'; - } - void printMemRegImmShifted(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - if (MI->getOperand(OpNo).isImm()) - printS16X4ImmOperand(MI, OpNo, O); - else - printSymbolLo(MI, OpNo, O); - O << '('; - if (MI->getOperand(OpNo+1).isReg() && - MI->getOperand(OpNo+1).getReg() == PPC::R0) - O << "0"; - else - printOperand(MI, OpNo+1, O); - O << ')'; - } - - void printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - // When used as the base register, r0 reads constant zero rather than - // the value contained in the register. For this reason, the darwin - // assembler requires that we print r0 as 0 (no r) when used as the base. - const MachineOperand &MO = MI->getOperand(OpNo); - printRegister(MO, true, O); - O << ", "; - printOperand(MI, OpNo+1, O); - } - - void printTOCEntryLabel(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNo); - assert(MO.isGlobal()); - MCSymbol *Sym = Mang->getSymbol(MO.getGlobal()); - - // Map symbol -> label of TOC entry. 
- MCSymbol *&TOCEntry = TOC[Sym]; - if (TOCEntry == 0) - TOCEntry = OutContext. - GetOrCreateSymbol(StringRef(MAI->getPrivateGlobalPrefix()) + - "C" + Twine(LabelID++)); - - O << *TOCEntry << "@toc"; - } - - void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O, const char *Modifier); - - MachineLocation getDebugValueLocation(const MachineInstr *MI) const { - - MachineLocation Location; - assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); - // Frame address. Currently handles register +- offset only. - if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm()) - Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm()); - else { - DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); - } - return Location; - } - }; - - /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux - class PPCLinuxAsmPrinter : public PPCAsmPrinter { - public: - explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : PPCAsmPrinter(TM, Streamer) {} - - virtual const char *getPassName() const { - return "Linux PPC Assembly Printer"; - } - - bool doFinalization(Module &M); - - virtual void EmitFunctionEntryLabel(); - }; - - /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac - /// OS X - class PPCDarwinAsmPrinter : public PPCAsmPrinter { - public: - explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : PPCAsmPrinter(TM, Streamer) {} - - virtual const char *getPassName() const { - return "Darwin PPC Assembly Printer"; - } - - bool doFinalization(Module &M); - void EmitStartOfAsmFile(Module &M); - - void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs); - }; -} // end of anonymous namespace - -// Include the auto-generated portion of the assembly writer -#include "PPCGenAsmWriter.inc" - -void PPCAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { - switch (MO.getType()) { - case MachineOperand::MO_Immediate: - llvm_unreachable("printOp() does not handle immediate values"); - - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - // FIXME: PIC relocation model - return; - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() - << '_' << MO.getIndex(); - return; - case MachineOperand::MO_BlockAddress: - O << *GetBlockAddressSymbol(MO.getBlockAddress()); - return; - case MachineOperand::MO_ExternalSymbol: { - // Computing the address of an external symbol, not calling it. - if (TM.getRelocationModel() == Reloc::Static) { - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - return; - } - - MCSymbol *NLPSym = - OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+ - MO.getSymbolName()+"$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym); - if (StubSym.getPointer() == 0) - StubSym = MachineModuleInfoImpl:: - StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true); - - O << *NLPSym; - return; - } - case MachineOperand::MO_GlobalAddress: { - // Computing the address of a global symbol, not calling it. 
- const GlobalValue *GV = MO.getGlobal(); - MCSymbol *SymToPrint; - - // External or weakly linked global variables need non-lazily-resolved stubs - if (TM.getRelocationModel() != Reloc::Static && - (GV->isDeclaration() || GV->isWeakForLinker())) { - if (!GV->hasHiddenVisibility()) { - SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo<MachineModuleInfoMachO>() - .getGVStubEntry(SymToPrint); - if (StubSym.getPointer() == 0) - StubSym = MachineModuleInfoImpl:: - StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage()); - } else if (GV->isDeclaration() || GV->hasCommonLinkage() || - GV->hasAvailableExternallyLinkage()) { - SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo<MachineModuleInfoMachO>(). - getHiddenGVStubEntry(SymToPrint); - if (StubSym.getPointer() == 0) - StubSym = MachineModuleInfoImpl:: - StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage()); - } else { - SymToPrint = Mang->getSymbol(GV); - } - } else { - SymToPrint = Mang->getSymbol(GV); - } - - O << *SymToPrint; - - printOffset(MO.getOffset(), O); - return; - } - - default: - O << "<unknown operand type: " << MO.getType() << ">"; - return; - } -} - -/// PrintAsmOperand - Print out an operand for an inline asm expression. -/// -bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O) { - // Does this asm operand have a single letter operand modifier? - if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) return true; // Unknown modifier. - - switch (ExtraCode[0]) { - default: return true; // Unknown modifier. - case 'c': // Don't print "$" before a global var name or constant. - // PPC never has a prefix. - printOperand(MI, OpNo, O); - return false; - case 'L': // Write second word of DImode reference. - // Verify that this operand has two consecutive registers. - if (!MI->getOperand(OpNo).isReg() || - OpNo+1 == MI->getNumOperands() || - !MI->getOperand(OpNo+1).isReg()) - return true; - ++OpNo; // Return the high-part. - break; - case 'I': - // Write 'i' if an integer constant, otherwise nothing. Used to print - // addi vs add, etc. - if (MI->getOperand(OpNo).isImm()) - O << "i"; - return false; - } - } - - printOperand(MI, OpNo, O); - return false; -} - -// At the moment, all inline asm memory operands are a single register. -// In any case, the output of this routine should always be just one -// assembler operand. - -bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. - assert (MI->getOperand(OpNo).isReg()); - O << "0("; - printOperand(MI, OpNo, O); - O << ")"; - return false; -} - -void PPCAsmPrinter::printPredicateOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O, const char *Modifier){ - assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!"); - unsigned Code = MI->getOperand(OpNo).getImm(); - if (!strcmp(Modifier, "cc")) { - switch ((PPC::Predicate)Code) { - case PPC::PRED_ALWAYS: return; // Don't print anything for always. 
- case PPC::PRED_LT: O << "lt"; return; - case PPC::PRED_LE: O << "le"; return; - case PPC::PRED_EQ: O << "eq"; return; - case PPC::PRED_GE: O << "ge"; return; - case PPC::PRED_GT: O << "gt"; return; - case PPC::PRED_NE: O << "ne"; return; - case PPC::PRED_UN: O << "un"; return; - case PPC::PRED_NU: O << "nu"; return; - } - - } else { - assert(!strcmp(Modifier, "reg") && - "Need to specify 'cc' or 'reg' as predicate op modifier!"); - // Don't print the register for 'always'. - if (Code == PPC::PRED_ALWAYS) return; - printOperand(MI, OpNo+1, O); - } -} - - -/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to -/// the current output stream. -/// -void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream O(Str); - - if (MI->getOpcode() == TargetOpcode::DBG_VALUE) { - unsigned NOps = MI->getNumOperands(); - assert(NOps==4); - O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); - O << V.getName(); - O << " <- "; - // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O); - O << ']'; - O << "+"; - printOperand(MI, NOps-2, O); - OutStreamer.EmitRawText(O.str()); - return; - } - // Check for slwi/srwi mnemonics. - if (MI->getOpcode() == PPC::RLWINM) { - unsigned char SH = MI->getOperand(2).getImm(); - unsigned char MB = MI->getOperand(3).getImm(); - unsigned char ME = MI->getOperand(4).getImm(); - bool useSubstituteMnemonic = false; - if (SH <= 31 && MB == 0 && ME == (31-SH)) { - O << "\tslwi "; useSubstituteMnemonic = true; - } - if (SH <= 31 && MB == (32-SH) && ME == 31) { - O << "\tsrwi "; useSubstituteMnemonic = true; - SH = 32-SH; - } - if (useSubstituteMnemonic) { - printOperand(MI, 0, O); - O << ", "; - printOperand(MI, 1, O); - O << ", " << (unsigned int)SH; - OutStreamer.EmitRawText(O.str()); - return; - } - } - - if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) && - MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { - O << "\tmr "; - printOperand(MI, 0, O); - O << ", "; - printOperand(MI, 1, O); - OutStreamer.EmitRawText(O.str()); - return; - } - - if (MI->getOpcode() == PPC::RLDICR) { - unsigned char SH = MI->getOperand(2).getImm(); - unsigned char ME = MI->getOperand(3).getImm(); - // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH - if (63-SH == ME) { - O << "\tsldi "; - printOperand(MI, 0, O); - O << ", "; - printOperand(MI, 1, O); - O << ", " << (unsigned int)SH; - OutStreamer.EmitRawText(O.str()); - return; - } - } - - printInstruction(MI, O); - OutStreamer.EmitRawText(O.str()); -} - -void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { - if (!Subtarget.isPPC64()) // linux/ppc32 - Normal entry label. - return AsmPrinter::EmitFunctionEntryLabel(); - - // Emit an official procedure descriptor. - // FIXME 64-bit SVR4: Use MCSection here! - OutStreamer.EmitRawText(StringRef("\t.section\t\".opd\",\"aw\"")); - OutStreamer.EmitRawText(StringRef("\t.align 3")); - OutStreamer.EmitLabel(CurrentFnSym); - OutStreamer.EmitRawText("\t.quad .L." + Twine(CurrentFnSym->getName()) + - ",.TOC.@tocbase"); - OutStreamer.EmitRawText(StringRef("\t.previous")); - OutStreamer.EmitRawText(".L." 
+ Twine(CurrentFnSym->getName()) + ":"); -} - - -bool PPCLinuxAsmPrinter::doFinalization(Module &M) { - const TargetData *TD = TM.getTargetData(); - - bool isPPC64 = TD->getPointerSizeInBits() == 64; - - if (isPPC64 && !TOC.empty()) { - // FIXME 64-bit SVR4: Use MCSection here? - OutStreamer.EmitRawText(StringRef("\t.section\t\".toc\",\"aw\"")); - - // FIXME: This is nondeterminstic! - for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(), - E = TOC.end(); I != E; ++I) { - OutStreamer.EmitLabel(I->second); - OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) + - "[TC]," + I->first->getName()); - } - } - - return AsmPrinter::doFinalization(M); -} - -void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { - static const char *const CPUDirectives[] = { - "", - "ppc", - "ppc601", - "ppc602", - "ppc603", - "ppc7400", - "ppc750", - "ppc970", - "ppc64" - }; - - unsigned Directive = Subtarget.getDarwinDirective(); - if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970) - Directive = PPC::DIR_970; - if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400) - Directive = PPC::DIR_7400; - if (Subtarget.isPPC64() && Directive < PPC::DIR_970) - Directive = PPC::DIR_64; - assert(Directive <= PPC::DIR_64 && "Directive out of range."); - OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive])); - - // Prime text sections so they are adjacent. This reduces the likelihood a - // large data or debug section causes a branch to exceed 16M limit. - const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); - OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection()); - if (TM.getRelocationModel() == Reloc::PIC_) { - OutStreamer.SwitchSection( - OutContext.getMachOSection("__TEXT", "__picsymbolstub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 32, SectionKind::getText())); - } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) { - OutStreamer.SwitchSection( - OutContext.getMachOSection("__TEXT","__symbol_stub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 16, SectionKind::getText())); - } - OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); -} - -static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) { - // Remove $stub suffix, add $lazy_ptr. - SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5); - TmpStr += "$lazy_ptr"; - return Ctx.GetOrCreateSymbol(TmpStr.str()); -} - -static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) { - // Add $tmp suffix to $stub, yielding $stub$tmp. 
- SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()); - TmpStr += "$tmp"; - return Ctx.GetOrCreateSymbol(TmpStr.str()); -} - -void PPCDarwinAsmPrinter:: -EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { - bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; - - const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); - - // .lazy_symbol_pointer - const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection(); - - // Output stubs for dynamically-linked functions - if (TM.getRelocationModel() == Reloc::PIC_) { - const MCSection *StubSection = - OutContext.getMachOSection("__TEXT", "__picsymbolstub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 32, SectionKind::getText()); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.SwitchSection(StubSection); - EmitAlignment(4); - - MCSymbol *Stub = Stubs[i].first; - MCSymbol *RawSym = Stubs[i].second.getPointer(); - MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); - MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext); - - OutStreamer.EmitLabel(Stub); - OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - // FIXME: MCize this. - OutStreamer.EmitRawText(StringRef("\tmflr r0")); - OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName())); - OutStreamer.EmitLabel(AnonSymbol); - OutStreamer.EmitRawText(StringRef("\tmflr r11")); - OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+ - "-" + AnonSymbol->getName() + ")"); - OutStreamer.EmitRawText(StringRef("\tmtlr r0")); - - if (isPPC64) - OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) + - "-" + AnonSymbol->getName() + ")(r11)"); - else - OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) + - "-" + AnonSymbol->getName() + ")(r11)"); - OutStreamer.EmitRawText(StringRef("\tmtctr r12")); - OutStreamer.EmitRawText(StringRef("\tbctr")); - - OutStreamer.SwitchSection(LSPSection); - OutStreamer.EmitLabel(LazyPtr); - OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - - if (isPPC64) - OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper")); - else - OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper")); - } - OutStreamer.AddBlankLine(); - return; - } - - const MCSection *StubSection = - OutContext.getMachOSection("__TEXT","__symbol_stub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 16, SectionKind::getText()); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - MCSymbol *Stub = Stubs[i].first; - MCSymbol *RawSym = Stubs[i].second.getPointer(); - MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); - - OutStreamer.SwitchSection(StubSection); - EmitAlignment(4); - OutStreamer.EmitLabel(Stub); - OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")"); - if (isPPC64) - OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) + - ")(r11)"); - else - OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) + - ")(r11)"); - OutStreamer.EmitRawText(StringRef("\tmtctr r12")); - OutStreamer.EmitRawText(StringRef("\tbctr")); - OutStreamer.SwitchSection(LSPSection); - OutStreamer.EmitLabel(LazyPtr); - OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - - if (isPPC64) - OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper")); - else - 
OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper")); - } - - OutStreamer.AddBlankLine(); -} - - -bool PPCDarwinAsmPrinter::doFinalization(Module &M) { - bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; - - // Darwin/PPC always uses mach-o. - const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); - MachineModuleInfoMachO &MMIMacho = - MMI->getObjFileInfo<MachineModuleInfoMachO>(); - - MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList(); - if (!Stubs.empty()) - EmitFunctionStubs(Stubs); - - if (MAI->doesSupportExceptionHandling() && MMI) { - // Add the (possibly multiple) personalities to the set of global values. - // Only referenced functions get into the Personalities list. - const std::vector<const Function*> &Personalities = MMI->getPersonalities(); - for (std::vector<const Function*>::const_iterator I = Personalities.begin(), - E = Personalities.end(); I != E; ++I) { - if (*I) { - MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMIMacho.getGVStubEntry(NLPSym); - StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true); - } - } - } - - // Output stubs for dynamically-linked functions. - Stubs = MMIMacho.GetGVStubList(); - - // Output macho stubs for external and common global variables. - if (!Stubs.empty()) { - // Switch with ".non_lazy_symbol_pointer" directive. - OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); - EmitAlignment(isPPC64 ? 3 : 2); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer.EmitLabel(Stubs[i].first); - // .indirect_symbol _foo - MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second; - OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol); - - if (MCSym.getInt()) - // External to current translation unit. - OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); - else - // Internal to current translation unit. - // - // When we place the LSDA into the TEXT section, the type info pointers - // need to be indirect and pc-rel. We accomplish this by using NLPs. - // However, sometimes the types are local to the file. So we need to - // fill in the value for the NLP in those cases. - OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), - OutContext), - isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); - } - - Stubs.clear(); - OutStreamer.AddBlankLine(); - } - - Stubs = MMIMacho.GetHiddenGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); - EmitAlignment(isPPC64 ? 3 : 2); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer.EmitLabel(Stubs[i].first); - // .long _foo - OutStreamer.EmitValue(MCSymbolRefExpr:: - Create(Stubs[i].second.getPointer(), - OutContext), - isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); - } - - Stubs.clear(); - OutStreamer.AddBlankLine(); - } - - // Funny Darwin hack: This flag tells the linker that no global symbols - // contain code that falls through to other global symbols (e.g. the obvious - // implementation of multiple entry points). If this doesn't occur, the - // linker can safely perform dead code stripping. Since LLVM never generates - // code that does this, it is always safe to set. 
- OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); - - return AsmPrinter::doFinalization(M); -} - -/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code -/// for a MachineFunction to the given output stream, in a format that the -/// Darwin assembler can deal with. -/// -static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm, - MCStreamer &Streamer) { - const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>(); - - if (Subtarget->isDarwin()) - return new PPCDarwinAsmPrinter(tm, Streamer); - return new PPCLinuxAsmPrinter(tm, Streamer); -} - -// Force static initialization. -extern "C" void LLVMInitializePowerPCAsmPrinter() { - TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass); - TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass); -} diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 7ffc5eb..f282579 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -4,6 +4,7 @@ tablegen(PPCGenInstrNames.inc -gen-instr-enums) tablegen(PPCGenRegisterNames.inc -gen-register-enums) tablegen(PPCGenAsmWriter.inc -gen-asm-writer) tablegen(PPCGenCodeEmitter.inc -gen-emitter) +tablegen(PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(PPCGenRegisterInfo.h.inc -gen-register-desc-header) tablegen(PPCGenRegisterInfo.inc -gen-register-desc) tablegen(PPCGenInstrInfo.inc -gen-instr-desc) @@ -12,14 +13,19 @@ tablegen(PPCGenCallingConv.inc -gen-callingconv) tablegen(PPCGenSubtarget.inc -gen-subtarget) add_llvm_target(PowerPCCodeGen + PPCAsmBackend.cpp + PPCAsmPrinter.cpp PPCBranchSelector.cpp PPCCodeEmitter.cpp PPCHazardRecognizers.cpp PPCInstrInfo.cpp PPCISelDAGToDAG.cpp PPCISelLowering.cpp + PPCFrameLowering.cpp PPCJITInfo.cpp PPCMCAsmInfo.cpp + PPCMCCodeEmitter.cpp + PPCMCInstLower.cpp PPCPredicates.cpp PPCRegisterInfo.cpp PPCSubtarget.cpp @@ -27,4 +33,5 @@ add_llvm_target(PowerPCCodeGen PPCSelectionDAGInfo.cpp ) -target_link_libraries (LLVMPowerPCCodeGen LLVMSelectionDAG) +add_subdirectory(InstPrinter) +add_subdirectory(TargetInfo) diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt new file mode 100644 index 0000000..389ea77 --- /dev/null +++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMPowerPCAsmPrinter + PPCInstPrinter.cpp + ) +add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen) diff --git a/lib/Target/PowerPC/InstPrinter/Makefile b/lib/Target/PowerPC/InstPrinter/Makefile new file mode 100644 index 0000000..f097e84 --- /dev/null +++ b/lib/Target/PowerPC/InstPrinter/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/PowerPC/AsmPrinter/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMPowerPCAsmPrinter + +# Hack: we need to include 'main' powerpc target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
new file mode 100644
index 0000000..c8db0c4
--- /dev/null
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -0,0 +1,292 @@
+//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "PPCInstPrinter.h"
+#include "PPCPredicates.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "PPCGenAsmWriter.inc"
+
+StringRef PPCInstPrinter::getOpcodeName(unsigned Opcode) const {
+  return getInstructionName(Opcode);
+}
+
+
+void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+  // Check for slwi/srwi mnemonics.
+  if (MI->getOpcode() == PPC::RLWINM) {
+    unsigned char SH = MI->getOperand(2).getImm();
+    unsigned char MB = MI->getOperand(3).getImm();
+    unsigned char ME = MI->getOperand(4).getImm();
+    bool useSubstituteMnemonic = false;
+    if (SH <= 31 && MB == 0 && ME == (31-SH)) {
+      O << "\tslwi "; useSubstituteMnemonic = true;
+    }
+    if (SH <= 31 && MB == (32-SH) && ME == 31) {
+      O << "\tsrwi "; useSubstituteMnemonic = true;
+      SH = 32-SH;
+    }
+    if (useSubstituteMnemonic) {
+      printOperand(MI, 0, O);
+      O << ", ";
+      printOperand(MI, 1, O);
+      O << ", " << (unsigned int)SH;
+      return;
+    }
+  }
+
+  if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
+      MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+    O << "\tmr ";
+    printOperand(MI, 0, O);
+    O << ", ";
+    printOperand(MI, 1, O);
+    return;
+  }
+
+  if (MI->getOpcode() == PPC::RLDICR) {
+    unsigned char SH = MI->getOperand(2).getImm();
+    unsigned char ME = MI->getOperand(3).getImm();
+    // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
+    if (63-SH == ME) {
+      O << "\tsldi ";
+      printOperand(MI, 0, O);
+      O << ", ";
+      printOperand(MI, 1, O);
+      O << ", " << (unsigned int)SH;
+      return;
+    }
+  }
+
+  printInstruction(MI, O);
+}
+
+
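printInst above rests on two rotate-and-mask identities: rlwinm rotates the source left by SH and keeps IBM-numbered bits MB..ME (bit 0 is the MSB), so MB=0, ME=31-SH is exactly a left shift, while MB=32-SH, ME=31 (with the printed shift amount rewritten to 32-SH) is exactly a logical right shift. A standalone sanity check of those identities, not part of the patch:

// Verifies the slwi/srwi special cases of rlwinm that printInst substitutes.
#include <cassert>
#include <stdint.h>

static uint32_t rlwinm(uint32_t RS, unsigned SH, unsigned MB, unsigned ME) {
  // Rotate left by SH, then AND with the mask covering IBM bits MB..ME
  // (assumes MB <= ME, which holds for the shift special cases).
  uint32_t Rot = (RS << SH) | (RS >> ((32 - SH) & 31));
  uint32_t Mask = (0xFFFFFFFFu >> MB) & (0xFFFFFFFFu << (31 - ME));
  return Rot & Mask;
}

int main() {
  uint32_t X = 0x12345678u;
  for (unsigned N = 1; N <= 31; ++N) {
    assert(rlwinm(X, N, 0, 31 - N) == X << N);   // slwi X,N
    assert(rlwinm(X, 32 - N, N, 31) == X >> N);  // srwi X,N
  }
  return 0;
}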
+void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O,
+                                           const char *Modifier) {
+  assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
+  unsigned Code = MI->getOperand(OpNo).getImm();
+  if (StringRef(Modifier) == "cc") {
+    switch ((PPC::Predicate)Code) {
+    default: assert(0 && "Invalid predicate");
+    case PPC::PRED_ALWAYS: return; // Don't print anything for always.
+    case PPC::PRED_LT: O << "lt"; return;
+    case PPC::PRED_LE: O << "le"; return;
+    case PPC::PRED_EQ: O << "eq"; return;
+    case PPC::PRED_GE: O << "ge"; return;
+    case PPC::PRED_GT: O << "gt"; return;
+    case PPC::PRED_NE: O << "ne"; return;
+    case PPC::PRED_UN: O << "un"; return;
+    case PPC::PRED_NU: O << "nu"; return;
+    }
+  }
+
+  assert(StringRef(Modifier) == "reg" &&
+         "Need to specify 'cc' or 'reg' as predicate op modifier!");
+  // Don't print the register for 'always'.
+  if (Code == PPC::PRED_ALWAYS) return;
+  printOperand(MI, OpNo+1, O);
+}
+
+void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  char Value = MI->getOperand(OpNo).getImm();
+  Value = (Value << (32-5)) >> (32-5);
+  O << (int)Value;
+}
+
+void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned char Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 31 && "Invalid u5imm argument!");
+  O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+                                       raw_ostream &O) {
+  unsigned char Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 63 && "Invalid u6imm argument!");
+  O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  O << (short)MI->getOperand(OpNo).getImm();
+}
+
+void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  O << (unsigned short)MI->getOperand(OpNo).getImm();
+}
+
+void PPCInstPrinter::printS16X4ImmOperand(const MCInst *MI, unsigned OpNo,
+                                          raw_ostream &O) {
+  if (MI->getOperand(OpNo).isImm())
+    O << (short)(MI->getOperand(OpNo).getImm()*4);
+  else
+    printOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  if (!MI->getOperand(OpNo).isImm())
+    return printOperand(MI, OpNo, O);
+
+  // Branches can take an immediate operand.  This is used by the branch
+  // selection pass to print $+8, an eight byte displacement from the PC.
+  O << "$+";
+  printAbsAddrOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
+                                         raw_ostream &O) {
+  O << (int)MI->getOperand(OpNo).getImm()*4;
+}
+
+
+void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
+                                 raw_ostream &O) {
+  unsigned CCReg = MI->getOperand(OpNo).getReg();
+  unsigned RegNo;
+  switch (CCReg) {
+  default: assert(0 && "Unknown CR register");
+  case PPC::CR0: RegNo = 0; break;
+  case PPC::CR1: RegNo = 1; break;
+  case PPC::CR2: RegNo = 2; break;
+  case PPC::CR3: RegNo = 3; break;
+  case PPC::CR4: RegNo = 4; break;
+  case PPC::CR5: RegNo = 5; break;
+  case PPC::CR6: RegNo = 6; break;
+  case PPC::CR7: RegNo = 7; break;
+  }
+  O << (0x80 >> RegNo);
+}
+
+void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
+                                    raw_ostream &O) {
+  printSymbolLo(MI, OpNo, O);
+  O << '(';
+  if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+    O << "0";
+  else
+    printOperand(MI, OpNo+1, O);
+  O << ')';
+}
+
+void PPCInstPrinter::printMemRegImmShifted(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O) {
+  if (MI->getOperand(OpNo).isImm())
+    printS16X4ImmOperand(MI, OpNo, O);
+  else
+    printSymbolLo(MI, OpNo, O);
+  O << '(';
+
+  if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+    O << "0";
+  else
+    printOperand(MI, OpNo+1, O);
+  O << ')';
+}
+
+
+void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
+                                    raw_ostream &O) {
+  // When used as the base register, r0 reads constant zero rather than
+  // the value contained in the register.  For this reason, the Darwin
+  // assembler requires that we print r0 as 0 (no r) when used as the base.
+  if (MI->getOperand(OpNo).getReg() == PPC::R0)
+    O << "0";
+  else
+    printOperand(MI, OpNo, O);
+  O << ", ";
+  printOperand(MI, OpNo+1, O);
+}
+
+
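Two bits of arithmetic above and below are worth pinning down: printS5ImmOperand recovers a signed 5-bit immediate with a shift pair, and the printSymbolLo/printSymbolHi helpers that follow print the lo16()/ha16() halves of an address, where the low half acts as a signed 16-bit displacement (so the high half must be rounded up when bit 15 is set). A standalone illustration, not from the patch; the constants are chosen for the demonstration:

// Sign extension of an N-bit field, plus the ha16/lo16 round trip.
#include <cassert>
#include <stdint.h>

// Same effect as printS5ImmOperand's shift pair, done in a
// well-defined unsigned way for any field width N.
static int32_t signExtend(uint32_t V, unsigned N) {
  uint32_t M = 1u << (N - 1);
  return (int32_t)((V & ((1u << N) - 1)) ^ M) - (int32_t)M;
}

int main() {
  assert(signExtend(0x1F, 5) == -1);  // 0b11111 as s5imm is -1
  assert(signExtend(0x0F, 5) == 15);

  // ha16/lo16: lo16 is consumed as a *signed* displacement, so ha16
  // adds 0x8000 before taking the high half.
  uint32_t Addr = 0x12348000u;
  uint16_t Lo = Addr & 0xFFFF;          // lo16(Addr)
  uint16_t Ha = (Addr + 0x8000) >> 16;  // ha16(Addr)
  assert(((uint32_t)Ha << 16) + (int16_t)Lo == Addr);
  return 0;
}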
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left.  Used for Linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+  switch (RegName[0]) {
+  case 'r':
+  case 'f':
+  case 'v': return RegName + 1;
+  case 'c': if (RegName[1] == 'r') return RegName + 2;
+  }
+
+  return RegName;
+}
+
+void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                  raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    const char *RegName = getRegisterName(Op.getReg());
+    // The Linux and AIX assemblers do not take register prefixes.
+    if (!isDarwinSyntax())
+      RegName = stripRegisterPrefix(RegName);
+
+    O << RegName;
+    return;
+  }
+
+  if (Op.isImm()) {
+    O << Op.getImm();
+    return;
+  }
+
+  assert(Op.isExpr() && "unknown operand kind in printOperand");
+  O << *Op.getExpr();
+}
+
+void PPCInstPrinter::printSymbolLo(const MCInst *MI, unsigned OpNo,
+                                   raw_ostream &O) {
+  if (MI->getOperand(OpNo).isImm())
+    return printS16ImmOperand(MI, OpNo, O);
+
+  // FIXME: This is a terrible hack because we can't encode lo16() as an operand
+  // flag of a subtraction.  See the FIXME in GetSymbolRef in PPCMCInstLower.
+  if (MI->getOperand(OpNo).isExpr() &&
+      isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+    O << "lo16(";
+    printOperand(MI, OpNo, O);
+    O << ')';
+  } else {
+    printOperand(MI, OpNo, O);
+  }
+}
+
+void PPCInstPrinter::printSymbolHi(const MCInst *MI, unsigned OpNo,
+                                   raw_ostream &O) {
+  if (MI->getOperand(OpNo).isImm())
+    return printS16ImmOperand(MI, OpNo, O);
+
+  // FIXME: This is a terrible hack because we can't encode ha16() as an operand
+  // flag of a subtraction.  See the FIXME in GetSymbolRef in PPCMCInstLower.
+  if (MI->getOperand(OpNo).isExpr() &&
+      isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
+    O << "ha16(";
+    printOperand(MI, OpNo, O);
+    O << ')';
+  } else {
+    printOperand(MI, OpNo, O);
+  }
+}
+
+
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
new file mode 100644
index 0000000..ebc10da
--- /dev/null
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -0,0 +1,69 @@
+//===-- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCINSTPRINTER_H
+#define PPCINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+  class MCOperand;
+
+class PPCInstPrinter : public MCInstPrinter {
+  // 0 -> AIX, 1 -> Darwin.
+  unsigned SyntaxVariant;
+public:
+  PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant)
+    : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {}
+
+  bool isDarwinSyntax() const {
+    return SyntaxVariant == 1;
+  }
+
+  virtual void printInst(const MCInst *MI, raw_ostream &O);
+  virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+  static const char *getInstructionName(unsigned Opcode);
+
+  // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printPredicateOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O, const char *Modifier); + + + void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS16X4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemRegImmShifted(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + // FIXME: Remove + void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O); +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile index 1265f1d..030defe 100644 --- a/lib/Target/PowerPC/Makefile +++ b/lib/Target/PowerPC/Makefile @@ -16,8 +16,9 @@ BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \ PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \ PPCGenRegisterInfo.h.inc PPCGenRegisterInfo.inc \ PPCGenInstrInfo.inc PPCGenDAGISel.inc \ - PPCGenSubtarget.inc PPCGenCallingConv.inc + PPCGenSubtarget.inc PPCGenCallingConv.inc \ + PPCGenMCCodeEmitter.inc -DIRS = AsmPrinter TargetInfo +DIRS = InstPrinter TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 67e3a4a..7242f3a 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -15,24 +15,70 @@ #ifndef LLVM_TARGET_POWERPC_H #define LLVM_TARGET_POWERPC_H +#include <string> + // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC -#include "llvm/Target/TargetMachine.h" - namespace llvm { class PPCTargetMachine; class FunctionPass; class formatted_raw_ostream; + class JITCodeEmitter; + class Target; + class MachineInstr; + class AsmPrinter; + class MCInst; + class MCCodeEmitter; + class MCContext; + class TargetMachine; + class TargetAsmBackend; + + FunctionPass *createPPCBranchSelectionPass(); + FunctionPass *createPPCISelDag(PPCTargetMachine &TM); + FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, + JITCodeEmitter &MCE); + MCCodeEmitter *createPPCMCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx); + TargetAsmBackend *createPPCAsmBackend(const Target &, const std::string &); -FunctionPass *createPPCBranchSelectionPass(); -FunctionPass *createPPCISelDag(PPCTargetMachine &TM); -FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, - JITCodeEmitter &MCE); + void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + AsmPrinter &AP); + + extern Target ThePPC32Target; + extern Target ThePPC64Target; + + namespace PPCII { + + /// Target Operand Flag enum. 
+ enum TOF { + //===------------------------------------------------------------------===// + // PPC Specific MachineOperand flags. + MO_NO_FLAG, + + /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "FOO$stub" symbol. This is used for calls + /// and jumps to external functions on Tiger and earlier. + MO_DARWIN_STUB = 1, + + /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol) + MO_LO16 = 4, MO_HA16 = 8, -extern Target ThePPC32Target; -extern Target ThePPC64Target; + /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to + /// the function's picbase, e.g. lo16(symbol-picbase). + MO_PIC_FLAG = 16, + /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to + /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase). + MO_NLP_FLAG = 32, + + /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a + /// symbol with hidden visibility. This causes a different kind of + /// non-lazy-pointer to be generated. + MO_NLP_HIDDEN_FLAG = 64 + }; + } // end namespace PPCII + } // end namespace llvm; // Defines symbolic names for PowerPC registers. This defines a mapping from diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 27644b2..aabf494 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -99,8 +99,14 @@ def PPCInstrInfo : InstrInfo { let isLittleEndianEncoding = 1; } +def PPCAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} def PPC : Target { // Information about the instructions. let InstructionSet = PPCInstrInfo; + + let AssemblyWriters = [PPCAsmWriter]; } diff --git a/lib/Target/PowerPC/PPCAsmBackend.cpp b/lib/Target/PowerPC/PPCAsmBackend.cpp new file mode 100644 index 0000000..c4d4ac9 --- /dev/null +++ b/lib/Target/PowerPC/PPCAsmBackend.cpp @@ -0,0 +1,119 @@ +//===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmBackend.h" +#include "PPC.h" +#include "PPCFixupKinds.h" +#include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/Object/MachOFormat.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +namespace { +class PPCMachObjectWriter : public MCMachObjectTargetWriter { +public: + PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, + uint32_t CPUSubtype) + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {} +}; + +class PPCAsmBackend : public TargetAsmBackend { +const Target &TheTarget; +public: + PPCAsmBackend(const Target &T) : TargetAsmBackend(), TheTarget(T) {} + + unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[PPC::NumTargetFixupKinds] = { + // name offset bits flags + { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_ppc_lo16", 16, 16, 0 }, + { "fixup_ppc_ha16", 16, 16, 0 }, + { "fixup_ppc_lo14", 16, 14, 0 } + }; + + if (Kind < FirstTargetFixupKind) + return TargetAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + bool MayNeedRelaxation(const MCInst &Inst) const { + // FIXME. + return false; + } + + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const { + // FIXME. + assert(0 && "RelaxInstruction() unimplemented"); + } + + bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const { + // FIXME: Zero fill for now. That's not right, but at least will get the + // section size right. + for (uint64_t i = 0; i != Count; ++i) + OW->Write8(0); + return true; + } + + unsigned getPointerSize() const { + StringRef Name = TheTarget.getName(); + if (Name == "ppc64") return 8; + assert(Name == "ppc32" && "Unknown target name!"); + return 4; + } +}; +} // end anonymous namespace + + +// FIXME: This should be in a separate file. +namespace { + class DarwinPPCAsmBackend : public PPCAsmBackend { + public: + DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { } + + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const { + assert(0 && "UNIMP"); + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + bool is64 = getPointerSize() == 8; + return createMachObjectWriter(new PPCMachObjectWriter( + /*Is64Bit=*/is64, + (is64 ? 
object::mach::CTM_PowerPC64 : + object::mach::CTM_PowerPC), + object::mach::CSPPC_ALL), + OS, /*IsLittleEndian=*/false); + } + + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { + return false; + } + }; +} // end anonymous namespace + + + + +TargetAsmBackend *llvm::createPPCAsmBackend(const Target &T, + const std::string &TT) { + switch (Triple(TT).getOS()) { + case Triple::Darwin: + return new DarwinPPCAsmBackend(T); + default: + return 0; + } +} diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp new file mode 100644 index 0000000..8ed5d7f --- /dev/null +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -0,0 +1,696 @@ +//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to PowerPC assembly language. This printer is +// the output mechanism used by `llc'. +// +// Documentation at http://developer.apple.com/documentation/DeveloperTools/ +// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asmprinter" +#include "PPC.h" +#include "PPCPredicates.h" +#include "PPCTargetMachine.h" +#include "PPCSubtarget.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/SmallString.h" +#include "InstPrinter/PPCInstPrinter.h" +using namespace llvm; + +namespace { + class PPCAsmPrinter : public AsmPrinter { + protected: + DenseMap<MCSymbol*, MCSymbol*> TOC; + const PPCSubtarget &Subtarget; + uint64_t TOCLabelID; + public: + explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer), + Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {} + + virtual const char *getPassName() const { + return "PowerPC Assembly Printer"; + } + + + virtual void EmitInstruction(const MachineInstr *MI); + + void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + bool PrintAsmMemoryOperand(const MachineInstr *MI, 
unsigned OpNo,
+                               unsigned AsmVariant, const char *ExtraCode,
+                               raw_ostream &O);
+
+    MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+      MachineLocation Location;
+      assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+      // Frame address.  Currently handles register +- offset only.
+      if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
+        Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
+      else {
+        DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+      }
+      return Location;
+    }
+  };
+
+  /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+  class PPCLinuxAsmPrinter : public PPCAsmPrinter {
+  public:
+    explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : PPCAsmPrinter(TM, Streamer) {}
+
+    virtual const char *getPassName() const {
+      return "Linux PPC Assembly Printer";
+    }
+
+    bool doFinalization(Module &M);
+
+    virtual void EmitFunctionEntryLabel();
+  };
+
+  /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+  /// OS X
+  class PPCDarwinAsmPrinter : public PPCAsmPrinter {
+  public:
+    explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : PPCAsmPrinter(TM, Streamer) {}
+
+    virtual const char *getPassName() const {
+      return "Darwin PPC Assembly Printer";
+    }
+
+    bool doFinalization(Module &M);
+    void EmitStartOfAsmFile(Module &M);
+
+    void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
+  };
+} // end of anonymous namespace
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left.  Used for Linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+  switch (RegName[0]) {
+  case 'r':
+  case 'f':
+  case 'v': return RegName + 1;
+  case 'c': if (RegName[1] == 'r') return RegName + 2;
+  }
+
+  return RegName;
+}
+
+void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+                                 raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(OpNo);
+
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register: {
+    const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
+    // The Linux assembler (others?) does not take register mnemonics.
+    // FIXME - What about special registers used in mfspr/mtspr?
+    if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+    O << RegName;
+    return;
+  }
+  case MachineOperand::MO_Immediate:
+    O << MO.getImm();
+    return;
+
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    return;
+  case MachineOperand::MO_JumpTableIndex:
+    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    // FIXME: PIC relocation model
+    return;
+  case MachineOperand::MO_ConstantPoolIndex:
+    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    return;
+  case MachineOperand::MO_BlockAddress:
+    O << *GetBlockAddressSymbol(MO.getBlockAddress());
+    return;
+  case MachineOperand::MO_ExternalSymbol: {
+    // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() == Reloc::Static) { + O << *GetExternalSymbolSymbol(MO.getSymbolName()); + return; + } + + MCSymbol *NLPSym = + OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+ + MO.getSymbolName()+"$non_lazy_ptr"); + MachineModuleInfoImpl::StubValueTy &StubSym = + MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym); + if (StubSym.getPointer() == 0) + StubSym = MachineModuleInfoImpl:: + StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true); + + O << *NLPSym; + return; + } + case MachineOperand::MO_GlobalAddress: { + // Computing the address of a global symbol, not calling it. + const GlobalValue *GV = MO.getGlobal(); + MCSymbol *SymToPrint; + + // External or weakly linked global variables need non-lazily-resolved stubs + if (TM.getRelocationModel() != Reloc::Static && + (GV->isDeclaration() || GV->isWeakForLinker())) { + if (!GV->hasHiddenVisibility()) { + SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + MachineModuleInfoImpl::StubValueTy &StubSym = + MMI->getObjFileInfo<MachineModuleInfoMachO>() + .getGVStubEntry(SymToPrint); + if (StubSym.getPointer() == 0) + StubSym = MachineModuleInfoImpl:: + StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage()); + } else if (GV->isDeclaration() || GV->hasCommonLinkage() || + GV->hasAvailableExternallyLinkage()) { + SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + + MachineModuleInfoImpl::StubValueTy &StubSym = + MMI->getObjFileInfo<MachineModuleInfoMachO>(). + getHiddenGVStubEntry(SymToPrint); + if (StubSym.getPointer() == 0) + StubSym = MachineModuleInfoImpl:: + StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage()); + } else { + SymToPrint = Mang->getSymbol(GV); + } + } else { + SymToPrint = Mang->getSymbol(GV); + } + + O << *SymToPrint; + + printOffset(MO.getOffset(), O); + return; + } + + default: + O << "<unknown operand type: " << MO.getType() << ">"; + return; + } +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'c': // Don't print "$" before a global var name or constant. + break; // PPC never has a prefix. + case 'L': // Write second word of DImode reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isReg() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isReg()) + return true; + ++OpNo; // Return the high-part. + break; + case 'I': + // Write 'i' if an integer constant, otherwise nothing. Used to print + // addi vs add, etc. + if (MI->getOperand(OpNo).isImm()) + O << "i"; + return false; + } + } + + printOperand(MI, OpNo, O); + return false; +} + +// At the moment, all inline asm memory operands are a single register. +// In any case, the output of this routine should always be just one +// assembler operand. + +bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. 
+  assert(MI->getOperand(OpNo).isReg());
+  O << "0(";
+  printOperand(MI, OpNo, O);
+  O << ")";
+  return false;
+}
+
+
+/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
+/// the current output stream.
+///
+void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  MCInst TmpInst;
+
+  // Lower multi-instruction pseudo operations.
+  switch (MI->getOpcode()) {
+  default: break;
+  case TargetOpcode::DBG_VALUE: {
+    if (!isVerbose() || !OutStreamer.hasRawTextSupport()) return;
+
+    SmallString<32> Str;
+    raw_svector_ostream O(Str);
+    unsigned NOps = MI->getNumOperands();
+    assert(NOps==4);
+    O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+    // cast away const; DIVariable etc. do not take const operands for some
+    // reason.
+    DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+    O << V.getName();
+    O << " <- ";
+    // Frame address.  Currently handles register +- offset only.
+    assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+    O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O);
+    O << ']';
+    O << "+";
+    printOperand(MI, NOps-2, O);
+    OutStreamer.EmitRawText(O.str());
+    return;
+  }
+
+  case PPC::MovePCtoLR:
+  case PPC::MovePCtoLR8: {
+    // Transform %LR = MovePCtoLR
+    // Into this, where the label is the PIC base:
+    //     bl L1$pb
+    // L1$pb:
+    MCSymbol *PICBase = MF->getPICBaseSymbol();
+
+    // Emit the 'bl'.
+    TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here.
+
+
+    // FIXME: We would like an efficient form for this, so we don't have to do
+    // a lot of extra uniquing.
+    TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::
+                                             Create(PICBase, OutContext)));
+    OutStreamer.EmitInstruction(TmpInst);
+
+    // Emit the label.
+    OutStreamer.EmitLabel(PICBase);
+    return;
+  }
+  case PPC::LDtoc: {
+    // Transform %X3 = LDtoc <ga:@min1>, %X2
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
+
+    // Change the opcode to LD, and the global address operand to be a
+    // reference to the TOC entry we will synthesize later.
+    TmpInst.setOpcode(PPC::LD);
+    const MachineOperand &MO = MI->getOperand(1);
+    assert(MO.isGlobal());
+
+    // Map symbol -> label of TOC entry.
+    MCSymbol *&TOCEntry = TOC[Mang->getSymbol(MO.getGlobal())];
+    if (TOCEntry == 0)
+      TOCEntry = GetTempSymbol("C", TOCLabelID++);
+
+    const MCExpr *Exp =
+      MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
+                              OutContext);
+    TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+
+  case PPC::MFCRpseud:
+    // Transform: %R3 = MFCRpseud %CR7
+    // Into:      %R3 = MFCR      ;; cr7
+    OutStreamer.AddComment(PPCInstPrinter::
+                           getRegisterName(MI->getOperand(1).getReg()));
+    TmpInst.setOpcode(PPC::MFCR);
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+
+  LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
+  OutStreamer.EmitInstruction(TmpInst);
+}
+
+void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
+  if (!Subtarget.isPPC64())  // linux/ppc32 - Normal entry label.
+    return AsmPrinter::EmitFunctionEntryLabel();
+
+  // Emit an official procedure descriptor.
+  // FIXME 64-bit SVR4: Use MCSection here!
+  OutStreamer.EmitRawText(StringRef("\t.section\t\".opd\",\"aw\""));
+  OutStreamer.EmitRawText(StringRef("\t.align 3"));
+  OutStreamer.EmitLabel(CurrentFnSym);
+  OutStreamer.EmitRawText("\t.quad .L." + Twine(CurrentFnSym->getName()) +
+                          ",.TOC.@tocbase");
+  OutStreamer.EmitRawText(StringRef("\t.previous"));
+  OutStreamer.EmitRawText(".L." + Twine(CurrentFnSym->getName()) + ":");
+}
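The LDtoc lowering above uniques TOC entries in a DenseMap keyed by MCSymbol, and PPCLinuxAsmPrinter::doFinalization below walks that map to emit .tc directives; its FIXME notes that the iteration order is nondeterministic. One possible shape of a fix, sketched here rather than taken from the tree, is to copy the entries out and sort by symbol name before emission (an in-tree fix might instead prefer an insertion-ordered map):

// Sketch: emit the TOC in a stable order. Assumes the same 2.9-era
// MCStreamer/Twine APIs the surrounding code uses.
#include <algorithm>
#include <utility>
#include <vector>
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"

typedef std::pair<llvm::MCSymbol*, llvm::MCSymbol*> TOCEntry;

static bool tocLess(const TOCEntry &A, const TOCEntry &B) {
  return A.first->getName() < B.first->getName();
}

static void emitTOCDeterministically(
    const llvm::DenseMap<llvm::MCSymbol*, llvm::MCSymbol*> &TOC,
    llvm::MCStreamer &OutStreamer) {
  // Copy out of the hash map and sort by the keyed symbol's name.
  std::vector<TOCEntry> Entries(TOC.begin(), TOC.end());
  std::sort(Entries.begin(), Entries.end(), tocLess);
  for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
    OutStreamer.EmitLabel(Entries[i].second);
    OutStreamer.EmitRawText("\t.tc " +
                            llvm::Twine(Entries[i].first->getName()) +
                            "[TC]," + Entries[i].first->getName());
  }
}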
+
+
+bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
+  const TargetData *TD = TM.getTargetData();
+
+  bool isPPC64 = TD->getPointerSizeInBits() == 64;
+
+  if (isPPC64 && !TOC.empty()) {
+    // FIXME 64-bit SVR4: Use MCSection here?
+    OutStreamer.EmitRawText(StringRef("\t.section\t\".toc\",\"aw\""));
+
+    // FIXME: This is nondeterministic!
+    for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
+         E = TOC.end(); I != E; ++I) {
+      OutStreamer.EmitLabel(I->second);
+      OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) +
+                              "[TC]," + I->first->getName());
+    }
+  }
+
+  return AsmPrinter::doFinalization(M);
+}
+
+void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
+  static const char *const CPUDirectives[] = {
+    "",
+    "ppc",
+    "ppc601",
+    "ppc602",
+    "ppc603",
+    "ppc7400",
+    "ppc750",
+    "ppc970",
+    "ppc64"
+  };
+
+  unsigned Directive = Subtarget.getDarwinDirective();
+  if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970)
+    Directive = PPC::DIR_970;
+  if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
+    Directive = PPC::DIR_7400;
+  if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
+    Directive = PPC::DIR_64;
+  assert(Directive <= PPC::DIR_64 && "Directive out of range.");
+
+  // FIXME: This is a total hack, finish mc'izing the PPC backend.
+  if (OutStreamer.hasRawTextSupport())
+    OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
+
+  // Prime text sections so they are adjacent.  This reduces the likelihood a
+  // large data or debug section causes a branch to exceed the 16M limit.
+  const TargetLoweringObjectFileMachO &TLOFMacho =
+    static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+  OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    OutStreamer.SwitchSection(
+           OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+                                      MCSectionMachO::S_SYMBOL_STUBS |
+                                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                                      32, SectionKind::getText()));
+  } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
+    OutStreamer.SwitchSection(
+           OutContext.getMachOSection("__TEXT","__symbol_stub1",
+                                      MCSectionMachO::S_SYMBOL_STUBS |
+                                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                                      16, SectionKind::getText()));
+  }
+  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
+static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
+  // Remove $stub suffix, add $lazy_ptr.
+  SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
+  TmpStr += "$lazy_ptr";
+  return Ctx.GetOrCreateSymbol(TmpStr.str());
+}
+
+static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
+  // Add $tmp suffix to $stub, yielding $stub$tmp.
+ SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()); + TmpStr += "$tmp"; + return Ctx.GetOrCreateSymbol(TmpStr.str()); +} + +void PPCDarwinAsmPrinter:: +EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { + bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; + + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); + + // .lazy_symbol_pointer + const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection(); + + // Output stubs for dynamically-linked functions + if (TM.getRelocationModel() == Reloc::PIC_) { + const MCSection *StubSection = + OutContext.getMachOSection("__TEXT", "__picsymbolstub1", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 32, SectionKind::getText()); + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + OutStreamer.SwitchSection(StubSection); + EmitAlignment(4); + + MCSymbol *Stub = Stubs[i].first; + MCSymbol *RawSym = Stubs[i].second.getPointer(); + MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); + MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext); + + OutStreamer.EmitLabel(Stub); + OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + // FIXME: MCize this. + OutStreamer.EmitRawText(StringRef("\tmflr r0")); + OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName())); + OutStreamer.EmitLabel(AnonSymbol); + OutStreamer.EmitRawText(StringRef("\tmflr r11")); + OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+ + "-" + AnonSymbol->getName() + ")"); + OutStreamer.EmitRawText(StringRef("\tmtlr r0")); + + if (isPPC64) + OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) + + "-" + AnonSymbol->getName() + ")(r11)"); + else + OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) + + "-" + AnonSymbol->getName() + ")(r11)"); + OutStreamer.EmitRawText(StringRef("\tmtctr r12")); + OutStreamer.EmitRawText(StringRef("\tbctr")); + + OutStreamer.SwitchSection(LSPSection); + OutStreamer.EmitLabel(LazyPtr); + OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + + if (isPPC64) + OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper")); + else + OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper")); + } + OutStreamer.AddBlankLine(); + return; + } + + const MCSection *StubSection = + OutContext.getMachOSection("__TEXT","__symbol_stub1", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 16, SectionKind::getText()); + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + MCSymbol *Stub = Stubs[i].first; + MCSymbol *RawSym = Stubs[i].second.getPointer(); + MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); + + OutStreamer.SwitchSection(StubSection); + EmitAlignment(4); + OutStreamer.EmitLabel(Stub); + OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")"); + if (isPPC64) + OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) + + ")(r11)"); + else + OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) + + ")(r11)"); + OutStreamer.EmitRawText(StringRef("\tmtctr r12")); + OutStreamer.EmitRawText(StringRef("\tbctr")); + OutStreamer.SwitchSection(LSPSection); + OutStreamer.EmitLabel(LazyPtr); + OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + + if (isPPC64) + OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper")); + else + 
OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper")); + } + + OutStreamer.AddBlankLine(); +} + + +bool PPCDarwinAsmPrinter::doFinalization(Module &M) { + bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; + + // Darwin/PPC always uses mach-o. + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); + MachineModuleInfoMachO &MMIMacho = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList(); + if (!Stubs.empty()) + EmitFunctionStubs(Stubs); + + if (MAI->doesSupportExceptionHandling() && MMI) { + // Add the (possibly multiple) personalities to the set of global values. + // Only referenced functions get into the Personalities list. + const std::vector<const Function*> &Personalities = MMI->getPersonalities(); + for (std::vector<const Function*>::const_iterator I = Personalities.begin(), + E = Personalities.end(); I != E; ++I) { + if (*I) { + MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); + MachineModuleInfoImpl::StubValueTy &StubSym = + MMIMacho.getGVStubEntry(NLPSym); + StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true); + } + } + } + + // Output stubs for dynamically-linked functions. + Stubs = MMIMacho.GetGVStubList(); + + // Output macho stubs for external and common global variables. + if (!Stubs.empty()) { + // Switch with ".non_lazy_symbol_pointer" directive. + OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); + EmitAlignment(isPPC64 ? 3 : 2); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .indirect_symbol _foo + MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second; + OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol); + + if (MCSym.getInt()) + // External to current translation unit. + OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + else + // Internal to current translation unit. + // + // When we place the LSDA into the TEXT section, the type info pointers + // need to be indirect and pc-rel. We accomplish this by using NLPs. + // However, sometimes the types are local to the file. So we need to + // fill in the value for the NLP in those cases. + OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), + OutContext), + isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + } + + Stubs.clear(); + OutStreamer.AddBlankLine(); + } + + Stubs = MMIMacho.GetHiddenGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + EmitAlignment(isPPC64 ? 3 : 2); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .long _foo + OutStreamer.EmitValue(MCSymbolRefExpr:: + Create(Stubs[i].second.getPointer(), + OutContext), + isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + } + + Stubs.clear(); + OutStreamer.AddBlankLine(); + } + + // Funny Darwin hack: This flag tells the linker that no global symbols + // contain code that falls through to other global symbols (e.g. the obvious + // implementation of multiple entry points). If this doesn't occur, the + // linker can safely perform dead code stripping. Since LLVM never generates + // code that does this, it is always safe to set. 
+  OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+
+  return AsmPrinter::doFinalization(M);
+}
+
+/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
+/// for a MachineFunction to the given output stream, in a format that the
+/// target assembler can deal with.
+///
+static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
+                                           MCStreamer &Streamer) {
+  const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
+
+  if (Subtarget->isDarwin())
+    return new PPCDarwinAsmPrinter(tm, Streamer);
+  return new PPCLinuxAsmPrinter(tm, Streamer);
+}
+
+static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+                                             unsigned SyntaxVariant,
+                                             const MCAsmInfo &MAI) {
+  return new PPCInstPrinter(MAI, SyntaxVariant);
+}
+
+
+// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmPrinter() {
+  TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
+  TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
+
+  TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
+  TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
+}
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index df9ab52..42232a0 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -50,13 +50,24 @@ namespace {
     /// getBinaryCodeForInstr - This function, generated by the
     /// CodeEmitterGenerator using TableGen, produces the binary encoding for
     /// machine instructions.
+    unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
-    unsigned getBinaryCodeForInstr(const MachineInstr &MI);
-
+
+    MachineRelocation GetRelocation(const MachineOperand &MO,
+                                    unsigned RelocID) const;
+
     /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
-    unsigned getMachineOpValue(const MachineInstr &MI,
-                               const MachineOperand &MO);
+    unsigned getMachineOpValue(const MachineInstr &MI,
+                               const MachineOperand &MO) const;
+
+    unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+    unsigned getHA16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
 
     const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
@@ -67,10 +78,6 @@ namespace {
     /// emitBasicBlock - emits the given MachineBasicBlock to memory
     ///
     void emitBasicBlock(MachineBasicBlock &MBB);
-
-    /// getValueBit - return the particular bit of Val
-    ///
-    unsigned getValueBit(int64_t Val, unsigned bit) { return (Val >> bit) & 1; }
   };
 }
@@ -128,125 +135,127 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
   }
 }
 
-unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
-                                           const MachineOperand &MO) {
+unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
+                                             unsigned OpNo) const {
+  const MachineOperand &MO = MI.getOperand(OpNo);
+  assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+         (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
+  return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+}
 
-  unsigned rv = 0; // Return value; defaults to 0 for unhandled cases
-                   // or things that get fixed up later by the JIT.
- if (MO.isReg()) { - rv = PPCRegisterInfo::getRegisterNumbering(MO.getReg()); +MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO, + unsigned RelocID) const { + // If in PIC mode, we need to encode the negated address of the + // 'movepctolr' into the unrelocated field. After relocation, we'll have + // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm + // field, we get &gv. This doesn't happen for branch relocations, which are + // always implicitly pc relative. + intptr_t Cst = 0; + if (TM.getRelocationModel() == Reloc::PIC_) { + assert(MovePCtoLROffset && "MovePCtoLR not seen yet?"); + Cst = -(intptr_t)MovePCtoLROffset - 4; + } + + if (MO.isGlobal()) + return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID, + const_cast<GlobalValue *>(MO.getGlobal()), + Cst, isa<Function>(MO.getGlobal())); + if (MO.isSymbol()) + return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + RelocID, MO.getSymbolName(), Cst); + if (MO.isCPI()) + return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + RelocID, MO.getIndex(), Cst); - // Special encoding for MTCRF and MFOCRF, which uses a bit mask for the - // register, not the register number directly. - if ((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) && - (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)) { - rv = 0x80 >> rv; - } - } else if (MO.isImm()) { - rv = MO.getImm(); - } else if (MO.isGlobal() || MO.isSymbol() || - MO.isCPI() || MO.isJTI()) { - unsigned Reloc = 0; - if (MI.getOpcode() == PPC::BL_Darwin || MI.getOpcode() == PPC::BL8_Darwin || - MI.getOpcode() == PPC::BL_SVR4 || MI.getOpcode() == PPC::BL8_ELF || - MI.getOpcode() == PPC::TAILB || MI.getOpcode() == PPC::TAILB8) - Reloc = PPC::reloc_pcrel_bx; - else { - if (TM.getRelocationModel() == Reloc::PIC_) { - assert(MovePCtoLROffset && "MovePCtoLR not seen yet?"); - } - switch (MI.getOpcode()) { - default: MI.dump(); llvm_unreachable("Unknown instruction for relocation!"); - case PPC::LIS: - case PPC::LIS8: - case PPC::ADDIS: - case PPC::ADDIS8: - Reloc = PPC::reloc_absolute_high; // Pointer to symbol - break; - case PPC::LI: - case PPC::LI8: - case PPC::LA: - // Loads. - case PPC::LBZ: - case PPC::LBZ8: - case PPC::LHA: - case PPC::LHA8: - case PPC::LHZ: - case PPC::LHZ8: - case PPC::LWZ: - case PPC::LWZ8: - case PPC::LFS: - case PPC::LFD: - - // Stores. 
- case PPC::STB: - case PPC::STB8: - case PPC::STH: - case PPC::STH8: - case PPC::STW: - case PPC::STW8: - case PPC::STFS: - case PPC::STFD: - Reloc = PPC::reloc_absolute_low; - break; - - case PPC::LWA: - case PPC::LD: - case PPC::STD: - case PPC::STD_32: - Reloc = PPC::reloc_absolute_low_ix; - break; - } - } + if (MO.isMBB()) + return MachineRelocation::getBB(MCE.getCurrentPCOffset(), + RelocID, MO.getMBB()); + + assert(MO.isJTI()); + return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + RelocID, MO.getIndex(), Cst); +} - MachineRelocation R; - if (MO.isGlobal()) { - R = MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - const_cast<GlobalValue *>(MO.getGlobal()), 0, - isa<Function>(MO.getGlobal())); - } else if (MO.isSymbol()) { - R = MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), - Reloc, MO.getSymbolName(), 0); - } else if (MO.isCPI()) { - R = MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), - Reloc, MO.getIndex(), 0); - } else { - assert(MO.isJTI()); - R = MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), - Reloc, MO.getIndex(), 0); - } +unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); + + MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx)); + return 0; +} - // If in PIC mode, we need to encode the negated address of the - // 'movepctolr' into the unrelocated field. After relocation, we'll have - // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm - // field, we get &gv. This doesn't happen for branch relocations, which are - // always implicitly pc relative. - if (TM.getRelocationModel() == Reloc::PIC_ && Reloc != PPC::reloc_pcrel_bx){ - assert(MovePCtoLROffset && "MovePCtoLR not seen yet?"); - R.setConstantVal(-(intptr_t)MovePCtoLROffset - 4); - } - MCE.addRelocation(R); - - } else if (MO.isMBB()) { - unsigned Reloc = 0; - unsigned Opcode = MI.getOpcode(); - if (Opcode == PPC::B || Opcode == PPC::BL_Darwin || - Opcode == PPC::BLA_Darwin|| Opcode == PPC::BL_SVR4 || - Opcode == PPC::BLA_SVR4) - Reloc = PPC::reloc_pcrel_bx; - else // BCC instruction - Reloc = PPC::reloc_pcrel_bcx; - - MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), - Reloc, MO.getMBB())); - } else { -#ifndef NDEBUG - errs() << "ERROR: Unknown type of MachineOperand: " << MO << "\n"; -#endif - llvm_unreachable(0); - } +unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx)); + return 0; +} - return rv; +unsigned PPCCodeEmitter::getHA16Encoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); + + MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_high)); + return 0; +} + +unsigned PPCCodeEmitter::getLO16Encoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); + + MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low)); + return 0; +} + +unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI, + unsigned OpNo) const { + // Encode (imm, reg) as a memri, which has the low 16-bits as the + // displacement and the next 5 bits as the register #. 
+ assert(MI.getOperand(OpNo+1).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16; + + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) + return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits; + + // Add a fixup for the displacement field. + MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low)); + return RegBits; +} + +unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI, + unsigned OpNo) const { + // Encode (imm, reg) as a memrix, which has the low 14-bits as the + // displacement and the next 5 bits as the register #. + assert(MI.getOperand(OpNo+1).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14; + + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) + return (getMachineOpValue(MI, MO) & 0x3FFF) | RegBits; + + MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix)); + return RegBits; +} + + +unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const { + + if (MO.isReg()) { + // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. + // The GPR operand should come through here though. + assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || + MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); + return PPCRegisterInfo::getRegisterNumbering(MO.getReg()); + } + + assert(MO.isImm() && + "Relocation required in an instruction that we cannot encode!"); + return MO.getImm(); } #include "PPCGenCodeEmitter.inc" diff --git a/lib/Target/PowerPC/PPCFixupKinds.h b/lib/Target/PowerPC/PPCFixupKinds.h new file mode 100644 index 0000000..b3c889e --- /dev/null +++ b/lib/Target/PowerPC/PPCFixupKinds.h @@ -0,0 +1,45 @@ +//===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PPC_PPCFIXUPKINDS_H +#define LLVM_PPC_PPCFIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace PPC { +enum Fixups { + // fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like 'b' + // and 'bl'. + fixup_ppc_br24 = FirstTargetFixupKind, + + /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional + /// branches. + fixup_ppc_brcond14, + + /// fixup_ppc_lo16 - A 16-bit fixup corresponding to lo16(_foo) for instrs + /// like 'li'. + fixup_ppc_lo16, + + /// fixup_ppc_ha16 - A 16-bit fixup corresponding to ha16(_foo) for instrs + /// like 'lis'. + fixup_ppc_ha16, + + /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs + /// like 'std'. + fixup_ppc_lo14, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +} +} + +#endif diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h deleted file mode 100644 index 7587b03..0000000 --- a/lib/Target/PowerPC/PPCFrameInfo.h +++ /dev/null @@ -1,300 +0,0 @@ -//===-- PPCFrameInfo.h - Define TargetFrameInfo for PowerPC -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef POWERPC_FRAMEINFO_H -#define POWERPC_FRAMEINFO_H - -#include "PPC.h" -#include "PPCSubtarget.h" -#include "llvm/Target/TargetFrameInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/STLExtras.h" - -namespace llvm { - -class PPCFrameInfo: public TargetFrameInfo { - const TargetMachine &TM; - -public: - PPCFrameInfo(const TargetMachine &tm, bool LP64) - : TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), TM(tm) { - } - - /// getReturnSaveOffset - Return the previous frame offset to save the - /// return address. - static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) { - if (isDarwinABI) - return isPPC64 ? 16 : 8; - // SVR4 ABI: - return isPPC64 ? 16 : 4; - } - - /// getFramePointerSaveOffset - Return the previous frame offset to save the - /// frame pointer. - static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) { - // For the Darwin ABI: - // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area - // for saving the frame pointer (if needed.) While the published ABI has - // not used this slot since at least MacOSX 10.2, there is older code - // around that does use it, and that needs to continue to work. - if (isDarwinABI) - return isPPC64 ? -8U : -4U; - - // SVR4 ABI: First slot in the general register save area. - return isPPC64 ? -8U : -4U; - } - - /// getLinkageSize - Return the size of the PowerPC ABI linkage area. - /// - static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) { - if (isDarwinABI || isPPC64) - return 6 * (isPPC64 ? 8 : 4); - - // SVR4 ABI: - return 8; - } - - /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI - /// argument area. - static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) { - // For the Darwin ABI / 64-bit SVR4 ABI: - // The prolog code of the callee may store up to 8 GPR argument registers to - // the stack, allowing va_start to index over them in memory if its varargs. - // Because we cannot tell if this is needed on the caller side, we have to - // conservatively assume that it is needed. As such, make sure we have at - // least enough stack space for the caller to store the 8 GPRs. - if (isDarwinABI || isPPC64) - return 8 * (isPPC64 ? 8 : 4); - - // 32-bit SVR4 ABI: - // There is no default stack allocated for the 8 first GPR arguments. - return 0; - } - - /// getMinCallFrameSize - Return the minimum size a call frame can be using - /// the PowerPC ABI. - static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) { - // The call frame needs to be at least big enough for linkage and 8 args. - return getLinkageSize(isPPC64, isDarwinABI) + - getMinCallArgumentsSize(isPPC64, isDarwinABI); - } - - // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. - const SpillSlot * - getCalleeSavedSpillSlots(unsigned &NumEntries) const { - if (TM.getSubtarget<PPCSubtarget>().isDarwinABI()) { - NumEntries = 1; - if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { - static const SpillSlot darwin64Offsets = {PPC::X31, -8}; - return &darwin64Offsets; - } else { - static const SpillSlot darwinOffsets = {PPC::R31, -4}; - return &darwinOffsets; - } - } - - // Early exit if not using the SVR4 ABI. 
- if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) { - NumEntries = 0; - return 0; - } - - static const SpillSlot Offsets[] = { - // Floating-point register save area offsets. - {PPC::F31, -8}, - {PPC::F30, -16}, - {PPC::F29, -24}, - {PPC::F28, -32}, - {PPC::F27, -40}, - {PPC::F26, -48}, - {PPC::F25, -56}, - {PPC::F24, -64}, - {PPC::F23, -72}, - {PPC::F22, -80}, - {PPC::F21, -88}, - {PPC::F20, -96}, - {PPC::F19, -104}, - {PPC::F18, -112}, - {PPC::F17, -120}, - {PPC::F16, -128}, - {PPC::F15, -136}, - {PPC::F14, -144}, - - // General register save area offsets. - {PPC::R31, -4}, - {PPC::R30, -8}, - {PPC::R29, -12}, - {PPC::R28, -16}, - {PPC::R27, -20}, - {PPC::R26, -24}, - {PPC::R25, -28}, - {PPC::R24, -32}, - {PPC::R23, -36}, - {PPC::R22, -40}, - {PPC::R21, -44}, - {PPC::R20, -48}, - {PPC::R19, -52}, - {PPC::R18, -56}, - {PPC::R17, -60}, - {PPC::R16, -64}, - {PPC::R15, -68}, - {PPC::R14, -72}, - - // CR save area offset. - // FIXME SVR4: Disable CR save area for now. -// {PPC::CR2, -4}, -// {PPC::CR3, -4}, -// {PPC::CR4, -4}, -// {PPC::CR2LT, -4}, -// {PPC::CR2GT, -4}, -// {PPC::CR2EQ, -4}, -// {PPC::CR2UN, -4}, -// {PPC::CR3LT, -4}, -// {PPC::CR3GT, -4}, -// {PPC::CR3EQ, -4}, -// {PPC::CR3UN, -4}, -// {PPC::CR4LT, -4}, -// {PPC::CR4GT, -4}, -// {PPC::CR4EQ, -4}, -// {PPC::CR4UN, -4}, - - // VRSAVE save area offset. - {PPC::VRSAVE, -4}, - - // Vector register save area - {PPC::V31, -16}, - {PPC::V30, -32}, - {PPC::V29, -48}, - {PPC::V28, -64}, - {PPC::V27, -80}, - {PPC::V26, -96}, - {PPC::V25, -112}, - {PPC::V24, -128}, - {PPC::V23, -144}, - {PPC::V22, -160}, - {PPC::V21, -176}, - {PPC::V20, -192} - }; - - static const SpillSlot Offsets64[] = { - // Floating-point register save area offsets. - {PPC::F31, -8}, - {PPC::F30, -16}, - {PPC::F29, -24}, - {PPC::F28, -32}, - {PPC::F27, -40}, - {PPC::F26, -48}, - {PPC::F25, -56}, - {PPC::F24, -64}, - {PPC::F23, -72}, - {PPC::F22, -80}, - {PPC::F21, -88}, - {PPC::F20, -96}, - {PPC::F19, -104}, - {PPC::F18, -112}, - {PPC::F17, -120}, - {PPC::F16, -128}, - {PPC::F15, -136}, - {PPC::F14, -144}, - - // General register save area offsets. - // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit - // mode? - {PPC::R31, -4}, - {PPC::R30, -12}, - {PPC::R29, -20}, - {PPC::R28, -28}, - {PPC::R27, -36}, - {PPC::R26, -44}, - {PPC::R25, -52}, - {PPC::R24, -60}, - {PPC::R23, -68}, - {PPC::R22, -76}, - {PPC::R21, -84}, - {PPC::R20, -92}, - {PPC::R19, -100}, - {PPC::R18, -108}, - {PPC::R17, -116}, - {PPC::R16, -124}, - {PPC::R15, -132}, - {PPC::R14, -140}, - - {PPC::X31, -8}, - {PPC::X30, -16}, - {PPC::X29, -24}, - {PPC::X28, -32}, - {PPC::X27, -40}, - {PPC::X26, -48}, - {PPC::X25, -56}, - {PPC::X24, -64}, - {PPC::X23, -72}, - {PPC::X22, -80}, - {PPC::X21, -88}, - {PPC::X20, -96}, - {PPC::X19, -104}, - {PPC::X18, -112}, - {PPC::X17, -120}, - {PPC::X16, -128}, - {PPC::X15, -136}, - {PPC::X14, -144}, - - // CR save area offset. - // FIXME SVR4: Disable CR save area for now. -// {PPC::CR2, -4}, -// {PPC::CR3, -4}, -// {PPC::CR4, -4}, -// {PPC::CR2LT, -4}, -// {PPC::CR2GT, -4}, -// {PPC::CR2EQ, -4}, -// {PPC::CR2UN, -4}, -// {PPC::CR3LT, -4}, -// {PPC::CR3GT, -4}, -// {PPC::CR3EQ, -4}, -// {PPC::CR3UN, -4}, -// {PPC::CR4LT, -4}, -// {PPC::CR4GT, -4}, -// {PPC::CR4EQ, -4}, -// {PPC::CR4UN, -4}, - - // VRSAVE save area offset. 
- {PPC::VRSAVE, -4}, - - // Vector register save area - {PPC::V31, -16}, - {PPC::V30, -32}, - {PPC::V29, -48}, - {PPC::V28, -64}, - {PPC::V27, -80}, - {PPC::V26, -96}, - {PPC::V25, -112}, - {PPC::V24, -128}, - {PPC::V23, -144}, - {PPC::V22, -160}, - {PPC::V21, -176}, - {PPC::V20, -192} - }; - - if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { - NumEntries = array_lengthof(Offsets64); - - return Offsets64; - } else { - NumEntries = array_lengthof(Offsets); - - return Offsets; - } - } -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp new file mode 100644 index 0000000..6aca6b0 --- /dev/null +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -0,0 +1,971 @@ +//=====- PPCFrameLowering.cpp - PPC Frame Information -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PPC implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "PPCFrameLowering.h" +#include "PPCInstrInfo.h" +#include "PPCMachineFunctionInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; + +// FIXME This disables some code that aligns the stack to a boundary bigger than +// the default (16 bytes on Darwin) when there is a stack local of greater +// alignment. This does not currently work, because the delta between old and +// new stack pointers is added to offsets that reference incoming parameters +// after the prolog is generated, and the code that does that doesn't handle a +// variable delta. You don't want to do that anyway; a better approach is to +// reserve another register that retains to the incoming stack pointer, and +// reference parameters relative to that. +#define ALIGN_STACK 0 + + +/// VRRegNo - Map from a numbered VR register to its enum value. +/// +static const unsigned short VRRegNo[] = { + PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , + PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, + PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 +}; + +/// RemoveVRSaveCode - We have found that this function does not need any code +/// to manipulate the VRSAVE register, even though it uses vector registers. +/// This can happen when the only registers used are known to be live in or out +/// of the function. Remove all of the VRSAVE related code from the function. +static void RemoveVRSaveCode(MachineInstr *MI) { + MachineBasicBlock *Entry = MI->getParent(); + MachineFunction *MF = Entry->getParent(); + + // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 
+ MachineBasicBlock::iterator MBBI = MI; + ++MBBI; + assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); + MBBI->eraseFromParent(); + + bool RemovedAllMTVRSAVEs = true; + // See if we can find and remove the MTVRSAVE instruction from all of the + // epilog blocks. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { + // If last instruction is a return instruction, add an epilogue + if (!I->empty() && I->back().getDesc().isReturn()) { + bool FoundIt = false; + for (MBBI = I->end(); MBBI != I->begin(); ) { + --MBBI; + if (MBBI->getOpcode() == PPC::MTVRSAVE) { + MBBI->eraseFromParent(); // remove it. + FoundIt = true; + break; + } + } + RemovedAllMTVRSAVEs &= FoundIt; + } + } + + // If we found and removed all MTVRSAVE instructions, remove the read of + // VRSAVE as well. + if (RemovedAllMTVRSAVEs) { + MBBI = MI; + assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); + --MBBI; + assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); + MBBI->eraseFromParent(); + } + + // Finally, nuke the UPDATE_VRSAVE. + MI->eraseFromParent(); +} + +// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the +// instruction selector. Based on the vector registers that have been used, +// transform this into the appropriate ORI instruction. +static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { + MachineFunction *MF = MI->getParent()->getParent(); + DebugLoc dl = MI->getDebugLoc(); + + unsigned UsedRegMask = 0; + for (unsigned i = 0; i != 32; ++i) + if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i])) + UsedRegMask |= 1 << (31-i); + + // Live in and live out values already must be in the mask, so don't bother + // marking them. + for (MachineRegisterInfo::livein_iterator + I = MF->getRegInfo().livein_begin(), + E = MF->getRegInfo().livein_end(); I != E; ++I) { + unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first); + if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. + UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. + } + for (MachineRegisterInfo::liveout_iterator + I = MF->getRegInfo().liveout_begin(), + E = MF->getRegInfo().liveout_end(); I != E; ++I) { + unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I); + if (VRRegNo[RegNo] == *I) // If this really is a vector reg. + UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. + } + + // If no registers are used, turn this into a copy. + if (UsedRegMask == 0) { + // Remove all VRSAVE code. 
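
// A standalone sketch of the VRSAVE mask being built above, with
// illustrative names: bit (31 - N) of the 32-bit mask corresponds to
// vector register VN, and registers that are live into or out of the
// function are already reflected in VRSAVE, so their bits are cleared
// again. The ORI/ORIS splitting of the final mask continues below.
#include <cstdint>

uint32_t buildVRSaveMask(const bool Used[32], const bool LiveInOrOut[32]) {
  uint32_t Mask = 0;
  for (unsigned N = 0; N != 32; ++N)
    if (Used[N])
      Mask |= 1u << (31 - N);    // V0 maps to the MSB, V31 to the LSB
  for (unsigned N = 0; N != 32; ++N)
    if (LiveInOrOut[N])
      Mask &= ~(1u << (31 - N)); // already covered; no need to mark
  return Mask;                   // 0 means all VRSAVE code can go away
}
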
+ RemoveVRSaveCode(MI); + return; + } + + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + + if ((UsedRegMask & 0xFFFF) == UsedRegMask) { + if (DstReg != SrcReg) + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) + .addReg(SrcReg) + .addImm(UsedRegMask); + else + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(UsedRegMask); + } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { + if (DstReg != SrcReg) + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) + .addReg(SrcReg) + .addImm(UsedRegMask >> 16); + else + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(UsedRegMask >> 16); + } else { + if (DstReg != SrcReg) + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) + .addReg(SrcReg) + .addImm(UsedRegMask >> 16); + else + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) + .addReg(SrcReg, RegState::Kill) + .addImm(UsedRegMask >> 16); + + BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) + .addReg(DstReg, RegState::Kill) + .addImm(UsedRegMask & 0xFFFF); + } + + // Remove the old UPDATE_VRSAVE instruction. + MI->eraseFromParent(); +} + +/// determineFrameLayout - Determine the size of the frame and maximum call +/// frame size. +void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Get the number of bytes to allocate from the FrameInfo + unsigned FrameSize = MFI->getStackSize(); + + // Get the alignments provided by the target, and the maximum alignment + // (if any) of the fixed frame objects. + unsigned MaxAlign = MFI->getMaxAlignment(); + unsigned TargetAlign = getStackAlignment(); + unsigned AlignMask = TargetAlign - 1; // + + // If we are a leaf function, and use up to 224 bytes of stack space, + // don't have a frame pointer, calls, or dynamic alloca then we do not need + // to adjust the stack pointer (we fit in the Red Zone). + bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); + // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. + if (!DisableRedZone && + FrameSize <= 224 && // Fits in red zone. + !MFI->hasVarSizedObjects() && // No dynamic alloca. + !MFI->adjustsStack() && // No calls. + (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. + // No need for frame + MFI->setStackSize(0); + return; + } + + // Get the maximum call frame size of all the calls. + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + + // Maximum call frame needs to be at least big enough for linkage and 8 args. + unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(), + Subtarget.isDarwinABI()); + maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); + + // If we have dynamic alloca then maxCallFrameSize needs to be aligned so + // that allocations will be aligned. + if (MFI->hasVarSizedObjects()) + maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; + + // Update maximum call frame size. + MFI->setMaxCallFrameSize(maxCallFrameSize); + + // Include call frame size in total. + FrameSize += maxCallFrameSize; + + // Make sure the frame is aligned. + FrameSize = (FrameSize + AlignMask) & ~AlignMask; + + // Update frame info. + MFI->setStackSize(FrameSize); +} + +// hasFP - Return true if the specified function actually has a dedicated frame +// pointer register. 
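
// A condensed standalone sketch of determineFrameLayout() above
// (illustrative names; Align must be a power of two): small leaf
// frames fit in the 224-byte red zone and need no stack adjustment,
// otherwise the call frame is folded in and the total is rounded up
// to the target stack alignment with the usual mask trick.
#include <algorithm>

unsigned layoutFrame(unsigned LocalsSize, unsigned MaxCallFrame,
                     unsigned MinCallFrame, unsigned Align,
                     bool IsLeafWithoutAlloca) {
  if (IsLeafWithoutAlloca && LocalsSize <= 224)
    return 0;                                 // fits in the red zone
  unsigned AlignMask = Align - 1;
  unsigned CallFrame = std::max(MaxCallFrame, MinCallFrame);
  return (LocalsSize + CallFrame + AlignMask) & ~AlignMask; // round up
}
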
+bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + // FIXME: This is pretty much broken by design: hasFP() might be called really + // early, before the stack layout was calculated and thus hasFP() might return + // true or false here depending on the time of call. + return (MFI->getStackSize()) && needsFP(MF); +} + +// needsFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas or +// if frame pointer elimination is disabled. +bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Naked functions have no stack frame pushed, so we don't have a frame + // pointer. + if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + return false; + + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || + (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); +} + + +void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo()); + + MachineModuleInfo &MMI = MF.getMMI(); + DebugLoc dl; + bool needsFrameMoves = MMI.hasDebugInfo() || + !MF.getFunction()->doesNotThrow() || + UnwindTablesMandatory; + + // Prepare for frame info. + MCSymbol *FrameLabel = 0; + + // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, + // process it. + for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { + if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { + HandleVRSaveUpdate(MBBI, TII); + break; + } + } + + // Move MBBI back to the beginning of the function. + MBBI = MBB.begin(); + + // Work out frame sizes. + // FIXME: determineFrameLayout() may change the frame size. This should be + // moved upper, to some hook. + determineFrameLayout(MF); + unsigned FrameSize = MFI->getStackSize(); + + int NegFrameSize = -FrameSize; + + // Get processor type. + bool isPPC64 = Subtarget.isPPC64(); + // Get operating system + bool isDarwinABI = Subtarget.isDarwinABI(); + // Check if the link register (LR) must be saved. + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + bool MustSaveLR = FI->mustSaveLR(); + // Do we have a frame pointer for this function? 
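
// For reference, the ABI save-slot arithmetic used by the prologue and
// epilogue below, as a standalone sketch: the link register is stored
// at a small positive offset into the caller's linkage area, while the
// frame pointer is stashed just below the incoming stack pointer.
unsigned returnSaveOffset(bool isPPC64, bool isDarwinABI) {
  if (isDarwinABI)
    return isPPC64 ? 16 : 8;
  return isPPC64 ? 16 : 4; // SVR4
}

int framePointerSaveOffset(bool isPPC64) {
  return isPPC64 ? -8 : -4; // first slot of the GPR save area
}
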
+ bool HasFP = hasFP(MF); + + int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); + + int FPOffset = 0; + if (HasFP) { + if (Subtarget.isSVR4ABI()) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + int FPIndex = FI->getFramePointerSaveIndex(); + assert(FPIndex && "No Frame Pointer Save Slot!"); + FPOffset = FFI->getObjectOffset(FPIndex); + } else { + FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); + } + } + + if (isPPC64) { + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0); + + if (HasFP) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) + .addReg(PPC::X31) + .addImm(FPOffset/4) + .addReg(PPC::X1); + + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) + .addReg(PPC::X0) + .addImm(LROffset / 4) + .addReg(PPC::X1); + } else { + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0); + + if (HasFP) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) + .addReg(PPC::R31) + .addImm(FPOffset) + .addReg(PPC::R1); + + if (MustSaveLR) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) + .addReg(PPC::R0) + .addImm(LROffset) + .addReg(PPC::R1); + } + + // Skip if a leaf routine. + if (!FrameSize) return; + + // Get stack alignments. + unsigned TargetAlign = getStackAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); + + // Adjust stack pointer: r1 += NegFrameSize. + // If there is a preferred stack alignment, align R1 now + if (!isPPC64) { + // PPC32. + if (ALIGN_STACK && MaxAlign > TargetAlign) { + assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && + "Invalid alignment!"); + assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!"); + + BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0) + .addReg(PPC::R1) + .addImm(0) + .addImm(32 - Log2_32(MaxAlign)) + .addImm(31); + BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0) + .addReg(PPC::R0, RegState::Kill) + .addImm(NegFrameSize); + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) + .addReg(PPC::R1) + .addReg(PPC::R1) + .addReg(PPC::R0); + } else if (isInt<16>(NegFrameSize)) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1) + .addReg(PPC::R1) + .addImm(NegFrameSize) + .addReg(PPC::R1); + } else { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0) + .addImm(NegFrameSize >> 16); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) + .addReg(PPC::R0, RegState::Kill) + .addImm(NegFrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) + .addReg(PPC::R1) + .addReg(PPC::R1) + .addReg(PPC::R0); + } + } else { // PPC64. 
+ if (ALIGN_STACK && MaxAlign > TargetAlign) { + assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && + "Invalid alignment!"); + assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!"); + + BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0) + .addReg(PPC::X1) + .addImm(0) + .addImm(64 - Log2_32(MaxAlign)); + BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0) + .addReg(PPC::X0) + .addImm(NegFrameSize); + BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) + .addReg(PPC::X1) + .addReg(PPC::X1) + .addReg(PPC::X0); + } else if (isInt<16>(NegFrameSize)) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1) + .addReg(PPC::X1) + .addImm(NegFrameSize / 4) + .addReg(PPC::X1); + } else { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0) + .addImm(NegFrameSize >> 16); + BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0) + .addReg(PPC::X0, RegState::Kill) + .addImm(NegFrameSize & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) + .addReg(PPC::X1) + .addReg(PPC::X1) + .addReg(PPC::X0); + } + } + + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + + // Add the "machine moves" for the instructions we generated above, but in + // reverse order. + if (needsFrameMoves) { + // Mark effective beginning of when frame pointer becomes valid. + FrameLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel); + + // Show update of SP. + if (NegFrameSize) { + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize); + Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); + } else { + MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31); + Moves.push_back(MachineMove(FrameLabel, SP, SP)); + } + + if (HasFP) { + MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset); + MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31); + Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); + } + + if (MustSaveLR) { + MachineLocation LRDst(MachineLocation::VirtualFP, LROffset); + MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR); + Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc)); + } + } + + MCSymbol *ReadyLabel = 0; + + // If there is a frame pointer, copy R1 into R31 + if (HasFP) { + if (!isPPC64) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31) + .addReg(PPC::R1) + .addReg(PPC::R1); + } else { + BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31) + .addReg(PPC::X1) + .addReg(PPC::X1); + } + + if (needsFrameMoves) { + ReadyLabel = MMI.getContext().CreateTempSymbol(); + + // Mark effective beginning of when frame pointer is ready. + BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel); + + MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) : + (isPPC64 ? PPC::X1 : PPC::R1)); + MachineLocation FPSrc(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc)); + } + } + + if (needsFrameMoves) { + MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel; + + // Add callee saved registers to move list. 
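
// The LIS/ORI (and LIS8/ORI8) pairs above are the standard PowerPC
// idiom for materializing a 32-bit constant that does not fit in a
// signed 16-bit immediate: lis puts the high half into the upper 16
// bits, ori ORs in the low half. A standalone sketch of the 32-bit
// view (the real lis also sign-extends into the upper word on 64-bit):
#include <cassert>
#include <cstdint>

uint32_t lis(uint16_t Hi)               { return uint32_t(Hi) << 16; }
uint32_t ori(uint32_t Reg, uint16_t Lo) { return Reg | Lo; }

int32_t materialize(int32_t Value) {
  uint32_t R = lis(uint16_t(uint32_t(Value) >> 16)); // high 16 bits
  R = ori(R, uint16_t(Value & 0xFFFF));              // low 16 bits
  assert(int32_t(R) == Value && "round-trips for any 32-bit value");
  return int32_t(R);
}
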
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); + unsigned Reg = CSI[I].getReg(); + if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; + MachineLocation CSDst(MachineLocation::VirtualFP, Offset); + MachineLocation CSSrc(Reg); + Moves.push_back(MachineMove(Label, CSDst, CSSrc)); + } + } +} + +void PPCFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + assert(MBBI != MBB.end() && "Returning block has no terminator"); + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo()); + + unsigned RetOpcode = MBBI->getOpcode(); + DebugLoc dl; + + assert((RetOpcode == PPC::BLR || + RetOpcode == PPC::TCRETURNri || + RetOpcode == PPC::TCRETURNdi || + RetOpcode == PPC::TCRETURNai || + RetOpcode == PPC::TCRETURNri8 || + RetOpcode == PPC::TCRETURNdi8 || + RetOpcode == PPC::TCRETURNai8) && + "Can only insert epilog into returning blocks"); + + // Get alignment info so we know how to restore r1 + const MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned TargetAlign = getStackAlignment(); + unsigned MaxAlign = MFI->getMaxAlignment(); + + // Get the number of bytes allocated from the FrameInfo. + int FrameSize = MFI->getStackSize(); + + // Get processor type. + bool isPPC64 = Subtarget.isPPC64(); + // Get operating system + bool isDarwinABI = Subtarget.isDarwinABI(); + // Check if the link register (LR) has been saved. + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + bool MustSaveLR = FI->mustSaveLR(); + // Do we have a frame pointer for this function? + bool HasFP = hasFP(MF); + + int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); + + int FPOffset = 0; + if (HasFP) { + if (Subtarget.isSVR4ABI()) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + int FPIndex = FI->getFramePointerSaveIndex(); + assert(FPIndex && "No Frame Pointer Save Slot!"); + FPOffset = FFI->getObjectOffset(FPIndex); + } else { + FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); + } + } + + bool UsesTCRet = RetOpcode == PPC::TCRETURNri || + RetOpcode == PPC::TCRETURNdi || + RetOpcode == PPC::TCRETURNai || + RetOpcode == PPC::TCRETURNri8 || + RetOpcode == PPC::TCRETURNdi8 || + RetOpcode == PPC::TCRETURNai8; + + if (UsesTCRet) { + int MaxTCRetDelta = FI->getTailCallSPDelta(); + MachineOperand &StackAdjust = MBBI->getOperand(1); + assert(StackAdjust.isImm() && "Expecting immediate value."); + // Adjust stack pointer. + int StackAdj = StackAdjust.getImm(); + int Delta = StackAdj - MaxTCRetDelta; + assert((Delta >= 0) && "Delta must be positive"); + if (MaxTCRetDelta>0) + FrameSize += (StackAdj +Delta); + else + FrameSize += StackAdj; + } + + if (FrameSize) { + // The loaded (or persistent) stack pointer value is offset by the 'stwu' + // on entry to the function. Add this offset back now. + if (!isPPC64) { + // If this function contained a fastcc call and GuaranteedTailCallOpt is + // enabled (=> hasFastCall()==true) the fastcc call might contain a tail + // call which invalidates the stack pointer value in SP(0). So we use the + // value of R31 in this case. 
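
// The if/else chain below picks one of four ways to undo the stack
// adjustment. A standalone summary sketch (illustrative names; the
// ALIGN_STACK refinement is elided):
enum class Restore {
  AddiFromFP,   // fastcc, small frame:  addi r1, r31, size
  AddFromFP,    // fastcc, large frame:  lis/ori size, add r1, r31, r0
  AddiFromSP,   // fixed small frame:    addi r1, r1, size
  LoadBackChain // everything else:      lwz/ld r1, 0(r1)
};

inline bool fitsInt16(int V) { return V >= -32768 && V <= 32767; }

Restore pickRestore(bool HasFastCall, bool HasVarSizedObjects, int FrameSize) {
  if (HasFastCall) // SP may be stale after a fastcc tail call; use R31
    return fitsInt16(FrameSize) ? Restore::AddiFromFP : Restore::AddFromFP;
  if (fitsInt16(FrameSize) && !HasVarSizedObjects)
    return Restore::AddiFromSP;
  return Restore::LoadBackChain; // reload the saved back chain word
}
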
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R31).addImm(FrameSize);
+ } else if (FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
+ .addReg(PPC::R0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
+ .addReg(PPC::R1)
+ .addReg(PPC::R31)
+ .addReg(PPC::R0);
+ } else if (isInt<16>(FrameSize) &&
+ (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
+ .addReg(PPC::R1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R1)
+ .addImm(0).addReg(PPC::R1);
+ }
+ } else {
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
+ assert(hasFP(MF) && "Expecting a valid frame pointer.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X31).addImm(FrameSize);
+ } else if (FI->hasFastCall()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
+ .addReg(PPC::X0, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
+ .addReg(PPC::X1)
+ .addReg(PPC::X31)
+ .addReg(PPC::X0);
+ } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
+ !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
+ .addReg(PPC::X1).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
+ .addImm(0).addReg(PPC::X1);
+ }
+ }
+ }
+
+ if (isPPC64) {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
+ .addImm(LROffset/4).addReg(PPC::X1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
+ .addImm(FPOffset/4).addReg(PPC::X1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
+ } else {
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
+ .addImm(LROffset).addReg(PPC::R1);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
+ .addImm(FPOffset).addReg(PPC::R1);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
+ }
+
+ // Callee pop calling convention. Pop parameter/linkage area. Used for tail
+ // call optimization.
+ if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+ MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+ unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = isPPC64 ?
PPC::ORI8 : PPC::ORI; + + if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { + BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg) + .addReg(StackReg).addImm(CallerAllocatedAmt); + } else { + BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) + .addImm(CallerAllocatedAmt >> 16); + BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) + .addReg(TmpReg, RegState::Kill) + .addImm(CallerAllocatedAmt & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(ADDInstr)) + .addReg(StackReg) + .addReg(FPReg) + .addReg(TmpReg); + } + } else if (RetOpcode == PPC::TCRETURNdi) { + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + } else if (RetOpcode == PPC::TCRETURNri) { + MBBI = MBB.getLastNonDebugInstr(); + assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); + } else if (RetOpcode == PPC::TCRETURNai) { + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); + } else if (RetOpcode == PPC::TCRETURNdi8) { + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + } else if (RetOpcode == PPC::TCRETURNri8) { + MBBI = MBB.getLastNonDebugInstr(); + assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); + } else if (RetOpcode == PPC::TCRETURNai8) { + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); + } +} + +void PPCFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const { + // Initial state of the frame pointer is R1. + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(PPC::R1, 0); + Moves.push_back(MachineMove(0, Dst, Src)); +} + +static bool spillsCR(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->isCRSpilled(); +} + +/// MustSaveLR - Return true if this function requires that we save the LR +/// register onto the stack in the prolog and restore it in the epilog of the +/// function. +static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { + const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); + + // We need a save/restore of LR if there is any def of LR (which is + // defined by calls, including the PIC setup sequence), or if there is + // some use of the LR stack slot (e.g. for builtin_return_address). + // (LR comes in 32 and 64 bit versions.) + MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); + return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); +} + +void +PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + + // Save and clear the LR state. 
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + unsigned LR = RegInfo->getRARegister(); + FI->setMustSaveLR(MustSaveLR(MF, LR)); + MF.getRegInfo().setPhysRegUnused(LR); + + // Save R31 if necessary + int FPSI = FI->getFramePointerSaveIndex(); + bool isPPC64 = Subtarget.isPPC64(); + bool isDarwinABI = Subtarget.isDarwinABI(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // If the frame pointer save index hasn't been defined yet. + if (!FPSI && needsFP(MF)) { + // Find out what the fix offset of the frame pointer save area. + int FPOffset = getFramePointerSaveOffset(isPPC64, isDarwinABI); + // Allocate the frame index for frame pointer save area. + FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); + // Save the result. + FI->setFramePointerSaveIndex(FPSI); + } + + // Reserve stack space to move the linkage area to in case of a tail call. + int TCSPDelta = 0; + if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { + MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); + } + + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or + // a large stack, which will require scavenging a register to materialize a + // large offset. + // FIXME: this doesn't actually check stack size, so is a bit pessimistic + // FIXME: doesn't detect whether or not we need to spill vXX, which requires + // r0 for now. + + if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable. + if (needsFP(MF) || spillsCR(MF)) { + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + } +} + +void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) + const { + // Early exit if not using the SVR4 ABI. + if (!Subtarget.isSVR4ABI()) + return; + + // Get callee saved register information. + MachineFrameInfo *FFI = MF.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); + + // Early exit if no callee saved registers are modified! + if (CSI.empty() && !needsFP(MF)) { + return; + } + + unsigned MinGPR = PPC::R31; + unsigned MinG8R = PPC::X31; + unsigned MinFPR = PPC::F31; + unsigned MinVR = PPC::V31; + + bool HasGPSaveArea = false; + bool HasG8SaveArea = false; + bool HasFPSaveArea = false; + bool HasCRSaveArea = false; + bool HasVRSAVESaveArea = false; + bool HasVRSaveArea = false; + + SmallVector<CalleeSavedInfo, 18> GPRegs; + SmallVector<CalleeSavedInfo, 18> G8Regs; + SmallVector<CalleeSavedInfo, 18> FPRegs; + SmallVector<CalleeSavedInfo, 18> VRegs; + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + if (PPC::GPRCRegisterClass->contains(Reg)) { + HasGPSaveArea = true; + + GPRegs.push_back(CSI[i]); + + if (Reg < MinGPR) { + MinGPR = Reg; + } + } else if (PPC::G8RCRegisterClass->contains(Reg)) { + HasG8SaveArea = true; + + G8Regs.push_back(CSI[i]); + + if (Reg < MinG8R) { + MinG8R = Reg; + } + } else if (PPC::F8RCRegisterClass->contains(Reg)) { + HasFPSaveArea = true; + + FPRegs.push_back(CSI[i]); + + if (Reg < MinFPR) { + MinFPR = Reg; + } +// FIXME SVR4: Disable CR save area for now. 
+ } else if (PPC::CRBITRCRegisterClass->contains(Reg) + || PPC::CRRCRegisterClass->contains(Reg)) { +// HasCRSaveArea = true; + } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) { + HasVRSAVESaveArea = true; + } else if (PPC::VRRCRegisterClass->contains(Reg)) { + HasVRSaveArea = true; + + VRegs.push_back(CSI[i]); + + if (Reg < MinVR) { + MinVR = Reg; + } + } else { + llvm_unreachable("Unknown RegisterClass!"); + } + } + + PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); + + int64_t LowerBound = 0; + + // Take into account stack space reserved for tail calls. + int TCSPDelta = 0; + if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { + LowerBound = TCSPDelta; + } + + // The Floating-point register save area is right below the back chain word + // of the previous stack frame. + if (HasFPSaveArea) { + for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { + int FI = FPRegs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + LowerBound -= (31 - PPCRegisterInfo::getRegisterNumbering(MinFPR) + 1) * 8; + } + + // Check whether the frame pointer register is allocated. If so, make sure it + // is spilled to the correct offset. + if (needsFP(MF)) { + HasGPSaveArea = true; + + int FI = PFI->getFramePointerSaveIndex(); + assert(FI && "No Frame Pointer Save Slot!"); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + // General register save area starts right below the Floating-point + // register save area. + if (HasGPSaveArea || HasG8SaveArea) { + // Move general register save area spill slots down, taking into account + // the size of the Floating-point register save area. + for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { + int FI = GPRegs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + // Move general register save area spill slots down, taking into account + // the size of the Floating-point register save area. + for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { + int FI = G8Regs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + unsigned MinReg = + std::min<unsigned>(PPCRegisterInfo::getRegisterNumbering(MinGPR), + PPCRegisterInfo::getRegisterNumbering(MinG8R)); + + if (Subtarget.isPPC64()) { + LowerBound -= (31 - MinReg + 1) * 8; + } else { + LowerBound -= (31 - MinReg + 1) * 4; + } + } + + // The CR save area is below the general register save area. + if (HasCRSaveArea) { + // FIXME SVR4: Is it actually possible to have multiple elements in CSI + // which have the CR/CRBIT register class? + // Adjust the frame index of the CR spill slot. + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + + if (PPC::CRBITRCRegisterClass->contains(Reg) || + PPC::CRRCRegisterClass->contains(Reg)) { + int FI = CSI[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + } + + LowerBound -= 4; // The CR save area is always 4 bytes long. + } + + if (HasVRSAVESaveArea) { + // FIXME SVR4: Is it actually possible to have multiple elements in CSI + // which have the VRSAVE register class? + // Adjust the frame index of the VRSAVE spill slot. 
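
// The offset lowering below repeatedly slides save areas further down
// the frame; the vector save area additionally needs 16-byte
// alignment. A standalone sketch of that alignment step: the result
// is 16-byte aligned and at least 15 bytes below the incoming bound
// (offsets here are negative, growing away from the previous frame).
#include <cassert>
#include <cstdint>

int64_t alignDown16(int64_t Bound) {
  return (Bound - 15) & ~int64_t(15);
}

int main() {
  assert(alignDown16(-100) == -128);    // 16-byte aligned, below -115
  assert(alignDown16(-100) % 16 == 0);
}
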
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + + if (PPC::VRSAVERCRegisterClass->contains(Reg)) { + int FI = CSI[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + } + + LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. + } + + if (HasVRSaveArea) { + // Insert alignment padding, we need 16-byte alignment. + LowerBound = (LowerBound - 15) & ~(15); + + for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { + int FI = VRegs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + } +} diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h new file mode 100644 index 0000000..0c18de1 --- /dev/null +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -0,0 +1,322 @@ +//==-- PPCFrameLowering.h - Define frame lowering for PowerPC ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPC_FRAMEINFO_H +#define POWERPC_FRAMEINFO_H + +#include "PPC.h" +#include "PPCSubtarget.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/STLExtras.h" + +namespace llvm { + class PPCSubtarget; + +class PPCFrameLowering: public TargetFrameLowering { + const PPCSubtarget &Subtarget; + +public: + PPCFrameLowering(const PPCSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0), + Subtarget(sti) { + } + + void determineFrameLayout(MachineFunction &MF) const; + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool hasFP(const MachineFunction &MF) const; + bool needsFP(const MachineFunction &MF) const; + void getInitialFrameState(std::vector<MachineMove> &Moves) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS = NULL) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + + /// targetHandlesStackFrameRounding - Returns true if the target is + /// responsible for rounding up the stack frame (probably at emitPrologue + /// time). + bool targetHandlesStackFrameRounding() const { return true; } + + /// getReturnSaveOffset - Return the previous frame offset to save the + /// return address. + static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) { + if (isDarwinABI) + return isPPC64 ? 16 : 8; + // SVR4 ABI: + return isPPC64 ? 16 : 4; + } + + /// getFramePointerSaveOffset - Return the previous frame offset to save the + /// frame pointer. + static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) { + // For the Darwin ABI: + // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area + // for saving the frame pointer (if needed.) While the published ABI has + // not used this slot since at least MacOSX 10.2, there is older code + // around that does use it, and that needs to continue to work. + if (isDarwinABI) + return isPPC64 ? -8U : -4U; + + // SVR4 ABI: First slot in the general register save area. + return isPPC64 ? 
-8U : -4U;
+ }
+
+ /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
+ ///
+ static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
+ if (isDarwinABI || isPPC64)
+ return 6 * (isPPC64 ? 8 : 4);
+
+ // SVR4 ABI:
+ return 8;
+ }
+
+ /// getMinCallArgumentsSize - Return the size of the minimum PowerPC ABI
+ /// argument area.
+ static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) {
+ // For the Darwin ABI / 64-bit SVR4 ABI:
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ if (isDarwinABI || isPPC64)
+ return 8 * (isPPC64 ? 8 : 4);
+
+ // 32-bit SVR4 ABI:
+ // There is no default stack allocated for the first 8 GPR arguments.
+ return 0;
+ }
+
+ /// getMinCallFrameSize - Return the minimum size a call frame can be using
+ /// the PowerPC ABI.
+ static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) {
+ // The call frame needs to be at least big enough for linkage and 8 args.
+ return getLinkageSize(isPPC64, isDarwinABI) +
+ getMinCallArgumentsSize(isPPC64, isDarwinABI);
+ }
+
+ // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
+ const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ if (Subtarget.isDarwinABI()) {
+ NumEntries = 1;
+ if (Subtarget.isPPC64()) {
+ static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+ return &darwin64Offsets;
+ } else {
+ static const SpillSlot darwinOffsets = {PPC::R31, -4};
+ return &darwinOffsets;
+ }
+ }
+
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ NumEntries = 0;
+ return 0;
+ }
+
+ static const SpillSlot Offsets[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::R31, -4},
+ {PPC::R30, -8},
+ {PPC::R29, -12},
+ {PPC::R28, -16},
+ {PPC::R27, -20},
+ {PPC::R26, -24},
+ {PPC::R25, -28},
+ {PPC::R24, -32},
+ {PPC::R23, -36},
+ {PPC::R22, -40},
+ {PPC::R21, -44},
+ {PPC::R20, -48},
+ {PPC::R19, -52},
+ {PPC::R18, -56},
+ {PPC::R17, -60},
+ {PPC::R16, -64},
+ {PPC::R15, -68},
+ {PPC::R14, -72},
+
+ // CR save area offset.
+ // FIXME SVR4: Disable CR save area for now.
+// {PPC::CR2, -4},
+// {PPC::CR3, -4},
+// {PPC::CR4, -4},
+// {PPC::CR2LT, -4},
+// {PPC::CR2GT, -4},
+// {PPC::CR2EQ, -4},
+// {PPC::CR2UN, -4},
+// {PPC::CR3LT, -4},
+// {PPC::CR3GT, -4},
+// {PPC::CR3EQ, -4},
+// {PPC::CR3UN, -4},
+// {PPC::CR4LT, -4},
+// {PPC::CR4GT, -4},
+// {PPC::CR4EQ, -4},
+// {PPC::CR4UN, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}
+ };
+
+ static const SpillSlot Offsets64[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8}, + {PPC::F30, -16}, + {PPC::F29, -24}, + {PPC::F28, -32}, + {PPC::F27, -40}, + {PPC::F26, -48}, + {PPC::F25, -56}, + {PPC::F24, -64}, + {PPC::F23, -72}, + {PPC::F22, -80}, + {PPC::F21, -88}, + {PPC::F20, -96}, + {PPC::F19, -104}, + {PPC::F18, -112}, + {PPC::F17, -120}, + {PPC::F16, -128}, + {PPC::F15, -136}, + {PPC::F14, -144}, + + // General register save area offsets. + // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit + // mode? + {PPC::R31, -4}, + {PPC::R30, -12}, + {PPC::R29, -20}, + {PPC::R28, -28}, + {PPC::R27, -36}, + {PPC::R26, -44}, + {PPC::R25, -52}, + {PPC::R24, -60}, + {PPC::R23, -68}, + {PPC::R22, -76}, + {PPC::R21, -84}, + {PPC::R20, -92}, + {PPC::R19, -100}, + {PPC::R18, -108}, + {PPC::R17, -116}, + {PPC::R16, -124}, + {PPC::R15, -132}, + {PPC::R14, -140}, + + {PPC::X31, -8}, + {PPC::X30, -16}, + {PPC::X29, -24}, + {PPC::X28, -32}, + {PPC::X27, -40}, + {PPC::X26, -48}, + {PPC::X25, -56}, + {PPC::X24, -64}, + {PPC::X23, -72}, + {PPC::X22, -80}, + {PPC::X21, -88}, + {PPC::X20, -96}, + {PPC::X19, -104}, + {PPC::X18, -112}, + {PPC::X17, -120}, + {PPC::X16, -128}, + {PPC::X15, -136}, + {PPC::X14, -144}, + + // CR save area offset. + // FIXME SVR4: Disable CR save area for now. +// {PPC::CR2, -4}, +// {PPC::CR3, -4}, +// {PPC::CR4, -4}, +// {PPC::CR2LT, -4}, +// {PPC::CR2GT, -4}, +// {PPC::CR2EQ, -4}, +// {PPC::CR2UN, -4}, +// {PPC::CR3LT, -4}, +// {PPC::CR3GT, -4}, +// {PPC::CR3EQ, -4}, +// {PPC::CR3UN, -4}, +// {PPC::CR4LT, -4}, +// {PPC::CR4GT, -4}, +// {PPC::CR4EQ, -4}, +// {PPC::CR4UN, -4}, + + // VRSAVE save area offset. + {PPC::VRSAVE, -4}, + + // Vector register save area + {PPC::V31, -16}, + {PPC::V30, -32}, + {PPC::V29, -48}, + {PPC::V28, -64}, + {PPC::V27, -80}, + {PPC::V26, -96}, + {PPC::V25, -112}, + {PPC::V24, -128}, + {PPC::V23, -144}, + {PPC::V22, -160}, + {PPC::V21, -176}, + {PPC::V20, -192} + }; + + if (Subtarget.isPPC64()) { + NumEntries = array_lengthof(Offsets64); + + return Offsets64; + } else { + NumEntries = array_lengthof(Offsets); + + return Offsets; + } + } +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index db11fde..0de5844 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -26,7 +26,7 @@ using namespace llvm; // // This models the dispatch group formation of the PPC970 processor. Dispatch // groups are bundles of up to five instructions that can contain various mixes -// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one +// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one // branch instruction per-cycle. // // There are a number of restrictions to dispatch group formation: some @@ -55,14 +55,14 @@ PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) void PPCHazardRecognizer970::EndDispatchGroup() { DEBUG(errs() << "=== Start of dispatch group\n"); NumIssued = 0; - + // Structural hazard info. 
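
// A standalone sketch of the dispatch-group bookkeeping this
// recognizer implements (illustrative names): a group has up to five
// slots, a "cracked" instruction consumes two of them, and a branch or
// a PPC970_Single instruction is forced to be the last entry.
struct DispatchGroup {
  unsigned NumIssued = 0;  // slots consumed in the current group

  bool canIssue(bool isFirst, bool isSingle, bool isCracked) const {
    if (NumIssued != 0 && (isFirst || isSingle))
      return false;        // must start a fresh group
    if (isCracked && NumIssued > 2)
      return false;        // both halves would not fit
    return true;
  }

  void issue(bool isBranchOrSingle, bool isCracked) {
    if (isBranchOrSingle)
      NumIssued = 4;       // force this to be the final slot
    NumIssued += isCracked ? 2 : 1;
    if (NumIssued >= 5)
      NumIssued = 0;       // group ended; begin the next one
  }
};
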
HasCTRSet = false; NumStores = 0; } -PPCII::PPC970_Unit +PPCII::PPC970_Unit PPCHazardRecognizer970::GetInstrType(unsigned Opcode, bool &isFirst, bool &isSingle, bool &isCracked, @@ -72,14 +72,14 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, return PPCII::PPC970_Pseudo; } Opcode = ~Opcode; - + const TargetInstrDesc &TID = TII.get(Opcode); - + isLoad = TID.mayLoad(); isStore = TID.mayStore(); - + uint64_t TSFlags = TID.TSFlags; - + isFirst = TSFlags & PPCII::PPC970_First; isSingle = TSFlags & PPCII::PPC970_Single; isCracked = TSFlags & PPCII::PPC970_Cracked; @@ -96,7 +96,7 @@ isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const { return true; if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i]) return true; - + // Okay, we don't have an exact match, if this is an indexed offset, see if // we have overlap (which happens during fp->int conversion for example). if (StorePtr2[i] == Ptr2) { @@ -122,26 +122,28 @@ isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const { /// instructions that wouldn't terminate the dispatch group that would cause a /// pipeline flush. ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: -getHazardType(SUnit *SU) { - const SDNode *Node = SU->getNode()->getFlaggedMachineNode(); +getHazardType(SUnit *SU, int Stalls) { + assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead"); + + const SDNode *Node = SU->getNode()->getGluedMachineNode(); bool isFirst, isSingle, isCracked, isLoad, isStore; - PPCII::PPC970_Unit InstrType = + PPCII::PPC970_Unit InstrType = GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, isLoad, isStore); - if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; + if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; unsigned Opcode = Node->getMachineOpcode(); // We can only issue a PPC970_First/PPC970_Single instruction (such as // crand/mtspr/etc) if this is the first cycle of the dispatch group. if (NumIssued != 0 && (isFirst || isSingle)) return Hazard; - + // If this instruction is cracked into two ops by the decoder, we know that // it is not a branch and that it cannot issue if 3 other instructions are // already in the dispatch group. if (isCracked && NumIssued > 2) return Hazard; - + switch (InstrType) { default: llvm_unreachable("Unknown instruction type!"); case PPCII::PPC970_FXU: @@ -159,11 +161,11 @@ getHazardType(SUnit *SU) { case PPCII::PPC970_BRU: break; } - + // Do not allow MTCTR and BCTRL to be in the same dispatch group. if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4)) return NoopHazard; - + // If this is a load following a store, make sure it's not to the same or // overlapping address. if (isLoad && NumStores) { @@ -212,27 +214,27 @@ getHazardType(SUnit *SU) { LoadSize = 16; break; } - - if (isLoadOfStoredAddress(LoadSize, + + if (isLoadOfStoredAddress(LoadSize, Node->getOperand(0), Node->getOperand(1))) return NoopHazard; } - + return NoHazard; } void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { - const SDNode *Node = SU->getNode()->getFlaggedMachineNode(); + const SDNode *Node = SU->getNode()->getGluedMachineNode(); bool isFirst, isSingle, isCracked, isLoad, isStore; - PPCII::PPC970_Unit InstrType = + PPCII::PPC970_Unit InstrType = GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, isLoad, isStore); - if (InstrType == PPCII::PPC970_Pseudo) return; + if (InstrType == PPCII::PPC970_Pseudo) return; unsigned Opcode = Node->getMachineOpcode(); // Update structural hazard information. 
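
// The store tracking updated below feeds the load-after-store check in
// isLoadOfStoredAddress() above. A simplified standalone sketch of the
// overlap test (the in-tree version compares SDValue base/offset pairs
// rather than raw integers):
#include <cstdint>

struct PendingStore { uint64_t Base, Offset; unsigned Size; };

bool loadOverlapsStore(uint64_t LoadBase, uint64_t LoadOffset,
                       unsigned LoadSize, const PendingStore *Stores,
                       unsigned NumStores) {
  for (unsigned i = 0; i != NumStores; ++i) {
    if (Stores[i].Base != LoadBase)
      continue;                          // different base register
    uint64_t SO = Stores[i].Offset;
    if (LoadOffset < SO + Stores[i].Size && SO < LoadOffset + LoadSize)
      return true;                       // byte ranges intersect
  }
  return false;
}
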
if (Opcode == PPC::MTCTR) HasCTRSet = true; - + // Track the address stored to. if (isStore) { unsigned ThisStoreSize; @@ -278,22 +280,22 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { ThisStoreSize = 16; break; } - + StoreSize[NumStores] = ThisStoreSize; StorePtr1[NumStores] = Node->getOperand(1); StorePtr2[NumStores] = Node->getOperand(2); ++NumStores; } - + if (InstrType == PPCII::PPC970_BRU || isSingle) NumIssued = 4; // Terminate a d-group. ++NumIssued; - + // If this instruction is cracked into two ops by the decoder, remember that // we issued two pieces. if (isCracked) ++NumIssued; - + if (NumIssued == 5) EndDispatchGroup(); } diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index 74bf8e5..2f81f0f 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -19,7 +19,7 @@ #include "PPCInstrInfo.h" namespace llvm { - + /// PPCHazardRecognizer970 - This class defines a finite state automata that /// models the dispatch logic on the PowerPC 970 (aka G5) processor. This /// promotes good dispatch group formation and implements noop insertion to @@ -28,14 +28,14 @@ namespace llvm { /// or storing then loading from the same address within a dispatch group. class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { const TargetInstrInfo &TII; - + unsigned NumIssued; // Number of insts issued, including advanced cycles. - + // Various things that can cause a structural hazard. - + // HasCTRSet - If the CTR register is set in this group, disallow BCTRL. bool HasCTRSet; - + // StoredPtr - Keep track of the address of any store. If we see a load from // the same address (or one that aliases it), disallow the store. We can have // up to four stores in one dispatch group, hence we track up to 4. @@ -45,24 +45,24 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { SDValue StorePtr1[4], StorePtr2[4]; unsigned StoreSize[4]; unsigned NumStores; - + public: PPCHazardRecognizer970(const TargetInstrInfo &TII); - virtual HazardType getHazardType(SUnit *SU); + virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); - + private: /// EndDispatchGroup - Called when we are finishing a new dispatch group. /// void EndDispatchGroup(); - + /// GetInstrType - Classify the specified powerpc opcode according to its /// pipeline. 
PPCII::PPC970_Unit GetInstrType(unsigned Opcode, bool &isFirst, bool &isSingle,bool &isCracked, bool &isLoad, bool &isStore); - + bool isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const; }; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 00eebb8..faae9b2 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -16,7 +16,6 @@ #include "PPC.h" #include "PPCPredicates.h" #include "PPCTargetMachine.h" -#include "PPCHazardRecognizers.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -49,16 +48,16 @@ namespace { : SelectionDAGISel(tm), TM(tm), PPCLowering(*TM.getTargetLowering()), PPCSubTarget(*TM.getSubtargetImpl()) {} - + virtual bool runOnMachineFunction(MachineFunction &MF) { // Make sure we re-emit a set of the global base reg if necessary GlobalBaseReg = 0; SelectionDAGISel::runOnMachineFunction(MF); - + InsertVRSaveCode(MF); return true; } - + /// getI32Imm - Return a target constant with the specified value, of type /// i32. inline SDValue getI32Imm(unsigned Imm) { @@ -70,13 +69,13 @@ namespace { inline SDValue getI64Imm(uint64_t Imm) { return CurDAG->getTargetConstant(Imm, MVT::i64); } - + /// getSmallIPtrImm - Return a target constant of pointer type. inline SDValue getSmallIPtrImm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy()); } - - /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s + + /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s /// with any number of 0s on either side. The 1s are allowed to wrap from /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. /// 0x0F0F0000 is not, since all 1s are not contiguous. @@ -87,15 +86,15 @@ namespace { /// rotate and mask opcode and mask operation. static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME); - + /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC /// base register. Return the virtual register that holds this value. SDNode *getGlobalBaseReg(); - + // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. SDNode *Select(SDNode *N); - + SDNode *SelectBitfieldInsert(SDNode *N); /// SelectCC - Select a comparison of the specified values with the @@ -104,42 +103,39 @@ namespace { /// SelectAddrImm - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement [r+imm]. - bool SelectAddrImm(SDNode *Op, SDValue N, SDValue &Disp, + bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG); } - + /// SelectAddrImmOffs - Return true if the operand is valid for a preinc /// immediate field. Because preinc imms have already been validated, just /// accept it. - bool SelectAddrImmOffs(SDNode *Op, SDValue N, SDValue &Out) const { + bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { Out = N; return true; } - + /// SelectAddrIdx - Given the specified addressed, check to see if it can be /// represented as an indexed [r+r] operation. Returns false if it can /// be represented by [r+imm], which are preferred. 
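
// A standalone sketch of the isRunOfOnes() predicate documented above:
// a value qualifies when its set bits form one contiguous run, and the
// run is allowed to wrap from the LSB around to the MSB. A wrapped run
// is exactly the complement of a non-wrapping run that touches neither
// end of the word.
#include <cassert>
#include <cstdint>

bool isMask32(uint32_t V)        { return V && ((V + 1) & V) == 0; }
bool isShiftedMask32(uint32_t V) { return V && isMask32((V - 1) | V); }

bool isRunOfOnesWrapped(uint32_t V) {
  if (isShiftedMask32(V))
    return true;                             // ordinary contiguous run
  return V != 0 && V != ~0u && isShiftedMask32(~V); // wrapped run
}

int main() {
  assert(isRunOfOnesWrapped(0x0000FFFF));
  assert(isRunOfOnesWrapped(0xFF0000FF));  // wraps LSB -> MSB
  assert(!isRunOfOnesWrapped(0x0F0F0000)); // two separate runs
}
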
- bool SelectAddrIdx(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index) { + bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG); } - + /// SelectAddrIdxOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. - bool SelectAddrIdxOnly(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Index) { + bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG); } /// SelectAddrImmShift - Returns true if the address N can be represented by /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable /// for use by STD and friends. - bool SelectAddrImmShift(SDNode *Op, SDValue N, SDValue &Disp, - SDValue &Base) { + bool SelectAddrImmShift(SDValue N, SDValue &Disp, SDValue &Base) { return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG); } - + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. It is always correct to compute the value into /// a register. The case of adding a (possibly relocatable) constant to a @@ -151,29 +147,16 @@ namespace { OutOps.push_back(Op); return false; } - - SDValue BuildSDIVSequence(SDNode *N); - SDValue BuildUDIVSequence(SDNode *N); - + void InsertVRSaveCode(MachineFunction &MF); virtual const char *getPassName() const { return "PowerPC DAG->DAG Pattern Instruction Selection"; - } - - /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for - /// this target when scheduling the DAG. - virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() { - // Should use subtarget info to pick the right hazard recognizer. For - // now, always return a PPC970 recognizer. - const TargetInstrInfo *II = TM.getInstrInfo(); - assert(II && "No InstrInfo?"); - return new PPCHazardRecognizer970(*II); } // Include the pieces autogenerated from the target description. #include "PPCGenDAGISel.inc" - + private: SDNode *SelectSETCC(SDNode *N); }; @@ -184,19 +167,20 @@ private: /// check to see if we need to save/restore VRSAVE. If so, do it. void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // Check to see if this function uses vector registers, which means we have to - // save and restore the VRSAVE register and update it with the regs we use. + // save and restore the VRSAVE register and update it with the regs we use. // // In this case, there will be virtual registers of vector type created // by the scheduler. Detect them now. bool HasVectorVReg = false; - for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, - e = RegInfo->getLastVirtReg()+1; i != e; ++i) - if (RegInfo->getRegClass(i) == &PPC::VRRCRegClass) { + for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) { HasVectorVReg = true; break; } + } if (!HasVectorVReg) return; // nothing to do. - + // If we have a vector register, we want to emit code into the entry and exit // blocks to save and restore the VRSAVE register. We do this here (instead // of marking all vector instructions as clobbering VRSAVE) for two reasons: @@ -211,7 +195,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // function and one for the value after having bits or'd into it. 
unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); - + const TargetInstrInfo &TII = *TM.getInstrInfo(); MachineBasicBlock &EntryBB = *Fn.begin(); DebugLoc dl; @@ -224,21 +208,21 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE), UpdatedVRSAVE).addReg(InVRSAVE); BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE); - + // Find all return blocks, outputting a restore in each epilog. for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { if (!BB->empty() && BB->back().getDesc().isReturn()) { IP = BB->end(); --IP; - + // Skip over all terminator instructions, which are part of the return // sequence. MachineBasicBlock::iterator I2 = IP; while (I2 != BB->begin() && (--I2)->getDesc().isTerminator()) IP = I2; - + // Emit: MTVRSAVE InVRSave BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE); - } + } } } @@ -344,8 +328,8 @@ bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { return false; } -bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, - bool isShiftMask, unsigned &SH, +bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, + bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME) { // Don't even go down this path for i64, since different logic will be // necessary for rldicl/rldicr/rldimi. @@ -358,13 +342,13 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, if (N->getNumOperands() != 2 || !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31)) return false; - + if (Opcode == ISD::SHL) { // apply shift left to mask if it comes first if (isShiftMask) Mask = Mask << Shift; // determine which bits are made indeterminant by shift Indeterminant = ~(0xFFFFFFFFu << Shift); - } else if (Opcode == ISD::SRL) { + } else if (Opcode == ISD::SRL) { // apply shift right to mask if it comes first if (isShiftMask) Mask = Mask >> Shift; // determine which bits are made indeterminant by shift @@ -376,7 +360,7 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, } else { return false; } - + // if the mask doesn't intersect any Indeterminant bits if (Mask && !(Mask & Indeterminant)) { SH = Shift & 31; @@ -392,14 +376,14 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); - + APInt LKZ, LKO, RKZ, RKO; CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO); CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO); - + unsigned TargetMask = LKZ.getZExtValue(); unsigned InsertMask = RKZ.getZExtValue(); - + if ((TargetMask | InsertMask) == 0xFFFFFFFF) { unsigned Op0Opc = Op0.getOpcode(); unsigned Op1Opc = Op1.getOpcode(); @@ -427,7 +411,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { std::swap(TargetMask, InsertMask); } } - + unsigned MB, ME; if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) { SDValue Tmp1, Tmp2; @@ -463,7 +447,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, DebugLoc dl) { // Always select the LHS. 
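Aside: isRunOfOnes and isRotateAndMask above accept masks whose run of ones may wrap from LSB to MSB. A minimal standalone sketch of the wrap-aware test (a reformulation for orientation, not the code this patch touches):

    #include <cstdint>

    // True if Val is one contiguous run of ones, possibly wrapping around,
    // in the spirit of isRunOfOnes.
    static bool isContiguousMask(uint32_t Val) {
      if (Val == 0) return false;
      // A non-wrapped run 0..01..10..0 plus its lowest set bit is a power
      // of two, so it shares no bits with the original value.
      uint32_t Lowest = Val & -Val;
      if (((Val + Lowest) & Val) == 0) return true;
      // A wrapped run 1..10..01..1 has a complement that is a non-wrapped run.
      uint32_t Inv = ~Val;
      uint32_t InvLowest = Inv & -Inv;
      return ((Inv + InvLowest) & Inv) == 0;
    }
    // isContiguousMask(0x0000FFF0) and isContiguousMask(0xFF0000FF) hold;
    // isContiguousMask(0x0F0F0000) does not.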
unsigned Opc; - + if (LHS.getValueType() == MVT::i32) { unsigned Imm; if (CC == ISD::SETEQ || CC == ISD::SETNE) { @@ -476,11 +460,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, if (isInt<16>((int)Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, getI32Imm(Imm & 0xFFFF)), 0); - + // For non-equality comparisons, the default code would materialize the // constant, then compare against it, like this: // lis r2, 4660 - // ori r2, r2, 22136 + // ori r2, r2, 22136 // cmpw cr0, r3, r2 // Since we are just comparing for equality, we can emit this instead: // xoris r0,r3,0x1234 @@ -517,11 +501,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, if (isInt<16>(Imm)) return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, getI32Imm(Imm & 0xFFFF)), 0); - + // For non-equality comparisons, the default code would materialize the // constant, then compare against it, like this: // lis r2, 4660 - // ori r2, r2, 22136 + // ori r2, r2, 22136 // cmpd cr0, r3, r2 // Since we are just comparing for equality, we can emit this instead: // xoris r0,r3,0x1234 @@ -610,9 +594,9 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { case ISD::SETUNE: case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO - case ISD::SETUEQ: - case ISD::SETOGE: - case ISD::SETOLE: + case ISD::SETUEQ: + case ISD::SETOGE: + case ISD::SETOLE: case ISD::SETONE: llvm_unreachable("Invalid branch code: should be expanded by legalize"); // These are invalid for floating point. Assume integer. @@ -641,9 +625,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { } case ISD::SETNE: { SDValue AD = - SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, + SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(~0U)), 0); - return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, + return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); } case ISD::SETLT: { @@ -663,16 +647,16 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { switch (CC) { default: break; case ISD::SETEQ: - Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, + Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(1)), 0); - return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, - SDValue(CurDAG->getMachineNode(PPC::LI, dl, + return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, + SDValue(CurDAG->getMachineNode(PPC::LI, dl, MVT::i32, getI32Imm(0)), 0), Op.getValue(1)); case ISD::SETNE: { Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); - SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, + SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, Op, getI32Imm(~0U)); return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op, SDValue(AD, 1)); @@ -687,35 +671,35 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { } case ISD::SETGT: { SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; - Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), + Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); - return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, + return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1)); } } } } - + bool Inv; int OtherCondIdx; unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx); SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), 
CC, dl); SDValue IntCR; - + // Force the ccreg into CR7. SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); - + SDValue InFlag(0, 0); // Null incoming flag value. - CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, + CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, InFlag).getValue(1); - + if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1) IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, CCReg), 0); else IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32, CR7Reg, CCReg), 0); - + SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31), getI32Imm(31), getI32Imm(31) }; if (OtherCondIdx == -1 && !Inv) @@ -734,7 +718,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // Get the other bit of the comparison. Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31); - SDValue OtherCond = + SDValue OtherCond = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond); @@ -750,7 +734,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; - + case ISD::Constant: { if (N->getValueType(0) == MVT::i64) { // Get 64 bit value. @@ -759,12 +743,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { unsigned Remainder = 0; // Assume no shift required. unsigned Shift = 0; - + // If it can't be represented as a 32 bit value. if (!isInt<32>(Imm)) { Shift = CountTrailingZeros_64(Imm); int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; - + // If the shifted value fits 32 bits. if (isInt<32>(ImmSh)) { // Go with the shifted value. @@ -776,14 +760,14 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { Imm >>= 32; } } - + // Intermediate operand. SDNode *Result; // Handle first 32 bits. unsigned Lo = Imm & 0xFFFF; unsigned Hi = (Imm >> 16) & 0xFFFF; - + // Simple value. if (isInt<16>(Imm)) { // Just the Lo bits. @@ -799,7 +783,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { // Just the Hi bits. Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); } - + // If no shift, we're done. if (!Shift) return Result; @@ -815,22 +799,22 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if ((Hi = (Remainder >> 16) & 0xFFFF)) { Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Hi)); - } + } if ((Lo = Remainder & 0xFFFF)) { Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), getI32Imm(Lo)); } - + return Result; } break; } - + case ISD::SETCC: return SelectSETCC(N); case PPCISD::GlobalBaseReg: return getGlobalBaseReg(); - + case ISD::FrameIndex: { int FI = cast<FrameIndexSDNode>(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); @@ -852,11 +836,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32, N->getOperand(0), InFlag); } - + case ISD::SDIV: { // FIXME: since this depends on the setting of the carry flag from the srawi // we should really be making notes about that for the scheduler. - // FIXME: It sure would be nice if we could cheaply recognize the + // FIXME: It sure would be nice if we could cheaply recognize the // srl/add/sra pattern the dag combiner will generate for this as // sra/addze rather than having to handle sdiv ourselves. oh well. 
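Aside: the ISD::SDIV case below selects division by a power of two as srawi/addze. A worked sketch of why the carry fix-up turns the flooring shift into C's truncating division (standalone, with the PPC carry mimicked in plain C++; shifts of negatives are assumed arithmetic, as on all mainstream compilers):

    #include <cassert>
    #include <cstdint>

    // srawi shifts right arithmetically and sets the carry when a negative
    // value shifts out one-bits; addze adds that carry back in.
    int32_t sdivPow2(int32_t X, unsigned Log2) {   // Log2 in [0, 31]
      int32_t Shifted = X >> Log2;                 // srawi
      int32_t Carry =
          (X < 0 && (X & int32_t((1u << Log2) - 1))) ? 1 : 0;
      return Shifted + Carry;                      // addze
    }
    // e.g. sdivPow2(-7, 1) == -3, matching -7 / 2 in C++.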
unsigned Imm; @@ -864,13 +848,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue N0 = N->getOperand(0); if ((signed)Imm > 0 && isPowerOf2_32(Imm)) { SDNode *Op = - CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag, + CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue, N0, getI32Imm(Log2_32(Imm))); - return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, + return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0), SDValue(Op, 1)); } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) { SDNode *Op = - CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Flag, + CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue, N0, getI32Imm(Log2_32(-Imm))); SDValue PT = SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32, @@ -879,24 +863,24 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT); } } - + // Other cases are autogenerated. break; } - + case ISD::LOAD: { // Handle preincrement loads. LoadSDNode *LD = cast<LoadSDNode>(N); EVT LoadedVT = LD->getMemoryVT(); - + // Normal loads are handled by code generated from the .td file. if (LD->getAddressingMode() != ISD::PRE_INC) break; - + SDValue Offset = LD->getOffset(); if (isa<ConstantSDNode>(Offset) || Offset.getOpcode() == ISD::TargetGlobalAddress) { - + unsigned Opcode; bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; if (LD->getValueType(0) != MVT::i64) { @@ -923,7 +907,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { case MVT::i8: Opcode = PPC::LBZU8; break; } } - + SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; @@ -935,7 +919,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { llvm_unreachable("R+R preindex loads not supported yet!"); } } - + case ISD::AND: { unsigned Imm, Imm2, SH, MB, ME; @@ -950,7 +934,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { // If this is just a masked value where the input is not handled above, and // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm if (isInt32Immediate(N->getOperand(1), Imm) && - isRunOfOnes(Imm, MB, ME) && + isRunOfOnes(Imm, MB, ME) && N->getOperand(0).getOpcode() != ISD::ROTL) { SDValue Val = N->getOperand(0); SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) }; @@ -963,7 +947,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } // ISD::OR doesn't get all the bitfield insertion fun. // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert - if (isInt32Immediate(N->getOperand(1), Imm) && + if (isInt32Immediate(N->getOperand(1), Imm) && N->getOperand(0).getOpcode() == ISD::OR && isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { unsigned MB, ME; @@ -975,7 +959,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5); } } - + // Other cases are autogenerated. break; } @@ -983,7 +967,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (N->getValueType(0) == MVT::i32) if (SDNode *I = SelectBitfieldInsert(N)) return I; - + // Other cases are autogenerated. break; case ISD::SHL: { @@ -994,25 +978,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); } - + // Other cases are autogenerated. 
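Aside: the AND and shift cases here (and the SRL case just below) all funnel into PPC::RLWINM. A minimal sketch of that instruction's rotate-then-mask semantics, using PPC's big-endian bit numbering with MB..ME allowed to wrap (standalone, for orientation only):

    #include <cstdint>

    static uint32_t rotl32(uint32_t V, unsigned SH) {
      return (V << SH) | (V >> ((32 - SH) & 31));
    }

    // rlwinm rD, rS, SH, MB, ME: rotate left by SH, then AND with the mask
    // covering big-endian bits MB through ME inclusive (wrapping allowed).
    uint32_t rlwinm(uint32_t RS, unsigned SH, unsigned MB, unsigned ME) {
      uint32_t Mask = 0;                      // MB, ME assumed in [0, 31]
      for (unsigned i = MB; ; i = (i + 1) & 31) {
        Mask |= 0x80000000u >> i;             // big-endian bit i
        if (i == ME) break;
      }
      return rotl32(RS, SH) & Mask;
    }
    // An AND with a contiguous mask is rlwinm(x, 0, MB, ME); a masked
    // shift left by SH is the same with a suitably rotated mask.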
break; } case ISD::SRL: { unsigned Imm, SH, MB, ME; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && - isRotateAndMask(N, Imm, true, SH, MB, ME)) { + isRotateAndMask(N, Imm, true, SH, MB, ME)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); } - + // Other cases are autogenerated. break; } case ISD::SELECT_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); - + // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2))) @@ -1022,7 +1006,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { // FIXME: Implement this optzn for PPC64. N->getValueType(0) == MVT::i32) { SDNode *Tmp = - CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, + CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, N->getOperand(0), getI32Imm(~0U)); return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0), N->getOperand(0), @@ -1064,7 +1048,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { case ISD::BR_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); - SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode, + SDValue Ops[] = { getI32Imm(getPredicateForSetCC(CC)), CondCode, N->getOperand(4), N->getOperand(0) }; return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4); } @@ -1078,13 +1062,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, PPC::BCTR, MVT::Other, Chain); } } - + return SelectCode(N); } -/// createPPCISelDag - This pass converts a legalized DAG into a +/// createPPCISelDag - This pass converts a legalized DAG into a /// PowerPC-specific DAG, ready for instruction scheduling. /// FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 14d1b15..8f623b8 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -38,17 +38,17 @@ #include "llvm/DerivedTypes.h" using namespace llvm; -static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, EVT &ValVT, EVT &LocVT, +static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT, - EVT &LocVT, +static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT, - EVT &LocVT, +static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); @@ -73,6 +73,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setUseUnderscoreSetJmp(true); setUseUnderscoreLongJmp(true); + // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all + // arguments are at least 4/8 bytes aligned. + setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4); + // Set up the register classes. 
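Aside: the addic/subfe pairs used in the setcc and select_cc lowerings above rest on a carry identity. A worked check in plain C++ (standalone; the 64-bit temporary stands in for PPC's carry bit):

    #include <cassert>
    #include <cstdint>

    // addic t, x, -1 sets the carry CA iff x != 0 (unsigned carry out of
    // x + 0xFFFFFFFF); subfe d, t, x computes x + ~t + CA, which
    // algebraically collapses to just CA.
    uint32_t isNonZero(uint32_t X) {
      uint64_t Sum = (uint64_t)X + 0xFFFFFFFFu;  // addic t, x, -1
      uint32_t T = (uint32_t)Sum;
      uint32_t CA = (uint32_t)(Sum >> 32);       // carry out
      uint32_t D = X + ~T + CA;                  // subfe d, t, x
      assert(D == CA);
      return D;                                  // 1 iff X != 0
    }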
addRegisterClass(MVT::i32, PPC::GPRCRegisterClass); addRegisterClass(MVT::f32, PPC::F4RCRegisterClass); @@ -174,10 +178,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); - setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); - setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); - setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand); - setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand); + setOperationAction(ISD::BITCAST, MVT::f32, Expand); + setOperationAction(ISD::BITCAST, MVT::i32, Expand); + setOperationAction(ISD::BITCAST, MVT::i64, Expand); + setOperationAction(ISD::BITCAST, MVT::f64, Expand); // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); @@ -545,7 +549,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). -bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, +bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, bool isUnary) { if (!isUnary) return isVMerge(N, UnitSize, 8, 24); @@ -554,7 +558,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). -bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, +bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, bool isUnary) { if (!isUnary) return isVMerge(N, UnitSize, 0, 16); @@ -569,7 +573,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { "PPC only supports shuffles by bytes!"); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - + // Find the first non-undef value in the shuffle mask. unsigned i; for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) @@ -607,7 +611,7 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. unsigned ElementBase = N->getMaskElt(0); - + // FIXME: Handle UNDEF elements too! 
if (ElementBase >= 16) return false; @@ -635,7 +639,7 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) { APInt APVal, APUndef; unsigned BitSize; bool HasAnyUndefs; - + if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) return CFP->getValueAPF().isNegZero(); @@ -1054,7 +1058,6 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, VT = LD->getMemoryVT(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - ST = ST; Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); } else @@ -1094,158 +1097,126 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, // LowerOperation implementation //===----------------------------------------------------------------------===// -SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) const { - EVT PtrVT = Op.getValueType(); - ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - const Constant *C = CP->getConstVal(); - SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); - SDValue Zero = DAG.getConstant(0, PtrVT); - // FIXME there isn't really any debug info here - DebugLoc dl = Op.getDebugLoc(); - - const TargetMachine &TM = DAG.getTarget(); - - SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, CPI, Zero); - SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, CPI, Zero); - - // If this is a non-darwin platform, we don't support non-static relo models - // yet. - if (TM.getRelocationModel() == Reloc::Static || - !TM.getSubtarget<PPCSubtarget>().isDarwin()) { - // Generate non-pic code that has direct accesses to the constant pool. - // The address of the global is just (hi(&g)+lo(&g)). - return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); +/// GetLabelAccessInfo - Return true if we should reference labels using a +/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags. +static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags, + unsigned &LoOpFlags, const GlobalValue *GV = 0) { + HiOpFlags = PPCII::MO_HA16; + LoOpFlags = PPCII::MO_LO16; + + // Don't use the pic base if not in PIC relocation model. Or if we are on a + // non-darwin platform. We don't support PIC on other platforms yet. + bool isPIC = TM.getRelocationModel() == Reloc::PIC_ && + TM.getSubtarget<PPCSubtarget>().isDarwin(); + if (isPIC) { + HiOpFlags |= PPCII::MO_PIC_FLAG; + LoOpFlags |= PPCII::MO_PIC_FLAG; } - if (TM.getRelocationModel() == Reloc::PIC_) { - // With PIC, the first instruction is actually "GR+hi(&G)". - Hi = DAG.getNode(ISD::ADD, dl, PtrVT, - DAG.getNode(PPCISD::GlobalBaseReg, - DebugLoc(), PtrVT), Hi); + // If this is a reference to a global value that requires a non-lazy-ptr, make + // sure that instruction lowering adds it. 
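Aside: GetLabelAccessInfo hands out MO_HA16/MO_LO16 rather than a plain high half because the low 16 bits are later added as a *signed* immediate; the high part must be pre-adjusted whenever bit 15 of the address is set. A minimal worked example (standalone):

    #include <cassert>
    #include <cstdint>

    uint16_t lo16(uint32_t Addr) { return (uint16_t)Addr; }
    uint16_t ha16(uint32_t Addr) {
      // "High adjusted": bump the high half when the low half will
      // sign-extend negative.
      return (uint16_t)((Addr >> 16) + ((Addr & 0x8000) ? 1 : 0));
    }

    void check(uint32_t Addr) {
      // lis r, ha16(addr); addi r, r, lo16(addr)  -- lo16 sign-extends.
      uint32_t Hi = (uint32_t)ha16(Addr) << 16;
      int32_t LoSExt = (int16_t)lo16(Addr);
      assert(Hi + (uint32_t)LoSExt == Addr);
    }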
+ if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) { + HiOpFlags |= PPCII::MO_NLP_FLAG; + LoOpFlags |= PPCII::MO_NLP_FLAG; + + if (GV->hasHiddenVisibility()) { + HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; + LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG; + } } - Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); - return Lo; + return isPIC; } -SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = Op.getValueType(); - JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); +static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, + SelectionDAG &DAG) { + EVT PtrVT = HiPart.getValueType(); SDValue Zero = DAG.getConstant(0, PtrVT); - // FIXME there isn't really any debug loc here - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = HiPart.getDebugLoc(); - const TargetMachine &TM = DAG.getTarget(); + SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); + SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); - SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, JTI, Zero); - SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, JTI, Zero); + // With PIC, the first instruction is actually "GR+hi(&G)". + if (isPIC) + Hi = DAG.getNode(ISD::ADD, DL, PtrVT, + DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); - // If this is a non-darwin platform, we don't support non-static relo models - // yet. - if (TM.getRelocationModel() == Reloc::Static || - !TM.getSubtarget<PPCSubtarget>().isDarwin()) { - // Generate non-pic code that has direct accesses to the constant pool. - // The address of the global is just (hi(&g)+lo(&g)). - return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); - } + // Generate non-pic code that has direct accesses to the constant pool. + // The address of the global is just (hi(&g)+lo(&g)). + return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); +} - if (TM.getRelocationModel() == Reloc::PIC_) { - // With PIC, the first instruction is actually "GR+hi(&G)". 
- Hi = DAG.getNode(ISD::ADD, dl, PtrVT, - DAG.getNode(PPCISD::GlobalBaseReg, - DebugLoc(), PtrVT), Hi); - } +SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { + EVT PtrVT = Op.getValueType(); + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); + const Constant *C = CP->getConstVal(); - Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); - return Lo; + unsigned MOHiFlag, MOLoFlag; + bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); + SDValue CPIHi = + DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag); + SDValue CPILo = + DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag); + return LowerLabelRef(CPIHi, CPILo, isPIC, DAG); } -SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { - llvm_unreachable("TLS not implemented for PPC."); - return SDValue(); // Not reached +SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + EVT PtrVT = Op.getValueType(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + + unsigned MOHiFlag, MOLoFlag; + bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); + SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); + SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); + return LowerLabelRef(JTIHi, JTILo, isPIC, DAG); } SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); - DebugLoc DL = Op.getDebugLoc(); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - SDValue TgtBA = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true); - SDValue Zero = DAG.getConstant(0, PtrVT); - SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, TgtBA, Zero); - SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, TgtBA, Zero); - - // If this is a non-darwin platform, we don't support non-static relo models - // yet. - const TargetMachine &TM = DAG.getTarget(); - if (TM.getRelocationModel() == Reloc::Static || - !TM.getSubtarget<PPCSubtarget>().isDarwin()) { - // Generate non-pic code that has direct accesses to globals. - // The address of the global is just (hi(&g)+lo(&g)). - return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); - } - if (TM.getRelocationModel() == Reloc::PIC_) { - // With PIC, the first instruction is actually "GR+hi(&G)". - Hi = DAG.getNode(ISD::ADD, DL, PtrVT, - DAG.getNode(PPCISD::GlobalBaseReg, - DebugLoc(), PtrVT), Hi); - } - - return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); + unsigned MOHiFlag, MOLoFlag; + bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); + SDValue TgtBAHi = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOHiFlag); + SDValue TgtBALo = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true, MOLoFlag); + return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); } SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); - // FIXME there isn't really any debug info here - DebugLoc dl = GSDN->getDebugLoc(); + DebugLoc DL = GSDN->getDebugLoc(); const GlobalValue *GV = GSDN->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GSDN->getOffset()); - SDValue Zero = DAG.getConstant(0, PtrVT); - - const TargetMachine &TM = DAG.getTarget(); // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
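Aside: for 64-bit SVR4 the code below emits PPCISD::TOC_ENTRY instead of a hi/lo pair: every global's address lives in a table anchored by r2 and is fetched with a single load. In C terms (a sketch; slot assignment is the linker's business and the names here are illustrative):

    #include <cstdint>

    extern const intptr_t TOC[];       // the table of contents; r2 points here
    inline void *addressOfGlobal(unsigned Slot) {
      return (void *)TOC[Slot];        // one load: ld rN, Slot*8(r2)
    }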
if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { - return DAG.getNode(PPCISD::TOC_ENTRY, dl, MVT::i64, GA, + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); + return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA, DAG.getRegister(PPC::X2, MVT::i64)); } - SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero); - SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero); - - // If this is a non-darwin platform, we don't support non-static relo models - // yet. - if (TM.getRelocationModel() == Reloc::Static || - !TM.getSubtarget<PPCSubtarget>().isDarwin()) { - // Generate non-pic code that has direct accesses to globals. - // The address of the global is just (hi(&g)+lo(&g)). - return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); - } - - if (TM.getRelocationModel() == Reloc::PIC_) { - // With PIC, the first instruction is actually "GR+hi(&G)". - Hi = DAG.getNode(ISD::ADD, dl, PtrVT, - DAG.getNode(PPCISD::GlobalBaseReg, - DebugLoc(), PtrVT), Hi); - } + unsigned MOHiFlag, MOLoFlag; + bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV); - Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); + SDValue GAHi = + DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); + SDValue GALo = + DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); - if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) - return Lo; + SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG); - // If the global is weak or external, we have to go through the lazy - // resolution stub. - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0, - false, false, 0); + // If the global reference is actually to a non-lazy-pointer, we have to do an + // extra load to get the address of the global. 
+ if (MOHiFlag & PPCII::MO_NLP_FLAG) + Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(), + false, false, 0); + return Ptr; } SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { @@ -1353,7 +1324,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), + MachinePointerInfo(SV), false, false, 0); } @@ -1406,43 +1378,47 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // Store first byte : number of int regs SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, - Op.getOperand(1), SV, 0, MVT::i8, - false, false, 0); + Op.getOperand(1), + MachinePointerInfo(SV), + MVT::i8, false, false, 0); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = - DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8, + DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, + MachinePointerInfo(SV, nextOffset), MVT::i8, false, false, 0); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); // Store second word : arguments given on stack SDValue thirdStore = - DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset, + DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, + MachinePointerInfo(SV, nextOffset), false, false, 0); nextOffset += FrameOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); // Store third word : arguments given in registers - return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset, + return DAG.getStore(thirdStore, dl, FR, nextPtr, + MachinePointerInfo(SV, nextOffset), false, false, 0); } #include "PPCGenCallingConv.inc" -static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, EVT &ValVT, EVT &LocVT, +static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { return true; } -static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT, - EVT &LocVT, +static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { @@ -1451,7 +1427,7 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumArgRegs = array_lengthof(ArgRegs); - + unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); // Skip one register if the first unallocated register has an even register @@ -1461,15 +1437,15 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT, if (RegNum != NumArgRegs && RegNum % 2 == 1) { State.AllocateReg(ArgRegs[RegNum]); } - + // Always return false here, as this function only makes sure that the first // unallocated register has an odd register number and does not actually // allocate a register for the current argument. 
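Aside: CC_PPC_SVR4_Custom_AlignArgRegs above skips a GPR so that a 64-bit argument starts in an aligned register pair. With the argument order R3..R10 mapped to indices 0..7, the rule reduces to the following (sketch; the helper name is illustrative):

    // Round the next free index up to even so an i64 lands in an aligned
    // pair (R3:R4, R5:R6, R7:R8, R9:R10).
    unsigned firstIndexForI64(unsigned NextFreeIdx) {
      return (NextFreeIdx % 2 == 1) ? NextFreeIdx + 1 : NextFreeIdx;
    }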
return false; } -static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT, - EVT &LocVT, +static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { @@ -1479,7 +1455,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT, }; const unsigned NumArgRegs = array_lengthof(ArgRegs); - + unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); // If there is only one Floating-point register left we need to put both f64 @@ -1487,7 +1463,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT, if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { State.AllocateReg(ArgRegs[RegNum]); } - + // Always return false here, as this function only makes sure that the two f64 // values a ppc_fp128 value is split into are both passed in registers or both // passed on the stack and does not actually allocate a register for the @@ -1572,7 +1548,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Specifications: // System V Application Binary Interface PowerPC Processor Supplement // AltiVec Technology Programming Interface Manual - + MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); @@ -1588,18 +1564,18 @@ PPCTargetLowering::LowerFormalArguments_SVR4( *DAG.getContext()); // Reserve space for the linkage area on the stack. - CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize); + CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4); - + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - + // Arguments stored in registers. if (VA.isRegLoc()) { TargetRegisterClass *RC; EVT ValVT = VA.getValVT(); - + switch (ValVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("ValVT not supported by formal arguments Lowering"); @@ -1619,9 +1595,9 @@ PPCTargetLowering::LowerFormalArguments_SVR4( RC = PPC::VRRCRegisterClass; break; } - + // Transform the arguments stored in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT); InVals.push_back(ArgValue); @@ -1635,7 +1611,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo(), false, false, 0)); } } @@ -1654,7 +1631,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Area that is at least reserved in the caller of this function. unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); - + // Set the size that is at least reserved in caller of this function. 
Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
@@ -1663,17 +1640,17 @@
  MinReservedArea = std::max(MinReservedArea,
-                             PPCFrameInfo::getMinCallFrameSize(false, false));
-  
-  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+                             PPCFrameLowering::getMinCallFrameSize(false, false));
+
+  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
    getStackAlignment();
  unsigned AlignMask = TargetAlign-1;
  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
-  
+
  FI->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;
-  
+
  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
@@ -1705,28 +1682,18 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
    FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

-    // The fixed integer arguments of a variadic function are
-    // stored to the VarArgsFrameIndex on the stack.
-    unsigned GPRIndex = 0;
-    for (; GPRIndex != FuncInfo->getVarArgsNumGPR(); ++GPRIndex) {
-      SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT);
-      SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0,
-                                   false, false, 0);
-      MemOps.push_back(Store);
-      // Increment the address by four for the next argument to store
-      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
-      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
-    }
-
-    // If this function is vararg, store any remaining integer argument regs
-    // to their spots on the stack so that they may be loaded by dereferencing the
-    // result of va_next.
-    for (; GPRIndex != NumGPArgRegs; ++GPRIndex) {
-      unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
+    // The fixed integer arguments of a variadic function are stored to the
+    // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
+    // the result of va_next.
+    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
+      // Get an existing live-in vreg, or add a new one.
+      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
+      if (!VReg)
+        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass, dl);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
-      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
-                                   false, false, 0);
+      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
@@ -1735,27 +1702,17 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
-    // The double arguments are stored to the VarArgsFrameIndex
-    // on the stack.
- unsigned FPRIndex = 0; - for (FPRIndex = 0; FPRIndex != FuncInfo->getVarArgsNumFPR(); ++FPRIndex) { - SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64); - SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, - false, false, 0); - MemOps.push_back(Store); - // Increment the address by eight for the next argument to store - SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, - PtrVT); - FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); - } - - for (; FPRIndex != NumFPArgRegs; ++FPRIndex) { - unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); + for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { + // Get an existing live-in vreg, or add a new one. + unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); + if (!VReg) + VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass, dl); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, - false, false, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(), false, false, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, @@ -1791,7 +1748,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; - unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true); + unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; @@ -1915,18 +1872,18 @@ PPCTargetLowering::LowerFormalArguments_Darwin( InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - NULL, 0, + MachinePointerInfo(), ObjSize==1 ? MVT::i8 : MVT::i16, false, false, 0); MemOps.push_back(Store); ++GPR_idx; } - + ArgOffset += PtrByteSize; - + continue; } for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { @@ -1934,11 +1891,12 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // to memory. ArgVal will be address of the beginning of // the object. 
if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl); int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(), false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -1956,7 +1914,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( case MVT::i32: if (!isPPC64) { if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); ++GPR_idx; } else { @@ -1970,7 +1928,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // FALLTHROUGH case MVT::i64: // PPC64 if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32) { @@ -2008,9 +1966,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg; if (ObjectVT == MVT::f32) - VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass, dl); else - VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass, dl); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; @@ -2028,7 +1986,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // Note that vector arguments in registers don't reserve stack space, // except in varargs functions. if (VR_idx != Num_VR_Regs) { - unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); + unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass, dl); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { @@ -2063,7 +2021,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CurArgOffset + (ArgSize - ObjSize), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), false, false, 0); } @@ -2082,8 +2040,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( } MinReservedArea = std::max(MinReservedArea, - PPCFrameInfo::getMinCallFrameSize(isPPC64, true)); - unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()-> + PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); + unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> getStackAlignment(); unsigned AlignMask = TargetAlign-1; MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; @@ -2104,15 +2062,15 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // result of va_next. 
for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { unsigned VReg; - + if (isPPC64) - VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl); else - VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, - false, false, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(), false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -2141,7 +2099,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. - unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, true); + unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true); unsigned NumOps = Outs.size(); unsigned PtrByteSize = isPPC64 ? 8 : 4; @@ -2153,7 +2111,6 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // 16-byte aligned. nAltivecParamsAtEnd = 0; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; EVT ArgVT = Outs[i].VT; // Varargs Altivec parameters are padded to a 16 byte boundary. @@ -2183,11 +2140,11 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. NumBytes = std::max(NumBytes, - PPCFrameInfo::getMinCallFrameSize(isPPC64, true)); + PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); // Tail call needs the stack to be aligned. if (CC==CallingConv::Fast && GuaranteedTailCallOpt) { - unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()-> + unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> getStackAlignment(); unsigned AlignMask = TargetAlign-1; NumBytes = (NumBytes + AlignMask) & ~AlignMask; @@ -2292,8 +2249,8 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, int FI = TailCallArgs[i].FrameIdx; // Store relative to framepointer. MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, - PseudoSourceValue::getFixedStack(FI), - 0, false, false, 0)); + MachinePointerInfo::getFixedStack(FI), + false, false, 0)); } } @@ -2311,26 +2268,26 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, if (SPDiff) { // Calculate the new stack slot for the return address. int SlotSize = isPPC64 ? 8 : 4; - int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64, + int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewRetAddrLoc, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewRetAddr), 0, + MachinePointerInfo::getFixedStack(NewRetAddr), false, false, 0); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. 
if (isDarwinABI) { int NewFPLoc = - SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); + SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, true); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, - PseudoSourceValue::getFixedStack(NewFPIdx), 0, + MachinePointerInfo::getFixedStack(NewFPIdx), false, false, 0); } } @@ -2369,15 +2326,15 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, // Load the LR and FP stack slot for later adjusting. EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); - LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0, + LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), false, false, 0); Chain = SDValue(LROpOut.getNode(), 1); - + // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack // slot as the FP is never overwritten. if (isDarwinABI) { FPOpOut = getFramePointerFrameIndex(DAG); - FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0, + FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), false, false, 0); Chain = SDValue(FPOpOut.getNode(), 1); } @@ -2397,7 +2354,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, DebugLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - false, false, NULL, 0, NULL, 0); + false, false, MachinePointerInfo(0), + MachinePointerInfo(0)); } /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of @@ -2407,7 +2365,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVector<SDValue, 8> &MemOpChains, - SmallVector<TailCallArgumentInfo, 8>& TailCallArguments, + SmallVector<TailCallArgumentInfo, 8> &TailCallArguments, DebugLoc dl) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); if (!isTailCall) { @@ -2420,8 +2378,8 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, PtrVT)); } - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, - false, false, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), false, false, 0)); // Calculate and remember argument location. } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, TailCallArguments); @@ -2460,10 +2418,14 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall, SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys, - bool isPPC64, bool isSVR4ABI) { + const PPCSubtarget &PPCSubTarget) { + + bool isPPC64 = PPCSubTarget.isPPC64(); + bool isSVR4ABI = PPCSubTarget.isSVR4ABI(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); NodeTys.push_back(MVT::Other); // Returns a chain - NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. + NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. unsigned CallOpc = isSVR4ABI ? 
PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin; @@ -2473,24 +2435,49 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, Callee = SDValue(Dest, 0); needIndirectCall = false; } - // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 - // Use indirect calls for ALL functions calls in JIT mode, since the - // far-call stubs may be outside relocation limits for a BL instruction. - if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every - // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol - // node so that legalize doesn't hack it. - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 + // Use indirect calls for ALL functions calls in JIT mode, since the + // far-call stubs may be outside relocation limits for a BL instruction. + if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { + unsigned OpFlags = 0; + if (DAG.getTarget().getRelocationModel() != Reloc::Static && + PPCSubTarget.getDarwinVers() < 9 && + (G->getGlobal()->isDeclaration() || + G->getGlobal()->isWeakForLinker())) { + // PC-relative references to external symbols should go through $stub, + // unless we're building with the leopard linker or later, which + // automatically synthesizes these stubs. + OpFlags = PPCII::MO_DARWIN_STUB; + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, + // every direct call is) turn it into a TargetGlobalAddress / + // TargetExternalSymbol node so that legalize doesn't hack it. Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, - Callee.getValueType()); + Callee.getValueType(), + 0, OpFlags); needIndirectCall = false; } } + if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), - Callee.getValueType()); - needIndirectCall = false; + unsigned char OpFlags = 0; + + if (DAG.getTarget().getRelocationModel() != Reloc::Static && + PPCSubTarget.getDarwinVers() < 9) { + // PC-relative references to external symbols should go through $stub, + // unless we're building with the leopard linker or later, which + // automatically synthesizes these stubs. + OpFlags = PPCII::MO_DARWIN_STUB; + } + + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), + OpFlags); + needIndirectCall = false; } + if (needIndirectCall) { // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair // to do the call, we can't use PPCISD::CALL. @@ -2525,7 +2512,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // Load the address of the function entry point from the function // descriptor. - SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Flag); + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue); SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps, InFlag.getNode() ? 3 : 2); Chain = LoadFuncPtr.getValue(1); @@ -2552,7 +2539,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // prevents the register allocator from allocating it), resulting in an // additional register being allocated and an unnecessary move instruction // being generated. 
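Aside: the "function descriptor" dereferenced above is how the 64-bit SVR4 ABI (ELF ABI v1) represents a function pointer: three doublewords rather than a code address. Sketched as a struct (an illustrative layout, matching the entry-point and TOC loads emitted above):

    #include <cstdint>

    struct FunctionDescriptor {
      uint64_t EntryPoint;   // loaded and moved to CTR for the bctrl
      uint64_t TOCBase;      // becomes the callee's r2
      uint64_t Environment;  // static-chain slot, carried in r11
    };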
- VTs = DAG.getVTList(MVT::Other, MVT::Flag); + VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, Callee, InFlag); Chain = LoadTOCPtr.getValue(0); @@ -2569,7 +2556,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, NodeTys.clear(); NodeTys.push_back(MVT::Other); - NodeTys.push_back(MVT::Flag); + NodeTys.push_back(MVT::Glue); Ops.push_back(Chain); CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin; Callee.setNode(0); @@ -2637,8 +2624,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, isTailCall, RegsToPass, Ops, NodeTys, - PPCSubTarget.isPPC64(), - PPCSubTarget.isSVR4ABI()); + PPCSubTarget); // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in @@ -2684,7 +2670,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // stack frame. If caller and callee belong to the same module (and have the // same TOC), the NOP will remain unchanged. if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); if (CallOpc == PPCISD::BCTRL_SVR4) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. @@ -2699,7 +2685,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, InFlag = Chain.getValue(1); } else { // Otherwise insert NOP. - InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Flag, InFlag); + InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag); } } @@ -2726,15 +2712,14 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); - if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { + if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); - } else { - return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, OutVals, Ins, - dl, DAG, InVals); - } + + return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, + isTailCall, Outs, OutVals, Ins, + dl, DAG, InVals); } SDValue @@ -2763,7 +2748,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // in this function's (MF) stack pointer stack slot 0(SP). if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); - + // Count how many bytes are to be pushed on the stack, including the linkage // area, parameter list area and the part of the local variable space which // contains copies of aggregates which are passed by value. @@ -2774,19 +2759,19 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. - CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize); + CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); if (isVarArg) { // Handle fixed and variable vector arguments differently. // Fixed vector arguments go into registers as long as registers are // available. Variable vector arguments always go into memory. 
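Aside: the PPCISD::NOP emitted earlier in this hunk is not mere padding. On 64-bit SVR4 a cross-module call may be routed through a stub that switches TOCs, and the linker then rewrites the nop after the call into a load restoring the caller's r2 from its linkage-area save slot. Schematically (the offset is the ELF ABI v1 convention and is assumed here, not taken from this patch):

    // bl   callee     ; may be redirected through a TOC-switching stub
    // nop             ; linker may rewrite to: ld r2, 40(r1)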
unsigned NumArgs = Outs.size(); - + for (unsigned i = 0; i != NumArgs; ++i) { - EVT ArgVT = Outs[i].VT; + MVT ArgVT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; bool Result; - + if (Outs[i].IsFixed) { Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); @@ -2794,11 +2779,11 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); } - + if (Result) { #ifndef NDEBUG errs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString() << "\n"; + << EVT(ArgVT).getEVTString() << "\n"; #endif llvm_unreachable(0); } @@ -2807,7 +2792,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // All arguments are treated the same. CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4); } - + // Assign locations to all of the outgoing aggregate by value arguments. SmallVector<CCValAssign, 16> ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, getTargetMachine(), ByValArgLocs, @@ -2822,7 +2807,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // space variable where copies of aggregates which are passed by value are // stored. unsigned NumBytes = CCByValInfo.getNextStackOffset(); - + // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); @@ -2842,7 +2827,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // arguments that may not fit in the registers available for argument // passing. SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); - + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<TailCallArgumentInfo, 8> TailCallArguments; SmallVector<SDValue, 8> MemOpChains; @@ -2854,7 +2839,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; - + if (Flags.isByVal()) { // Argument is an aggregate which is passed by value, thus we need to // create a copy of it in the local variable space of the current stack @@ -2863,33 +2848,33 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, assert((j < ByValArgLocs.size()) && "Index out of bounds!"); CCValAssign &ByValVA = ByValArgLocs[j++]; assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); - + // Memory reserved in the local variable space of the callers stack frame. unsigned LocMemOffset = ByValVA.getLocMemOffset(); - + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); - + // Create a copy of the argument in the local area of the current // stack frame. SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, CallSeqStart.getNode()->getOperand(0), Flags, DAG, dl); - + // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, CallSeqStart.getNode()->getOperand(1)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); Chain = CallSeqStart = NewCallSeqStart; - + // Pass the address of the aggregate copy on the stack either in a // physical register or in the parameter list area of the current stack // frame to the callee. Arg = PtrOff; } - + if (VA.isRegLoc()) { // Put argument in a physical register. 
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); @@ -2903,7 +2888,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset, + MachinePointerInfo(), false, false, 0)); } else { // Calculate and remember argument location. @@ -2912,11 +2897,11 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, } } } - + if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - + // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; @@ -2925,7 +2910,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } - + // Set CR6 to true if this is a vararg call. if (isVarArg) { SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0); @@ -2933,10 +2918,9 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } - if (isTailCall) { + if (isTailCall) PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, false, TailCallArguments); - } return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, @@ -3012,7 +2996,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. - unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true); + unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; static const unsigned GPR_32[] = { // 32-bit registers. @@ -3066,8 +3050,9 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Everything else is passed left-justified. EVT VT = (Size==1) ? 
MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, dl, Chain, Arg, - NULL, 0, VT, false, false, 0); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, + MachinePointerInfo(), VT, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -3104,7 +3089,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0, + SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, + MachinePointerInfo(), false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -3136,21 +3122,22 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); if (isVarArg) { - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, - false, false, 0); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), false, false, 0); MemOpChains.push_back(Store); // Float varargs are always shadowed in available integer registers if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0, - false, false, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, + MachinePointerInfo(), false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0, + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, + MachinePointerInfo(), false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -3194,11 +3181,12 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // entirely in R registers. Maybe later. PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, PtrVT)); - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, - false, false, 0); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), false, false, 0); MemOpChains.push_back(Store); if (VR_idx != NumVRs) { - SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0, + SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, + MachinePointerInfo(), false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); @@ -3209,7 +3197,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, PtrVT)); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0, + SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -3275,14 +3263,14 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // TOC save area offset. 
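The 40 in the next statement is that offset: the TOC save slot is the sixth doubleword of the 64-bit linkage area (back chain, CR save, LR save, two reserved words, then TOC), so it sits 5 * 8 = 40 bytes from SP. A one-line restatement:

    int main() {
      const unsigned tocSlot = 5, slotBytes = 8;  // slot index, doublewords
      return tocSlot * slotBytes == 40 ? 0 : 1;   // exit 0: offsets agree
    }
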
SDValue PtrOff = DAG.getIntPtrConstant(40); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0, + Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), false, false, 0); } // On Darwin, R12 must contain the address of an indirect callee. This does // not mean the MTCTR instruction must use R12; it's easier to model this as // an extra parameter, so do that. - if (!isTailCall && + if (!isTailCall && !dyn_cast<GlobalAddressSDNode>(Callee) && !dyn_cast<ExternalSymbolSDNode>(Callee) && !isBLACompatibleAddress(Callee, DAG)) @@ -3298,10 +3286,9 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } - if (isTailCall) { + if (isTailCall) PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - } return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, @@ -3362,14 +3349,15 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDValue SaveSP = Op.getOperand(1); // Load the old link SP. - SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0, + SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, + MachinePointerInfo(), false, false, 0); // Restore the stack pointer. Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); // Store the old link SP. - return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0, + return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(), false, false, 0); } @@ -3390,7 +3378,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { // If the frame pointer save index hasn't been defined yet. if (!RASI) { // Find out what the fix offset of the frame pointer save area. - int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); + int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true); // Save the result. @@ -3414,7 +3402,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { // If the frame pointer save index hasn't been defined yet. if (!FPSI) { // Find out what the fix offset of the frame pointer save area. - int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, + int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. @@ -3533,7 +3521,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : - PPCISD::FCTIDZ, + PPCISD::FCTIDZ, dl, MVT::f64, Src); break; case MVT::i64: @@ -3545,15 +3533,15 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); // Emit a store to the stack slot. - SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0, - false, false, 0); + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, + MachinePointerInfo(), false, false, 0); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias. 
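The bias is a big-endian artifact: fctiwz leaves its 32-bit result in the low half of an f64, the slot is stored as eight bytes, and on big-endian PowerPC the low word of a doubleword sits at byte offset 4. A compile-time restatement of that arithmetic (the slot size in bytes is the parameter):

    // Offset of the low 32 bits inside a big-endian slot of the given size.
    constexpr unsigned lowWordOffsetBE(unsigned slotBytes) {
      return slotBytes - 4;
    }
    static_assert(lowWordOffsetBE(8) == 4,
                  "matches the +4 added for MVT::i32 below");

    int main() { return 0; }  // the check above runs at compile time
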
if (Op.getValueType() == MVT::i32) FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); - return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0, + return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(), false, false, 0); } @@ -3565,8 +3553,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, return SDValue(); if (Op.getOperand(0).getValueType() == MVT::i64) { - SDValue Bits = DAG.getNode(ISD::BIT_CONVERT, dl, - MVT::f64, Op.getOperand(0)); + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0)); SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits); if (Op.getValueType() == MVT::f32) FP = DAG.getNode(ISD::FP_ROUND, dl, @@ -3591,14 +3578,15 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, // STD the extended value into the stack slot. MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx), - MachineMemOperand::MOStore, 0, 8, 8); + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + MachineMemOperand::MOStore, 8, 8); SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx }; SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other), Ops, 4, MVT::i64, MMO); // Load the value as a double. - SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0, false, false, 0); + SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(), + false, false, 0); // FCFID it and return it. SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); @@ -3637,19 +3625,19 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, // Save FP Control Word to register NodeTys.push_back(MVT::f64); // return register - NodeTys.push_back(MVT::Flag); // unused in this context + NodeTys.push_back(MVT::Glue); // unused in this context SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); // Save FP register to stack slot int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, - StackSlot, NULL, 0, false, false, 0); + StackSlot, MachinePointerInfo(), false, false,0); // Load FP Control Word from low 32 bits of stack slot. SDValue Four = DAG.getConstant(4, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); - SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0, + SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(), false, false, 0); // Transform as necessary @@ -3786,7 +3774,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, Ops.assign(CanonicalVT.getVectorNumElements(), Elt); SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0], Ops.size()); - return DAG.getNode(ISD::BIT_CONVERT, dl, ReqVT, Res); + return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); } /// BuildIntrinsicOp - Return a binary operator intrinsic node with the @@ -3815,14 +3803,14 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, DebugLoc dl) { // Force LHS/RHS to be the right type. 
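That forcing is to v16i8 because vsldoi is byte-granular: the shuffle mask i + Amt built below selects the 16-byte window starting Amt bytes into the 32-byte concatenation LHS||RHS, which is exactly a left shift by Amt bytes across the pair. A self-check of the window arithmetic (vsldoi's shift field is 4 bits, so Amt in [0,15] is assumed):

    #include <cassert>

    int main() {
      for (unsigned Amt = 0; Amt != 16; ++Amt)
        for (unsigned i = 0; i != 16; ++i) {
          unsigned idx = i + Amt;   // byte index into concat(LHS, RHS)
          assert(idx < 32);         // stays inside the 32-byte pair
        }
      return 0;
    }
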
- LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS); - RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS); + LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); + RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); int Ops[16]; for (unsigned i = 0; i != 16; ++i) Ops[i] = i + Amt; SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T); + return DAG.getNode(ISD::BITCAST, dl, VT, T); } // If this is a case we can't handle, return null and let the default @@ -3856,7 +3844,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { SDValue Z = DAG.getConstant(0, MVT::i32); Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); - Op = DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Z); + Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); } return Op; } @@ -3875,7 +3863,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl); Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res); - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is @@ -3891,7 +3879,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // xor by OnesV to invert it. Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // Check to see if this is a wide variety of vsplti*, binop self cases. @@ -3917,7 +3905,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, Intrinsic::ppc_altivec_vslw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // vsplti + srl self. @@ -3928,7 +3916,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, Intrinsic::ppc_altivec_vsrw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // vsplti + sra self. @@ -3939,7 +3927,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, Intrinsic::ppc_altivec_vsraw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // vsplti + rol self. 
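All of these rewrites work around vspltis* accepting only immediates in [-16,15]. The first one above halves an even value and adds the splat to itself; the odd cases in the next hunk splat a value shifted into range and then subtract or add a vsplti(-16). The arithmetic, checked exhaustively:

    #include <cassert>

    int main() {
      // Even values in [-32,30]: x = vsplti(v >> 1); v == x + x.
      for (int v = -32; v <= 30; v += 2) {
        int half = v >> 1;                   // arithmetic shift
        assert(-16 <= half && half <= 15);   // fits the 5-bit immediate
        assert(half + half == v);
      }
      // Odd, [17,31]: vsplti(v-16) - vsplti(-16); odd, [-31,-17]: + instead.
      for (int v = 17; v <= 31; v += 2)
        assert(-16 <= v - 16 && v - 16 <= 15 && (v - 16) - (-16) == v);
      for (int v = -31; v <= -17; v += 2)
        assert(-16 <= v + 16 && v + 16 <= 15 && (v + 16) + (-16) == v);
      return 0;
    }
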
@@ -3951,7 +3939,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, Intrinsic::ppc_altivec_vrlw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // t = vsplti c, result = vsldoi t, t, 1 @@ -3978,14 +3966,14 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl); SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS); - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); } // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). if (SextVal >= -31 && SextVal <= 0) { SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl); SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS); - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); } return SDValue(); @@ -4062,10 +4050,10 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); } EVT VT = OpLHS.getValueType(); - OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS); - OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS); + OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); + OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T); + return DAG.getNode(ISD::BITCAST, dl, VT, T); } /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this @@ -4118,7 +4106,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // perfect shuffle table to emit an optimal matching sequence. SmallVector<int, 16> PermMask; SVOp->getMask(PermMask); - + unsigned PFIndexes[4]; bool isFourElementShuffle = true; for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number @@ -4253,7 +4241,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), Op.getOperand(1), Op.getOperand(2), DAG.getConstant(CompareOpc, MVT::i32)); - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Tmp); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); } // Create the PPCISD altivec 'dot' comparison node. @@ -4264,7 +4252,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, }; std::vector<EVT> VTs; VTs.push_back(Op.getOperand(2).getValueType()); - VTs.push_back(MVT::Flag); + VTs.push_back(MVT::Glue); SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); // Now that we have the comparison, emit a copy from the CR to a GPR. @@ -4317,10 +4305,10 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, // Store the input value into Value#0 of the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Op.getOperand(0), FIdx, NULL, 0, + Op.getOperand(0), FIdx, MachinePointerInfo(), false, false, 0); // Load it out. 
- return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0, + return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), false, false, 0); } @@ -4336,9 +4324,9 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); // Shrinkify inputs to v8i16. - LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, LHS); - RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHS); - RHSSwap = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHSSwap); + LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); + RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); + RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); // Low parts multiplied together, generating 32-bit results (we ignore the // top parts). @@ -4364,12 +4352,12 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { // Multiply the even 8-bit parts, producing 16-bit sums. SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, LHS, RHS, DAG, dl, MVT::v8i16); - EvenParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, EvenParts); + EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); // Multiply the odd 8-bit parts, producing 16-bit sums. SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, LHS, RHS, DAG, dl, MVT::v8i16); - OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts); + OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); // Merge the results together. int Ops[16]; @@ -4391,7 +4379,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC"); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); @@ -4456,20 +4444,20 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SDValue Ops[4], Result, MFFSreg, InFlag, FPreg; NodeTys.push_back(MVT::f64); // Return register - NodeTys.push_back(MVT::Flag); // Returns a flag for later insns + NodeTys.push_back(MVT::Glue); // Returns a flag for later insns Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); MFFSreg = Result.getValue(0); InFlag = Result.getValue(1); NodeTys.clear(); - NodeTys.push_back(MVT::Flag); // Returns a flag + NodeTys.push_back(MVT::Glue); // Returns a flag Ops[0] = DAG.getConstant(31, MVT::i32); Ops[1] = InFlag; Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2); InFlag = Result.getValue(0); NodeTys.clear(); - NodeTys.push_back(MVT::Flag); // Returns a flag + NodeTys.push_back(MVT::Glue); // Returns a flag Ops[0] = DAG.getConstant(30, MVT::i32); Ops[1] = InFlag; Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2); @@ -4477,7 +4465,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, NodeTys.clear(); NodeTys.push_back(MVT::f64); // result of add - NodeTys.push_back(MVT::Flag); // Returns a flag + NodeTys.push_back(MVT::Glue); // Returns a flag Ops[0] = Lo; Ops[1] = Hi; Ops[2] = InFlag; @@ -5283,7 +5271,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAG.getConstant(CompareOpc, MVT::i32) }; VTs.push_back(LHS.getOperand(2).getValueType()); - VTs.push_back(MVT::Flag); + 
VTs.push_back(MVT::Glue); SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); // Unpack the result based on how the target uses it. @@ -5377,6 +5365,47 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight +PPCTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + const Type *type = CallOperandVal->getType(); + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + case 'b': + if (type->isIntegerTy()) + weight = CW_Register; + break; + case 'f': + if (type->isFloatTy()) + weight = CW_Register; + break; + case 'd': + if (type->isDoubleTy()) + weight = CW_Register; + break; + case 'v': + if (type->isVectorTy()) + weight = CW_Register; + break; + case 'y': + weight = CW_Register; + break; + } + return weight; +} + std::pair<unsigned, const TargetRegisterClass*> PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { @@ -5536,19 +5565,19 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = - - DAG.getConstant(PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI), + + DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI), isPPC64? MVT::i64 : MVT::i32); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0, false, false, 0); + MachinePointerInfo(), false, false, 0); } // Just load the return address off the stack. SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0, false, false, 0); + RetAddrFI, MachinePointerInfo(), false, false, 0); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, @@ -5571,7 +5600,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, PtrVT); while (Depth--) FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), - FrameAddr, NULL, 0, false, false, 0); + FrameAddr, MachinePointerInfo(), false, false, 0); return FrameAddr; } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 700816f..80cab75 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -308,6 +308,12 @@ namespace llvm { bool is8bit, unsigned Opcode) const; ConstraintType getConstraintType(const std::string &Constraint) const; + + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. 
+ ConstraintWeight getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const; + std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; @@ -383,7 +389,6 @@ namespace llvm { SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index a0781b9..6636b69 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -23,9 +23,11 @@ def u16imm64 : Operand<i64> { } def symbolHi64 : Operand<i64> { let PrintMethod = "printSymbolHi"; + let EncoderMethod = "getHA16Encoding"; } def symbolLo64 : Operand<i64> { let PrintMethod = "printSymbolLo"; + let EncoderMethod = "getLO16Encoding"; } //===----------------------------------------------------------------------===// @@ -58,7 +60,7 @@ def HI48_64 : SDNodeXForm<imm, [{ // let Defs = [LR8] in - def MovePCtoLR8 : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>, + def MovePCtoLR8 : Pseudo<(outs), (ins piclabel:$label), "", []>, PPC970_Unit_BRU; // Darwin ABI Calls. @@ -130,39 +132,31 @@ def : Pat<(PPCnop), let usesCustomInserter = 1 in { let Uses = [CR0] in { def ATOMIC_LOAD_ADD_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), - "${:comment} ATOMIC_LOAD_ADD_I64 PSEUDO!", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>; def ATOMIC_LOAD_SUB_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), - "${:comment} ATOMIC_LOAD_SUB_I64 PSEUDO!", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>; def ATOMIC_LOAD_OR_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), - "${:comment} ATOMIC_LOAD_OR_I64 PSEUDO!", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>; def ATOMIC_LOAD_XOR_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), - "${:comment} ATOMIC_LOAD_XOR_I64 PSEUDO!", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>; def ATOMIC_LOAD_AND_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), - "${:comment} ATOMIC_LOAD_AND_I64 PSEUDO!", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>; def ATOMIC_LOAD_NAND_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), - "${:comment} ATOMIC_LOAD_NAND_I64 PSEUDO!", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>; def ATOMIC_CMP_SWAP_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), - "${:comment} ATOMIC_CMP_SWAP_I64 PSEUDO!", + (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "", [(set G8RC:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>; def ATOMIC_SWAP_I64 : Pseudo< - (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), - "${:comment} ATOMIC_SWAP_I64 PSEUDO!", + (outs G8RC:$dst), (ins memrr:$ptr, 
G8RC:$new), "", [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>; } } @@ -240,8 +234,7 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS), } let Defs = [X1], Uses = [X1] in -def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi), - "${:comment} DYNALLOC8 $result, $negsize, $fpsi", +def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"", [(set G8RC:$result, (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>; @@ -500,7 +493,7 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), // Update forms. let mayLoad = 1 in -def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, +def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, ptr_rc:$rA), "lhau $rD, $disp($rA)", LdStGeneral, []>, RegConstraint<"$rA = $ea_result">, @@ -555,18 +548,20 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; -def LDtoc: DSForm_1<58, 0, (outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), - "ld $rD, $disp($reg)", LdStLD, - [(set G8RC:$rD, +def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "", + [(set G8RC:$rD, (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; -let RST = 2, DS = 8 in + +let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg), "ld 2, 8($reg)", LdStLD, [(PPCload_toc G8RC:$reg)]>, isPPC64; -let RST = 2, DS = 40, RA = 1 in + +let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. def LDtoc_restore : DSForm_1<58, 0, (outs), (ins), "ld 2, 40(1)", LdStLD, - []>, isPPC64; + [(PPCtoc_restore)]>, isPPC64; def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), "ldx $rD, $src", LdStLD, [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; @@ -579,8 +574,6 @@ def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr } -def : Pat<(PPCtoc_restore), - (LDtoc_restore)>; def : Pat<(PPCload ixaddr:$src), (LD ixaddr:$src)>; def : Pat<(PPCload xaddr:$src), @@ -621,14 +614,14 @@ def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), let PPC970_Unit = 2 in { -def STBU8 : DForm_1<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, +def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), "stbu $rS, $ptroff($ptrreg)", LdStGeneral, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, +def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), "sthu $rS, $ptroff($ptrreg)", LdStGeneral, [(set ptr_rc:$ea_res, @@ -636,8 +629,8 @@ def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STDU : DSForm_1<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - s16immX4:$ptroff, ptr_rc:$ptrreg), +def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, + s16immX4:$ptroff, ptr_rc:$ptrreg), "stdu $rS, $ptroff($ptrreg)", LdStSTD, [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 4357bdc..84a15b1 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -102,6 +102,19 @@ class DForm_1<bits<6> opcode, dag OOL, 
dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : I<opcode, OOL, IOL, asmstr, itin> { bits<5> A; + bits<21> Addr; + + let Pattern = pattern; + + let Inst{6-10} = A; + let Inst{11-15} = Addr{20-16}; // Base Reg + let Inst{16-31} = Addr{15-0}; // Displacement +} + +class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> A; bits<16> C; bits<5> B; @@ -112,6 +125,7 @@ class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr, let Inst{16-31} = C; } + class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>; @@ -147,8 +161,7 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> { let A = 0; - let B = 0; - let C = 0; + let Addr = 0; } class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr, @@ -188,17 +201,31 @@ class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : I<opcode, OOL, IOL, asmstr, itin> { bits<5> RST; - bits<14> DS; - bits<5> RA; + bits<19> DS_RA; let Pattern = pattern; let Inst{6-10} = RST; - let Inst{11-15} = RA; - let Inst{16-29} = DS; + let Inst{11-15} = DS_RA{18-14}; // Register # + let Inst{16-29} = DS_RA{13-0}; // Displacement. let Inst{30-31} = xo; } +class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RST; + bits<14> DS; + bits<5> RA; + + let Pattern = pattern; + + let Inst{6-10} = RST; + let Inst{11-15} = RA; + let Inst{16-29} = DS; + let Inst{30-31} = xo; +} + // 1.7.6 X-Form class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index c17108f..53b0491 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -17,6 +17,7 @@ #include "PPCPredicates.h" #include "PPCGenInstrInfo.inc" #include "PPCTargetMachine.h" +#include "PPCHazardRecognizers.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -39,7 +40,19 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm), RI(*TM.getSubtargetImpl(), *this) {} -unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, +/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for +/// this target when scheduling the DAG. +ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( + const TargetMachine *TM, + const ScheduleDAG *DAG) const { + // Should use subtarget info to pick the right hazard recognizer. For + // now, always return a PPC970 recognizer. 
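On the FIXME above, a hypothetical subtarget-aware body (a sketch only, not code from this patch; it assumes the era's getDarwinDirective()/PPC::DIR_970 subtarget hooks and that the base ScheduleHazardRecognizer is the no-hazard default). The hook's actual body continues below.

    // Hypothetical: choose the recognizer from the CPU directive instead of
    // hard-wiring the PPC970 one.
    ScheduleHazardRecognizer *
    createPPCHazardRecognizer(const PPCSubtarget &ST,
                              const TargetInstrInfo &TII) {
      if (ST.getDarwinDirective() == PPC::DIR_970)
        return new PPCHazardRecognizer970(TII);
      return new ScheduleHazardRecognizer();  // reports no hazards
    }
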
+ const TargetInstrInfo *TII = TM->getInstrInfo(); + assert(TII && "No InstrInfo?"); + return new PPCHazardRecognizer970(*TII); +} + +unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { default: break; @@ -57,7 +70,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return 0; } -unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, +unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { default: break; @@ -84,11 +97,11 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // Normal instructions can be commuted the obvious way. if (MI->getOpcode() != PPC::RLWIMI) return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); - + // Cannot commute if it has a non-zero rotate count. if (MI->getOperand(3).getImm() != 0) return 0; - + // If we have a zero rotate count, we have: // M = mask(MB,ME) // Op0 = (Op1 & ~M) | (Op2 & M) @@ -135,14 +148,14 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { MI->getOperand(1).setReg(Reg2); MI->getOperand(2).setIsKill(Reg1IsKill); MI->getOperand(1).setIsKill(Reg2IsKill); - + // Swap the mask around. MI->getOperand(4).setImm((ME+1) & 31); MI->getOperand(5).setImm((MB-1) & 31); return MI; } -void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, +void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; BuildMI(MBB, MI, DL, get(PPC::NOP)); @@ -169,7 +182,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, // Get the last instruction in the block. MachineInstr *LastInst = I; - + // If there is only one terminator instruction, process it. if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (LastInst->getOpcode() == PPC::B) { @@ -189,7 +202,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, // Otherwise, don't know what this is. return true; } - + // Get the instruction before it if it's a terminator. MachineInstr *SecondLastInst = I; @@ -197,9 +210,9 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; - + // If the block ends with PPC::B and PPC:BCC, handle it. - if (SecondLastInst->getOpcode() == PPC::BCC && + if (SecondLastInst->getOpcode() == PPC::BCC && LastInst->getOpcode() == PPC::B) { if (!SecondLastInst->getOperand(2).isMBB() || !LastInst->getOperand(0).isMBB()) @@ -210,10 +223,10 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, FBB = LastInst->getOperand(0).getMBB(); return false; } - + // If the block ends with two PPC:Bs, handle it. The second one is not // executed, so remove it. - if (SecondLastInst->getOpcode() == PPC::B && + if (SecondLastInst->getOpcode() == PPC::B && LastInst->getOpcode() == PPC::B) { if (!SecondLastInst->getOperand(0).isMBB()) return true; @@ -239,17 +252,17 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { } if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC) return 0; - + // Remove the branch. I->eraseFromParent(); - + I = MBB.end(); if (I == MBB.begin()) return 1; --I; if (I->getOpcode() != PPC::BCC) return 1; - + // Remove the branch. I->eraseFromParent(); return 2; @@ -262,9 +275,9 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL) const { // Shouldn't be a fall through. 
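Stepping back to commuteInstruction above: with a zero rotate count, rlwimi computes (Op1 & ~M) | (Op2 & M) for M = mask(MB, ME), so the operands may swap exactly when the mask is replaced by its complement, and in PPC's wrap-around mask encoding that complement is mask(ME+1, MB-1), the immediates the code installs. An exhaustive check (IBM bit numbering, bit 0 is the MSB; the all-ones mask has no encodable complement and is skipped):

    #include <cassert>
    #include <cstdint>

    // mask(MB, ME): ones from bit MB through ME, wrapping when MB > ME.
    static uint32_t ppcMask(unsigned MB, unsigned ME) {
      uint32_t m = 0;
      for (unsigned i = MB;; i = (i + 1) & 31) {
        m |= 0x80000000u >> i;
        if (i == ME) break;
      }
      return m;
    }

    int main() {
      for (unsigned MB = 0; MB != 32; ++MB)
        for (unsigned ME = 0; ME != 32; ++ME) {
          uint32_t M = ppcMask(MB, ME);
          if (M == 0xFFFFFFFFu) continue;  // complement would be empty
          assert(ppcMask((ME + 1) & 31, (MB - 1) & 31) == ~M);
        }
      return 0;
    }

The InsertBranch hunk resumes below.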
assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - assert((Cond.size() == 2 || Cond.size() == 0) && + assert((Cond.size() == 2 || Cond.size() == 0) && "PPC branch conditions have two components!"); - + // One-way branch. if (FBB == 0) { if (Cond.empty()) // Unconditional branch @@ -274,7 +287,7 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); return 1; } - + // Two-way Conditional Branch. BuildMI(&MBB, DL, get(PPC::BCC)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); @@ -377,11 +390,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // We need to store the CR in the low 4-bits of the saved value. First, // issue a MFCR to save all of the CRBits. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? PPC::R2 : PPC::R0; NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg) .addReg(SrcReg, getKillRegState(isKill))); - + // If the saved register wasn't CR0, shift the bits left so that they are // in CR0's slot. if (SrcReg != PPC::CR0) { @@ -391,7 +404,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, .addReg(ScratchReg).addImm(ShiftBits) .addImm(0).addImm(31)); } - + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) .addReg(ScratchReg, getKillRegState(isKill)), @@ -428,14 +441,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) Reg = PPC::CR7; - return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, + return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, PPC::CRRCRegisterClass, NewMIs); } else if (RC == PPC::VRRCRegisterClass) { // We don't have indexed addressing for vector loads. Emit: // R0 = ADDI FI# // STVX VAL, 0, R0 - // + // // FIXME: We use R0 here, because it isn't available for RA. NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0), FrameIdx, 0, 0)); @@ -469,8 +482,9 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx), - MachineMemOperand::MOStore, /*Offset=*/0, + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), MFI.getObjectAlignment(FrameIdx)); NewMIs.back()->addMemOperand(MF, MMO); @@ -513,9 +527,9 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, // at the moment. unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? PPC::R2 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), ScratchReg), FrameIdx)); - + // If the reloaded register isn't CR0, shift the bits right so that they are // in the right CR's slot. 
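The shift arithmetic in this pair of hunks: CR field i occupies the four bits starting at IBM bit 4*i of the 32-bit CR image, so the spill path rotates left by 4*i to park the field in CR0's slot before the word store, and the reload path rotates by 32 - 4*i to put it back (the 4*i formula is inferred from the surrounding code, not shown in the hunks). Round-trip check:

    #include <cassert>
    #include <cstdint>

    static uint32_t rotl32(uint32_t v, unsigned n) {
      n &= 31;
      return n ? (v << n) | (v >> (32 - n)) : v;
    }

    int main() {
      for (unsigned field = 1; field != 8; ++field) { // CR1..CR7 need a shift
        unsigned shift = 4 * field;
        uint32_t cr = 0xAu << (28 - shift);         // a nibble in field's slot
        uint32_t spilled = rotl32(cr, shift);       // now in CR0's position
        assert(spilled == 0xA0000000u);
        assert(rotl32(spilled, 32 - shift) == cr);  // reload restores it
      }
      return 0;
    }
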
if (DestReg != PPC::CR0) { @@ -525,11 +539,11 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) .addImm(31)); } - + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg) .addReg(ScratchReg)); } else if (RC == PPC::CRBITRCRegisterClass) { - + unsigned Reg = 0; if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT || DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN) @@ -556,14 +570,14 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN) Reg = PPC::CR7; - return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, + return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, PPC::CRRCRegisterClass, NewMIs); } else if (RC == PPC::VRRCRegisterClass) { // We don't have indexed addressing for vector loads. Emit: // R0 = ADDI FI# // Dest = LVX 0, R0 - // + // // FIXME: We use R0 here, because it isn't available for RA. NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::ADDI), PPC::R0), FrameIdx, 0, 0)); @@ -590,8 +604,9 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx), - MachineMemOperand::MOLoad, /*Offset=*/0, + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), MFI.getObjectAlignment(FrameIdx)); NewMIs.back()->addMemOperand(MF, MMO); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index fc7b7b3..b5249ae 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -32,7 +32,7 @@ enum { /// PPC970_First - This instruction starts a new dispatch group, so it will /// always be the first one in the group. PPC970_First = 0x1, - + /// PPC970_Single - This instruction starts a new dispatch group and /// terminates it, so it will be the sole instruction in the group. PPC970_Single = 0x2, @@ -40,7 +40,7 @@ enum { /// PPC970_Cracked - This instruction is cracked into two pieces, requiring /// two dispatch pipes to be available to issue. PPC970_Cracked = 0x4, - + /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that /// an instruction is issued to. PPC970_Shift = 3, @@ -58,9 +58,9 @@ enum PPC970_Unit { PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit PPC970_BRU = 7 << PPC970_Shift // Branch Unit }; -} - - +} // end namespace PPCII + + class PPCInstrInfo : public TargetInstrInfoImpl { PPCTargetMachine &TM; const PPCRegisterInfo RI; @@ -69,7 +69,7 @@ class PPCInstrInfo : public TargetInstrInfoImpl { unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const; - void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, + void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const; @@ -82,6 +82,10 @@ public: /// virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; } + ScheduleHazardRecognizer * + CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, @@ -90,8 +94,8 @@ public: // commuteInstruction - We can commute rlwimi instructions, but only if the // rotate amt is zero. 
We also have to munge the immediates a bit. virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const; - - virtual void insertNoop(MachineBasicBlock &MBB, + + virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; @@ -109,7 +113,7 @@ public: MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -121,7 +125,7 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -130,7 +134,7 @@ public: virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; - + /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index eb100ec..82aadeb 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -68,17 +68,17 @@ def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, // This sequence is used for long double->int conversions. It changes the // bits in the FPSCR which is not modelled. def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, - [SDNPOutFlag]>; + [SDNPOutGlue]>; def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPInFlag, SDNPOutFlag]>; + [SDNPInGlue, SDNPOutGlue]>; def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPInFlag, SDNPOutFlag]>; + [SDNPInGlue, SDNPOutGlue]>; def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, - [SDNPInFlag, SDNPOutFlag]>; + [SDNPInGlue, SDNPOutGlue]>; def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3, [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>, SDTCisVT<3, f64>]>, - [SDNPInFlag]>; + [SDNPInGlue]>; def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. @@ -105,45 +105,45 @@ def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore, // These are target-independent nodes, but have target-specific formats. 
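Before the node definitions continue, a gloss on the PPCII enum in the PPCInstrInfo.h hunk above: the First/Single/Cracked bits and the dispatch-unit number share an instruction's TSFlags, the unit living in a three-bit field at PPC970_Shift. The mask value below is an assumption consistent with that layout, not copied from the header. Pack/unpack round trip:

    #include <cassert>

    enum {
      PPC970_First = 0x1, PPC970_Single = 0x2, PPC970_Cracked = 0x4,
      PPC970_Shift = 3,
      PPC970_Mask = 0x7 << PPC970_Shift,  // assumed 3-bit unit field
    };

    static unsigned ppc970Unit(unsigned tsFlags) {
      return (tsFlags & PPC970_Mask) >> PPC970_Shift;
    }

    int main() {
      unsigned flags = PPC970_Cracked | (7 << PPC970_Shift);  // cracked, BRU
      assert(ppc970Unit(flags) == 7 && (flags & PPC970_Cracked));
      return 0;
    }
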
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart, - [SDNPHasChain, SDNPOutFlag]>; + [SDNPHasChain, SDNPOutGlue]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInFlag, SDNPOutFlag]>; +def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, - [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; -def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>; +def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInGlue]>; def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, [SDNPHasChain, SDNPMayLoad]>; @@ -286,31 +286,38 @@ def u16imm : Operand<i32> { def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing. 
let PrintMethod = "printS16X4ImmOperand"; } -def target : Operand<OtherVT> { +def directbrtarget : Operand<OtherVT> { let PrintMethod = "printBranchOperand"; + let EncoderMethod = "getDirectBrEncoding"; +} +def condbrtarget : Operand<OtherVT> { + let PrintMethod = "printBranchOperand"; + let EncoderMethod = "getCondBrEncoding"; } def calltarget : Operand<iPTR> { - let PrintMethod = "printCallOperand"; + let EncoderMethod = "getDirectBrEncoding"; } def aaddr : Operand<iPTR> { let PrintMethod = "printAbsAddrOperand"; } -def piclabel: Operand<iPTR> { - let PrintMethod = "printPICLabel"; -} +def piclabel: Operand<iPTR> {} def symbolHi: Operand<i32> { let PrintMethod = "printSymbolHi"; + let EncoderMethod = "getHA16Encoding"; } def symbolLo: Operand<i32> { let PrintMethod = "printSymbolLo"; + let EncoderMethod = "getLO16Encoding"; } def crbitm: Operand<i8> { let PrintMethod = "printcrbitm"; + let EncoderMethod = "get_crbitm_encoding"; } // Address operands def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); + let EncoderMethod = "getMemRIEncoding"; } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; @@ -319,9 +326,9 @@ def memrr : Operand<iPTR> { def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); + let EncoderMethod = "getMemRIXEncoding"; } def tocentry : Operand<iPTR> { - let PrintMethod = "printTOCEntryLabel"; let MIOperandInfo = (ops i32imm:$imm); } @@ -355,11 +362,9 @@ def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; let hasCtrlDep = 1 in { let Defs = [R1], Uses = [R1] in { -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), - "${:comment} ADJCALLSTACKDOWN", +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "", [(callseq_start timm:$amt)]>; -def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), - "${:comment} ADJCALLSTACKUP", +def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "", [(callseq_end timm:$amt1, timm:$amt2)]>; } @@ -368,8 +373,7 @@ def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS), } let Defs = [R1], Uses = [R1] in -def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), - "${:comment} DYNALLOC $result, $negsize, $fpsi", +def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "", [(set GPRC:$result, (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>; @@ -378,26 +382,26 @@ def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), let usesCustomInserter = 1, // Expanded after instruction selection. 
PPC970_Single = 1 in { def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F, - i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!", + i32imm:$BROPC), "", []>; def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F, - i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!", + i32imm:$BROPC), "", []>; def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F, - i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!", + i32imm:$BROPC), "", []>; def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F, - i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!", + i32imm:$BROPC), "", []>; def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F, - i32imm:$BROPC), "${:comment} SELECT_CC PSEUDO!", + i32imm:$BROPC), "", []>; } // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to // scavenge a register for it. def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F), - "${:comment} SPILL_CR $cond $F", []>; + "", []>; let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { let isReturn = 1, Uses = [LR, RM] in @@ -409,12 +413,12 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { } let Defs = [LR] in - def MovePCtoLR : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>, + def MovePCtoLR : Pseudo<(outs), (ins piclabel:$label), "", []>, PPC970_Unit_BRU; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let isBarrier = 1 in { - def B : IForm<18, 0, 0, (outs), (ins target:$dst), + def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst), "b $dst", BrB, [(br bb:$dst)]>; } @@ -422,7 +426,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { // BCC represents an arbitrary conditional branch on a predicate. // FIXME: should be able to write a pattern for PPCcondbranch, but can't use // a two-value operand where a dag node expects two operands. 
:( - def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, target:$dst), + def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), "b${cond:cc} ${cond:reg}, $dst" /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>; } @@ -548,105 +552,81 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), let usesCustomInserter = 1 in { let Uses = [CR0] in { def ATOMIC_LOAD_ADD_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_SUB_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_AND_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_OR_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_XOR_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_NAND_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_ADD_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_SUB_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_AND_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_OR_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_XOR_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_NAND_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_ADD_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_add_32 
xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_SUB_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_AND_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_OR_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_XOR_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_LOAD_NAND_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), - "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>; def ATOMIC_CMP_SWAP_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), - "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "", [(set GPRC:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; def ATOMIC_CMP_SWAP_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), - "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "", [(set GPRC:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; def ATOMIC_CMP_SWAP_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), - "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "", [(set GPRC:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; def ATOMIC_SWAP_I8 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), - "${:comment} ATOMIC_SWAP_I8 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "", [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>; def ATOMIC_SWAP_I16 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), - "${:comment} ATOMIC_SWAP_I16 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "", [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>; def ATOMIC_SWAP_I32 : Pseudo< - (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), - "${:comment} ATOMIC_SWAP_I32 PSEUDO!", + (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "", [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>; } } @@ -785,33 +765,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), // Unindexed (r+i) Stores with Update (preinc). 
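
A note on the ATOMIC_* pseudo-instructions defined above: because they carry usesCustomInserter = 1, they are expanded after instruction selection by the target's EmitInstrWithCustomInserter hook into a lwarx/stwcx. load-reserve/store-conditional retry loop. The sketch below is a behavioral model only (the helper name is illustrative, not part of this patch); it shows the value each pseudo defines, namely the memory contents from before the update:

    #include <atomic>

    // What ATOMIC_LOAD_ADD_I32 computes: atomically add Incr to *Ptr and
    // return the OLD value.  On PPC the custom inserter realizes this as
    //   loop: lwarx r5,0,r3 ; add r6,r5,r4 ; stwcx. r6,0,r3 ; bne- loop
    unsigned atomicLoadAdd32(std::atomic<unsigned> *Ptr, unsigned Incr) {
      return Ptr->fetch_add(Incr, std::memory_order_seq_cst);
    }
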
let PPC970_Unit = 2 in { -def STBU : DForm_1<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, +def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), "stbu $rS, $ptroff($ptrreg)", LdStGeneral, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU : DForm_1<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, +def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), "sthu $rS, $ptroff($ptrreg)", LdStGeneral, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STWU : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, +def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), "stwu $rS, $ptroff($ptrreg)", LdStGeneral, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFSU : DForm_1<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, +def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), "stfsu $rS, $ptroff($ptrreg)", LdStGeneral, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFDU : DForm_1<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, +def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), "stfdu $rS, $ptroff($ptrreg)", LdStGeneral, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, @@ -1120,9 +1100,16 @@ def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS), // As it turns out, in all cases where we currently use this, // we're only interested in one subregister of it. Represent this in the // instruction to keep the register allocator from becoming confused. +// +// FIXME: Make this a real Pseudo instruction when the JIT switches to MC. def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), - "mfcr $rT ${:comment} $FXM", SprMFCR>, + "", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; + +def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), + "mfcr $rT", SprMFCR>, + PPC970_MicroCode, PPC970_Unit_CRU; + def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), "mfcr $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index daf4ec6..78383e0 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -16,7 +16,7 @@ #include "PPCRelocations.h" #include "PPCTargetMachine.h" #include "llvm/Function.h" -#include "llvm/System/Memory.h" +#include "llvm/Support/Memory.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/PPCMCAsmInfo.cpp index 3644c79..d1178dd 100644 --- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/PPCMCAsmInfo.cpp @@ -17,10 +17,11 @@ using namespace llvm; PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { PCSymbol = "."; CommentString = ";"; - ExceptionsType = ExceptionHandling::Dwarf; + ExceptionsType = ExceptionHandling::DwarfTable; if (!is64Bit) Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode. + AssemblerDialect = 1; // New-Style mnemonics. SupportsDebugInformation= true; // Debug information. 
} @@ -47,7 +48,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { // Exceptions handling if (!is64Bit) - ExceptionsType = ExceptionHandling::Dwarf; + ExceptionsType = ExceptionHandling::DwarfTable; ZeroDirective = "\t.space\t"; Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; diff --git a/lib/Target/PowerPC/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/PPCMCCodeEmitter.cpp new file mode 100644 index 0000000..65c2c82 --- /dev/null +++ b/lib/Target/PowerPC/PPCMCCodeEmitter.cpp @@ -0,0 +1,195 @@ +//===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PPCMCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mccodeemitter" +#include "PPC.h" +#include "PPCRegisterInfo.h" +#include "PPCFixupKinds.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCInst.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ErrorHandling.h" +using namespace llvm; + +STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); + +namespace { +class PPCMCCodeEmitter : public MCCodeEmitter { + PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT + const TargetMachine &TM; + MCContext &Ctx; + +public: + PPCMCCodeEmitter(TargetMachine &tm, MCContext &ctx) + : TM(tm), Ctx(ctx) { + } + + ~PPCMCCodeEmitter() {} + + unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getHA16Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getLO16Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups) const; + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. + unsigned getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups) const; + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + unsigned Bits = getBinaryCodeForInstr(MI, Fixups); + + // Output the constant in big endian byte order. + for (unsigned i = 0; i != 4; ++i) { + OS << (char)(Bits >> 24); + Bits <<= 8; + } + + ++MCNumEmitted; // Keep track of the # of mi's emitted. 
+  }
+
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createPPCMCCodeEmitter(const Target &, TargetMachine &TM,
+                                            MCContext &Ctx) {
+  return new PPCMCCodeEmitter(TM, Ctx);
+}
+
+unsigned PPCMCCodeEmitter::
+getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+                    SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+  // Add a fixup for the branch target.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_br24));
+  return 0;
+}
+
+unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+                                     SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+  // Add a fixup for the branch target.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_brcond14));
+  return 0;
+}
+
+unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo,
+                                     SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+  // Add a fixup for the immediate field.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_ha16));
+  return 0;
+}
+
+unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo,
+                                     SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+  // Add a fixup for the immediate field.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_lo16));
+  return 0;
+}
+
+unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+                                     SmallVectorImpl<MCFixup> &Fixups) const {
+  // Encode (imm, reg) as a memri, which has the low 16-bits as the
+  // displacement and the next 5 bits as the register #.
+  assert(MI.getOperand(OpNo+1).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 16;
+
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm())
+    return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
+
+  // Add a fixup for the displacement field.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_lo16));
+  return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+                                     SmallVectorImpl<MCFixup> &Fixups) const {
+  // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+  // displacement and the next 5 bits as the register #.
+  assert(MI.getOperand(OpNo+1).isReg());
+  unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups) << 14;
+
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm())
+    return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
+
+  // Add a fixup for the displacement field.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_lo14));
+  return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::
+get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+                    SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+         (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
+  return 0x80 >> PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+}
+
+
+unsigned PPCMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  if (MO.isReg()) {
+    // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+    // The GPR operand should come through here though.
+    assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+           MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+    return PPCRegisterInfo::getRegisterNumbering(MO.getReg());
+  }
+
+  assert(MO.isImm() &&
+         "Relocation required in an instruction that we cannot encode!");
+  return MO.getImm();
+}
+
+
+#include "PPCGenMCCodeEmitter.inc"
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
new file mode 100644
index 0000000..6082587
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -0,0 +1,172 @@
+//===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PPC MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
+  return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
+  MCContext &Ctx = AP.OutContext;
+
+  SmallString<128> Name;
+  if (!MO.isGlobal()) {
+    assert(MO.isSymbol() && "Isn't a symbol reference");
+    Name += AP.MAI->getGlobalPrefix();
+    Name += MO.getSymbolName();
+  } else {
+    const GlobalValue *GV = MO.getGlobal();
+    bool isImplicitlyPrivate = false;
+    if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB ||
+        (MO.getTargetFlags() & PPCII::MO_NLP_FLAG))
+      isImplicitlyPrivate = true;
+
+    AP.Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+  }
+
+  // If the target flags on the operand change the name of the symbol, do that
+  // before we return the symbol.
+ if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) { + Name += "$stub"; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + MachineModuleInfoImpl::StubValueTy &StubSym = + getMachOMMI(AP).getFnStubEntry(Sym); + if (StubSym.getPointer()) + return Sym; + + if (MO.isGlobal()) { + StubSym = + MachineModuleInfoImpl:: + StubValueTy(AP.Mang->getSymbol(MO.getGlobal()), + !MO.getGlobal()->hasInternalLinkage()); + } else { + Name.erase(Name.end()-5, Name.end()); + StubSym = + MachineModuleInfoImpl:: + StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false); + } + return Sym; + } + + // If the symbol reference is actually to a non_lazy_ptr, not to the symbol, + // then add the suffix. + if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) { + Name += "$non_lazy_ptr"; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + + MachineModuleInfoMachO &MachO = getMachOMMI(AP); + + MachineModuleInfoImpl::StubValueTy &StubSym = + (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ? + MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym); + + if (StubSym.getPointer() == 0) { + assert(MO.isGlobal() && "Extern symbol not handled yet"); + StubSym = MachineModuleInfoImpl:: + StubValueTy(AP.Mang->getSymbol(MO.getGlobal()), + !MO.getGlobal()->hasInternalLinkage()); + } + return Sym; + } + + return Ctx.GetOrCreateSymbol(Name.str()); +} + +static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, + AsmPrinter &Printer) { + MCContext &Ctx = Printer.OutContext; + MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; + + if (MO.getTargetFlags() & PPCII::MO_LO16) + RefKind = MCSymbolRefExpr::VK_PPC_LO16; + else if (MO.getTargetFlags() & PPCII::MO_HA16) + RefKind = MCSymbolRefExpr::VK_PPC_HA16; + + // FIXME: This isn't right, but we don't have a good way to express this in + // the MC Level, see below. + if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) + RefKind = MCSymbolRefExpr::VK_None; + + const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx); + + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(MO.getOffset(), Ctx), + Ctx); + + // Subtract off the PIC base if required. + if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) { + const MachineFunction *MF = MO.getParent()->getParent()->getParent(); + + const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx); + Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx); + // FIXME: We have no way to make the result be VK_PPC_LO16/VK_PPC_HA16, + // since it is not a symbol! 
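
For context on the VK_PPC_HA16/VK_PPC_LO16 kinds used in GetSymbolRef above: a 32-bit address is rebuilt in two halves, and because D-form instructions sign-extend their 16-bit displacement, the high half must be "high-adjusted" (carry one) whenever bit 15 of the low half is set. A self-contained sketch of the arithmetic, with illustrative helper names that are not part of this patch:

    #include <cstdint>

    // lo16: the low half, later consumed as a *signed* 16-bit displacement.
    static int16_t lo16(uint32_t Addr) { return int16_t(Addr & 0xFFFF); }

    // ha16: the high half plus a carry that cancels lo16's sign-extension.
    static uint16_t ha16(uint32_t Addr) {
      return uint16_t((Addr >> 16) + ((Addr & 0x8000) ? 1 : 0));
    }

    // Invariant: (uint32_t(ha16(A)) << 16) + lo16(A) == A.  For example,
    // A = 0x12348000 gives ha16 = 0x1235 and lo16 = -0x8000.
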
+ } + + return MCOperand::CreateExpr(Expr); +} + +void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + AsmPrinter &AP) { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + switch (MO.getType()) { + default: + MI->dump(); + assert(0 && "unknown operand type"); + case MachineOperand::MO_Register: + assert(!MO.getSubReg() && "Subregs should be eliminated!"); + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MO.getMBB()->getSymbol(), AP.OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP); + break; + case MachineOperand::MO_JumpTableIndex: + MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP); + break; + case MachineOperand::MO_BlockAddress: + MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP); + break; + } + + OutMI.addOperand(MCOp); + } +} diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 653e143..45d8b6b 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -17,7 +17,7 @@ #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "PPCRegisterInfo.h" -#include "PPCFrameInfo.h" +#include "PPCFrameLowering.h" #include "PPCSubtarget.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" @@ -31,7 +31,7 @@ #include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -44,16 +44,6 @@ #include "llvm/ADT/STLExtras.h" #include <cstdlib> -// FIXME This disables some code that aligns the stack to a boundary -// bigger than the default (16 bytes on Darwin) when there is a stack local -// of greater alignment. This does not currently work, because the delta -// between old and new stack pointers is added to offsets that reference -// incoming parameters after the prolog is generated, and the code that -// does that doesn't handle a variable delta. You don't want to do that -// anyway; a better approach is to reserve another register that retains -// to the incoming stack pointer, and reference parameters relative to that. -#define ALIGN_STACK 0 - // FIXME (64-bit): Eventually enable by default. namespace llvm { cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger", @@ -68,14 +58,11 @@ cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger", using namespace llvm; -#define EnableRegisterScavenging \ - ((EnablePPC32RS && !Subtarget.isPPC64()) || \ - (EnablePPC64RS && Subtarget.isPPC64())) - // FIXME (64-bit): Should be inlined. bool PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { - return EnableRegisterScavenging; + return ((EnablePPC32RS && !Subtarget.isPPC64()) || + (EnablePPC64RS && Subtarget.isPPC64())); } /// getRegisterNumbering - Given the enum value for some register, e.g. 
@@ -269,26 +256,11 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs; } -// needsFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. -// -static bool needsFP(const MachineFunction &MF) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - // Naked functions have no stack frame pushed, so we don't have a frame pointer. - if (MF.getFunction()->hasFnAttr(Attribute::Naked)) - return false; - return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || - (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); -} - -static bool spillsCR(const MachineFunction &MF) { - const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - return FuncInfo->isCRSpilled(); -} - BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); + const PPCFrameLowering *PPCFI = + static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); + Reserved.set(PPC::R0); Reserved.set(PPC::R1); Reserved.set(PPC::LR); @@ -314,7 +286,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R13); Reserved.set(PPC::R31); - if (!EnableRegisterScavenging) + if (!requiresRegisterScavenging(MF)) Reserved.set(PPC::R0); // FIXME (64-bit): Remove Reserved.set(PPC::X0); @@ -334,7 +306,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } } - if (needsFP(MF)) + if (PPCFI->needsFP(MF)) Reserved.set(PPC::R31); return Reserved; @@ -344,30 +316,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// -// hasFP - Return true if the specified function actually has a dedicated frame -// pointer register. This is true if the function needs a frame pointer and has -// a non-zero stack size. -bool PPCRegisterInfo::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->getStackSize() && needsFP(MF); -} - -/// MustSaveLR - Return true if this function requires that we save the LR -/// register onto the stack in the prolog and restore it in the epilog of the -/// function. -static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { - const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); - - // We need a save/restore of LR if there is any def of LR (which is - // defined by calls, including the PIC setup sequence), or if there is - // some use of the LR stack slot (e.g. for builtin_return_address). - // (LR comes in 32 and 64 bit versions.) - MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); - return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); -} - - - void PPCRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { @@ -447,7 +395,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, unsigned FrameSize = MFI->getStackSize(); // Get stack alignments. 
- unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); unsigned MaxAlign = MFI->getMaxAlignment(); if (MaxAlign > TargetAlign) report_fatal_error("Dynamic alloca with large aligns not supported"); @@ -464,7 +412,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // FIXME (64-bit): Use "findScratchRegister" unsigned Reg; - if (EnableRegisterScavenging) + if (requiresRegisterScavenging(MF)) Reg = findScratchRegister(II, RS, RC, SPAdj); else Reg = PPC::R0; @@ -474,7 +422,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, .addReg(PPC::R31) .addImm(FrameSize); } else if (LP64) { - if (EnableRegisterScavenging) // FIXME (64-bit): Use "true" part. + if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) .addImm(0) .addReg(PPC::X1); @@ -491,7 +439,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Grow the stack and update the stack pointer link, then determine the // address of new allocated space. if (LP64) { - if (EnableRegisterScavenging) // FIXME (64-bit): Use "true" part. + if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) .addReg(Reg, RegState::Kill) .addReg(PPC::X1) @@ -593,6 +541,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MBB.getParent(); // Get the frame info. MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc dl = MI.getDebugLoc(); // Find out which operand is the frame index. @@ -625,14 +574,15 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Special case for pseudo-op SPILL_CR. - if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default. + if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default. if (OpC == PPC::SPILL_CR) { lowerCRSpilling(II, FrameIndex, SPAdj, RS); return; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). - MI.getOperand(FIOperandNo).ChangeToRegister(hasFP(MF) ? PPC::R31 : PPC::R1, + MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ? + PPC::R31 : PPC::R1, false); // Figure out if the offset in the instruction is shifted right two bits. This @@ -682,7 +632,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // FIXME (64-bit): Use "findScratchRegister". unsigned SReg; - if (EnableRegisterScavenging) + if (requiresRegisterScavenging(MF)) SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj); else SReg = PPC::R0; @@ -715,898 +665,17 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); } -/// VRRegNo - Map from a numbered VR register to its enum value. -/// -static const unsigned short VRRegNo[] = { - PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , - PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, - PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 -}; - -/// RemoveVRSaveCode - We have found that this function does not need any code -/// to manipulate the VRSAVE register, even though it uses vector registers. 
-/// This can happen when the only registers used are known to be live in or out -/// of the function. Remove all of the VRSAVE related code from the function. -static void RemoveVRSaveCode(MachineInstr *MI) { - MachineBasicBlock *Entry = MI->getParent(); - MachineFunction *MF = Entry->getParent(); - - // We know that the MTVRSAVE instruction immediately follows MI. Remove it. - MachineBasicBlock::iterator MBBI = MI; - ++MBBI; - assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); - MBBI->eraseFromParent(); - - bool RemovedAllMTVRSAVEs = true; - // See if we can find and remove the MTVRSAVE instruction from all of the - // epilog blocks. - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { - // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().getDesc().isReturn()) { - bool FoundIt = false; - for (MBBI = I->end(); MBBI != I->begin(); ) { - --MBBI; - if (MBBI->getOpcode() == PPC::MTVRSAVE) { - MBBI->eraseFromParent(); // remove it. - FoundIt = true; - break; - } - } - RemovedAllMTVRSAVEs &= FoundIt; - } - } - - // If we found and removed all MTVRSAVE instructions, remove the read of - // VRSAVE as well. - if (RemovedAllMTVRSAVEs) { - MBBI = MI; - assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); - --MBBI; - assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); - MBBI->eraseFromParent(); - } - - // Finally, nuke the UPDATE_VRSAVE. - MI->eraseFromParent(); -} - -// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the -// instruction selector. Based on the vector registers that have been used, -// transform this into the appropriate ORI instruction. -static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { - MachineFunction *MF = MI->getParent()->getParent(); - DebugLoc dl = MI->getDebugLoc(); - - unsigned UsedRegMask = 0; - for (unsigned i = 0; i != 32; ++i) - if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i])) - UsedRegMask |= 1 << (31-i); - - // Live in and live out values already must be in the mask, so don't bother - // marking them. - for (MachineRegisterInfo::livein_iterator - I = MF->getRegInfo().livein_begin(), - E = MF->getRegInfo().livein_end(); I != E; ++I) { - unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(I->first); - if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. - UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. - } - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) { - unsigned RegNo = PPCRegisterInfo::getRegisterNumbering(*I); - if (VRRegNo[RegNo] == *I) // If this really is a vector reg. - UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. - } - - // If no registers are used, turn this into a copy. - if (UsedRegMask == 0) { - // Remove all VRSAVE code. 
- RemoveVRSaveCode(MI); - return; - } - - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned DstReg = MI->getOperand(0).getReg(); - - if ((UsedRegMask & 0xFFFF) == UsedRegMask) { - if (DstReg != SrcReg) - BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) - .addReg(SrcReg) - .addImm(UsedRegMask); - else - BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(UsedRegMask); - } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { - if (DstReg != SrcReg) - BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) - .addReg(SrcReg) - .addImm(UsedRegMask >> 16); - else - BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(UsedRegMask >> 16); - } else { - if (DstReg != SrcReg) - BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) - .addReg(SrcReg) - .addImm(UsedRegMask >> 16); - else - BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(UsedRegMask >> 16); - - BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg) - .addReg(DstReg, RegState::Kill) - .addImm(UsedRegMask & 0xFFFF); - } - - // Remove the old UPDATE_VRSAVE instruction. - MI->eraseFromParent(); -} - -/// determineFrameLayout - Determine the size of the frame and maximum call -/// frame size. -void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Get the number of bytes to allocate from the FrameInfo - unsigned FrameSize = MFI->getStackSize(); - - // Get the alignments provided by the target, and the maximum alignment - // (if any) of the fixed frame objects. - unsigned MaxAlign = MFI->getMaxAlignment(); - unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); - unsigned AlignMask = TargetAlign - 1; // - - // If we are a leaf function, and use up to 224 bytes of stack space, - // don't have a frame pointer, calls, or dynamic alloca then we do not need - // to adjust the stack pointer (we fit in the Red Zone). - bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); - // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. - if (!DisableRedZone && - FrameSize <= 224 && // Fits in red zone. - !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->adjustsStack() && // No calls. - (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. - // No need for frame - MFI->setStackSize(0); - return; - } - - // Get the maximum call frame size of all the calls. - unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); - - // Maximum call frame needs to be at least big enough for linkage and 8 args. - unsigned minCallFrameSize = - PPCFrameInfo::getMinCallFrameSize(Subtarget.isPPC64(), - Subtarget.isDarwinABI()); - maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); - - // If we have dynamic alloca then maxCallFrameSize needs to be aligned so - // that allocations will be aligned. - if (MFI->hasVarSizedObjects()) - maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; - - // Update maximum call frame size. - MFI->setMaxCallFrameSize(maxCallFrameSize); - - // Include call frame size in total. - FrameSize += maxCallFrameSize; - - // Make sure the frame is aligned. - FrameSize = (FrameSize + AlignMask) & ~AlignMask; - - // Update frame info. - MFI->setStackSize(FrameSize); -} - -void -PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - // Save and clear the LR state. 
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); - unsigned LR = getRARegister(); - FI->setMustSaveLR(MustSaveLR(MF, LR)); - MF.getRegInfo().setPhysRegUnused(LR); - - // Save R31 if necessary - int FPSI = FI->getFramePointerSaveIndex(); - bool isPPC64 = Subtarget.isPPC64(); - bool isDarwinABI = Subtarget.isDarwinABI(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - // If the frame pointer save index hasn't been defined yet. - if (!FPSI && needsFP(MF)) { - // Find out what the fix offset of the frame pointer save area. - int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, - isDarwinABI); - // Allocate the frame index for frame pointer save area. - FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); - // Save the result. - FI->setFramePointerSaveIndex(FPSI); - } - - // Reserve stack space to move the linkage area to in case of a tail call. - int TCSPDelta = 0; - if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { - MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); - } - - // Reserve a slot closest to SP or frame pointer if we have a dynalloc or - // a large stack, which will require scavenging a register to materialize a - // large offset. - // FIXME: this doesn't actually check stack size, so is a bit pessimistic - // FIXME: doesn't detect whether or not we need to spill vXX, which requires - // r0 for now. - - if (EnableRegisterScavenging) // FIXME (64-bit): Enable. - if (needsFP(MF) || spillsCR(MF)) { - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } -} - -void -PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { - // Early exit if not using the SVR4 ABI. - if (!Subtarget.isSVR4ABI()) { - return; - } - - // Get callee saved register information. - MachineFrameInfo *FFI = MF.getFrameInfo(); - const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); - - // Early exit if no callee saved registers are modified! - if (CSI.empty() && !needsFP(MF)) { - return; - } - - unsigned MinGPR = PPC::R31; - unsigned MinG8R = PPC::X31; - unsigned MinFPR = PPC::F31; - unsigned MinVR = PPC::V31; - - bool HasGPSaveArea = false; - bool HasG8SaveArea = false; - bool HasFPSaveArea = false; - bool HasCRSaveArea = false; - bool HasVRSAVESaveArea = false; - bool HasVRSaveArea = false; - - SmallVector<CalleeSavedInfo, 18> GPRegs; - SmallVector<CalleeSavedInfo, 18> G8Regs; - SmallVector<CalleeSavedInfo, 18> FPRegs; - SmallVector<CalleeSavedInfo, 18> VRegs; - - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (PPC::GPRCRegisterClass->contains(Reg)) { - HasGPSaveArea = true; - - GPRegs.push_back(CSI[i]); - - if (Reg < MinGPR) { - MinGPR = Reg; - } - } else if (PPC::G8RCRegisterClass->contains(Reg)) { - HasG8SaveArea = true; - - G8Regs.push_back(CSI[i]); - - if (Reg < MinG8R) { - MinG8R = Reg; - } - } else if (PPC::F8RCRegisterClass->contains(Reg)) { - HasFPSaveArea = true; - - FPRegs.push_back(CSI[i]); - - if (Reg < MinFPR) { - MinFPR = Reg; - } -// FIXME SVR4: Disable CR save area for now. 
- } else if (PPC::CRBITRCRegisterClass->contains(Reg) - || PPC::CRRCRegisterClass->contains(Reg)) { -// HasCRSaveArea = true; - } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) { - HasVRSAVESaveArea = true; - } else if (PPC::VRRCRegisterClass->contains(Reg)) { - HasVRSaveArea = true; - - VRegs.push_back(CSI[i]); - - if (Reg < MinVR) { - MinVR = Reg; - } - } else { - llvm_unreachable("Unknown RegisterClass!"); - } - } - - PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); - - int64_t LowerBound = 0; - - // Take into account stack space reserved for tail calls. - int TCSPDelta = 0; - if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { - LowerBound = TCSPDelta; - } - - // The Floating-point register save area is right below the back chain word - // of the previous stack frame. - if (HasFPSaveArea) { - for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { - int FI = FPRegs[i].getFrameIdx(); - - FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); - } - - LowerBound -= (31 - getRegisterNumbering(MinFPR) + 1) * 8; - } - - // Check whether the frame pointer register is allocated. If so, make sure it - // is spilled to the correct offset. - if (needsFP(MF)) { - HasGPSaveArea = true; - - int FI = PFI->getFramePointerSaveIndex(); - assert(FI && "No Frame Pointer Save Slot!"); - - FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); - } - - // General register save area starts right below the Floating-point - // register save area. - if (HasGPSaveArea || HasG8SaveArea) { - // Move general register save area spill slots down, taking into account - // the size of the Floating-point register save area. - for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { - int FI = GPRegs[i].getFrameIdx(); - - FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); - } - - // Move general register save area spill slots down, taking into account - // the size of the Floating-point register save area. - for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { - int FI = G8Regs[i].getFrameIdx(); - - FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); - } - - unsigned MinReg = std::min<unsigned>(getRegisterNumbering(MinGPR), - getRegisterNumbering(MinG8R)); - - if (Subtarget.isPPC64()) { - LowerBound -= (31 - MinReg + 1) * 8; - } else { - LowerBound -= (31 - MinReg + 1) * 4; - } - } - - // The CR save area is below the general register save area. - if (HasCRSaveArea) { - // FIXME SVR4: Is it actually possible to have multiple elements in CSI - // which have the CR/CRBIT register class? - // Adjust the frame index of the CR spill slot. - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - - if (PPC::CRBITRCRegisterClass->contains(Reg) || - PPC::CRRCRegisterClass->contains(Reg)) { - int FI = CSI[i].getFrameIdx(); - - FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); - } - } - - LowerBound -= 4; // The CR save area is always 4 bytes long. - } - - if (HasVRSAVESaveArea) { - // FIXME SVR4: Is it actually possible to have multiple elements in CSI - // which have the VRSAVE register class? - // Adjust the frame index of the VRSAVE spill slot. - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - - if (PPC::VRSAVERCRegisterClass->contains(Reg)) { - int FI = CSI[i].getFrameIdx(); - - FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); - } - } - - LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. 
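
To summarize the (deleted) layout walk above, which this patch moves into the new PPCFrameLowering: LowerBound marches downward from the back chain word, reserving the FPR save area (8 bytes per register), the GPR/G8 save area (4 or 8 bytes per register), 4 bytes for CR, 4 bytes for VRSAVE, and finally the vector save area, which the rounding just below snaps to a 16-byte boundary. A minimal sketch of that rounding (the helper name is illustrative):

    #include <cstdint>

    // (Offset - 15) & ~15 steps down by 15 and clears the low four bits,
    // producing a 16-byte-aligned offset at or below Offset - 15.
    static int64_t alignVRSaveArea(int64_t Offset) {
      return (Offset - 15) & ~int64_t(15);
    }
    // e.g. alignVRSaveArea(-20) == -48; the result is always a multiple of 16.
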
- } - - if (HasVRSaveArea) { - // Insert alignment padding, we need 16-byte alignment. - LowerBound = (LowerBound - 15) & ~(15); - - for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { - int FI = VRegs[i].getFrameIdx(); - - FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); - } - } -} - -void -PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo &MMI = MF.getMMI(); - DebugLoc dl; - bool needsFrameMoves = MMI.hasDebugInfo() || - !MF.getFunction()->doesNotThrow() || - UnwindTablesMandatory; - - // Prepare for frame info. - MCSymbol *FrameLabel = 0; - - // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, - // process it. - for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { - if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { - HandleVRSaveUpdate(MBBI, TII); - break; - } - } - - // Move MBBI back to the beginning of the function. - MBBI = MBB.begin(); - - // Work out frame sizes. - determineFrameLayout(MF); - unsigned FrameSize = MFI->getStackSize(); - - int NegFrameSize = -FrameSize; - - // Get processor type. - bool isPPC64 = Subtarget.isPPC64(); - // Get operating system - bool isDarwinABI = Subtarget.isDarwinABI(); - // Check if the link register (LR) must be saved. - PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); - bool MustSaveLR = FI->mustSaveLR(); - // Do we have a frame pointer for this function? - bool HasFP = hasFP(MF) && FrameSize; - - int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); - - int FPOffset = 0; - if (HasFP) { - if (Subtarget.isSVR4ABI()) { - MachineFrameInfo *FFI = MF.getFrameInfo(); - int FPIndex = FI->getFramePointerSaveIndex(); - assert(FPIndex && "No Frame Pointer Save Slot!"); - FPOffset = FFI->getObjectOffset(FPIndex); - } else { - FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); - } - } - - if (isPPC64) { - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0); - - if (HasFP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) - .addReg(PPC::X31) - .addImm(FPOffset/4) - .addReg(PPC::X1); - - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::STD)) - .addReg(PPC::X0) - .addImm(LROffset / 4) - .addReg(PPC::X1); - } else { - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0); - - if (HasFP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) - .addReg(PPC::R31) - .addImm(FPOffset) - .addReg(PPC::R1); - - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) - .addReg(PPC::R0) - .addImm(LROffset) - .addReg(PPC::R1); - } - - // Skip if a leaf routine. - if (!FrameSize) return; - - // Get stack alignments. - unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); - unsigned MaxAlign = MFI->getMaxAlignment(); - - // Adjust stack pointer: r1 += NegFrameSize. - // If there is a preferred stack alignment, align R1 now - if (!isPPC64) { - // PPC32. 
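
A note on the prologue code below: the frame is allocated with a single store-with-update (stwu/stdu), which writes the back chain and moves R1 in one instruction so the stack stays walkable at every point. When -FrameSize does not fit in a signed 16-bit immediate, it is first materialized with a lis/ori pair and the update goes through stwux/stdux instead. A worked sketch of that split (the helper name is illustrative, not part of this patch):

    #include <cstdint>
    #include <utility>

    // lis places the high half shifted left 16; ori then ORs in the low
    // half, so hi must come from an arithmetic shift of the full value.
    static std::pair<int32_t, uint32_t> splitLisOri(int32_t Imm) {
      return { Imm >> 16, uint32_t(Imm) & 0xFFFF };
    }
    // e.g. Imm = -70000 (0xFFFEEE90) yields hi = -2, lo = 0xEE90;
    // (hi << 16) | lo reassembles 0xFFFEEE90.
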
- if (ALIGN_STACK && MaxAlign > TargetAlign) { - assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && - "Invalid alignment!"); - assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!"); - - BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0) - .addReg(PPC::R1) - .addImm(0) - .addImm(32 - Log2_32(MaxAlign)) - .addImm(31); - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0) - .addReg(PPC::R0, RegState::Kill) - .addImm(NegFrameSize); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) - .addReg(PPC::R1) - .addReg(PPC::R1) - .addReg(PPC::R0); - } else if (isInt<16>(NegFrameSize)) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1) - .addReg(PPC::R1) - .addImm(NegFrameSize) - .addReg(PPC::R1); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0) - .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) - .addReg(PPC::R0, RegState::Kill) - .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) - .addReg(PPC::R1) - .addReg(PPC::R1) - .addReg(PPC::R0); - } - } else { // PPC64. - if (ALIGN_STACK && MaxAlign > TargetAlign) { - assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && - "Invalid alignment!"); - assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!"); - - BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0) - .addReg(PPC::X1) - .addImm(0) - .addImm(64 - Log2_32(MaxAlign)); - BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0) - .addReg(PPC::X0) - .addImm(NegFrameSize); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) - .addReg(PPC::X1) - .addReg(PPC::X1) - .addReg(PPC::X0); - } else if (isInt<16>(NegFrameSize)) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1) - .addReg(PPC::X1) - .addImm(NegFrameSize / 4) - .addReg(PPC::X1); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0) - .addImm(NegFrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0) - .addReg(PPC::X0, RegState::Kill) - .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) - .addReg(PPC::X1) - .addReg(PPC::X1) - .addReg(PPC::X0); - } - } - - std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - - // Add the "machine moves" for the instructions we generated above, but in - // reverse order. - if (needsFrameMoves) { - // Mark effective beginning of when frame pointer becomes valid. - FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel); - - // Show update of SP. - if (NegFrameSize) { - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize); - Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); - } else { - MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31); - Moves.push_back(MachineMove(FrameLabel, SP, SP)); - } - - if (HasFP) { - MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset); - MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31); - Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); - } - - if (MustSaveLR) { - MachineLocation LRDst(MachineLocation::VirtualFP, LROffset); - MachineLocation LRSrc(isPPC64 ? 
PPC::LR8 : PPC::LR); - Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc)); - } - } - - MCSymbol *ReadyLabel = 0; - - // If there is a frame pointer, copy R1 into R31 - if (HasFP) { - if (!isPPC64) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31) - .addReg(PPC::R1) - .addReg(PPC::R1); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31) - .addReg(PPC::X1) - .addReg(PPC::X1); - } - - if (needsFrameMoves) { - ReadyLabel = MMI.getContext().CreateTempSymbol(); - - // Mark effective beginning of when frame pointer is ready. - BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel); - - MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) : - (isPPC64 ? PPC::X1 : PPC::R1)); - MachineLocation FPSrc(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc)); - } - } - - if (needsFrameMoves) { - MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel; - - // Add callee saved registers to move list. - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); - unsigned Reg = CSI[I].getReg(); - if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; - MachineLocation CSDst(MachineLocation::VirtualFP, Offset); - MachineLocation CSSrc(Reg); - Moves.push_back(MachineMove(Label, CSDst, CSSrc)); - } - } -} - -void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - unsigned RetOpcode = MBBI->getOpcode(); - DebugLoc dl; - - assert( (RetOpcode == PPC::BLR || - RetOpcode == PPC::TCRETURNri || - RetOpcode == PPC::TCRETURNdi || - RetOpcode == PPC::TCRETURNai || - RetOpcode == PPC::TCRETURNri8 || - RetOpcode == PPC::TCRETURNdi8 || - RetOpcode == PPC::TCRETURNai8) && - "Can only insert epilog into returning blocks"); - - // Get alignment info so we know how to restore r1 - const MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); - unsigned MaxAlign = MFI->getMaxAlignment(); - - // Get the number of bytes allocated from the FrameInfo. - int FrameSize = MFI->getStackSize(); - - // Get processor type. - bool isPPC64 = Subtarget.isPPC64(); - // Get operating system - bool isDarwinABI = Subtarget.isDarwinABI(); - // Check if the link register (LR) has been saved. - PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); - bool MustSaveLR = FI->mustSaveLR(); - // Do we have a frame pointer for this function? - bool HasFP = hasFP(MF) && FrameSize; - - int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); - - int FPOffset = 0; - if (HasFP) { - if (Subtarget.isSVR4ABI()) { - MachineFrameInfo *FFI = MF.getFrameInfo(); - int FPIndex = FI->getFramePointerSaveIndex(); - assert(FPIndex && "No Frame Pointer Save Slot!"); - FPOffset = FFI->getObjectOffset(FPIndex); - } else { - FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); - } - } - - bool UsesTCRet = RetOpcode == PPC::TCRETURNri || - RetOpcode == PPC::TCRETURNdi || - RetOpcode == PPC::TCRETURNai || - RetOpcode == PPC::TCRETURNri8 || - RetOpcode == PPC::TCRETURNdi8 || - RetOpcode == PPC::TCRETURNai8; - - if (UsesTCRet) { - int MaxTCRetDelta = FI->getTailCallSPDelta(); - MachineOperand &StackAdjust = MBBI->getOperand(1); - assert(StackAdjust.isImm() && "Expecting immediate value."); - // Adjust stack pointer. 
- int StackAdj = StackAdjust.getImm(); - int Delta = StackAdj - MaxTCRetDelta; - assert((Delta >= 0) && "Delta must be positive"); - if (MaxTCRetDelta>0) - FrameSize += (StackAdj +Delta); - else - FrameSize += StackAdj; - } - - if (FrameSize) { - // The loaded (or persistent) stack pointer value is offset by the 'stwu' - // on entry to the function. Add this offset back now. - if (!isPPC64) { - // If this function contained a fastcc call and GuaranteedTailCallOpt is - // enabled (=> hasFastCall()==true) the fastcc call might contain a tail - // call which invalidates the stack pointer value in SP(0). So we use the - // value of R31 in this case. - if (FI->hasFastCall() && isInt<16>(FrameSize)) { - assert(hasFP(MF) && "Expecting a valid the frame pointer."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) - .addReg(PPC::R31).addImm(FrameSize); - } else if(FI->hasFastCall()) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0) - .addImm(FrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) - .addReg(PPC::R0, RegState::Kill) - .addImm(FrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4)) - .addReg(PPC::R1) - .addReg(PPC::R31) - .addReg(PPC::R0); - } else if (isInt<16>(FrameSize) && - (!ALIGN_STACK || TargetAlign >= MaxAlign) && - !MFI->hasVarSizedObjects()) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1) - .addReg(PPC::R1).addImm(FrameSize); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1) - .addImm(0).addReg(PPC::R1); - } - } else { - if (FI->hasFastCall() && isInt<16>(FrameSize)) { - assert(hasFP(MF) && "Expecting a valid the frame pointer."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1) - .addReg(PPC::X31).addImm(FrameSize); - } else if(FI->hasFastCall()) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0) - .addImm(FrameSize >> 16); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0) - .addReg(PPC::X0, RegState::Kill) - .addImm(FrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8)) - .addReg(PPC::X1) - .addReg(PPC::X31) - .addReg(PPC::X0); - } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign && - !MFI->hasVarSizedObjects()) { - BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1) - .addReg(PPC::X1).addImm(FrameSize); - } else { - BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1) - .addImm(0).addReg(PPC::X1); - } - } - } - - if (isPPC64) { - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0) - .addImm(LROffset/4).addReg(PPC::X1); - - if (HasFP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31) - .addImm(FPOffset/4).addReg(PPC::X1); - - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0); - } else { - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0) - .addImm(LROffset).addReg(PPC::R1); - - if (HasFP) - BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31) - .addImm(FPOffset).addReg(PPC::R1); - - if (MustSaveLR) - BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0); - } - - // Callee pop calling convention. Pop parameter/linkage area. Used for tail - // call optimization - if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR && - MF.getFunction()->getCallingConv() == CallingConv::Fast) { - PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); - unsigned CallerAllocatedAmt = FI->getMinReservedArea(); - unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1; - unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; - unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0; - unsigned ADDIInstr = isPPC64 ? 
PPC::ADDI8 : PPC::ADDI; - unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4; - unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS; - unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI; - - if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { - BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg) - .addReg(StackReg).addImm(CallerAllocatedAmt); - } else { - BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) - .addImm(CallerAllocatedAmt >> 16); - BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) - .addReg(TmpReg, RegState::Kill) - .addImm(CallerAllocatedAmt & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(ADDInstr)) - .addReg(StackReg) - .addReg(FPReg) - .addReg(TmpReg); - } - } else if (RetOpcode == PPC::TCRETURNdi) { - MBBI = prior(MBB.end()); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); - } else if (RetOpcode == PPC::TCRETURNri) { - MBBI = prior(MBB.end()); - assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); - } else if (RetOpcode == PPC::TCRETURNai) { - MBBI = prior(MBB.end()); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); - } else if (RetOpcode == PPC::TCRETURNdi8) { - MBBI = prior(MBB.end()); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); - } else if (RetOpcode == PPC::TCRETURNri8) { - MBBI = prior(MBB.end()); - assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); - } else if (RetOpcode == PPC::TCRETURNai8) { - MBBI = prior(MBB.end()); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); - } -} - unsigned PPCRegisterInfo::getRARegister() const { return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8; } unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + if (!Subtarget.isPPC64()) - return hasFP(MF) ? PPC::R31 : PPC::R1; + return TFI->hasFP(MF) ? PPC::R31 : PPC::R1; else - return hasFP(MF) ? PPC::X31 : PPC::X1; -} - -void PPCRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) - const { - // Initial state of the frame pointer is R1. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(PPC::R1, 0); - Moves.push_back(MachineMove(0, Dst, Src)); + return TFI->hasFP(MF) ? PPC::X31 : PPC::X1; } unsigned PPCRegisterInfo::getEHExceptionRegister() const { diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 890b24b..aa29ffe 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -44,17 +44,10 @@ public: BitVector getReservedRegs(const MachineFunction &MF) const; - /// targetHandlesStackFrameRounding - Returns true if the target is - /// responsible for rounding up the stack frame (probably at emitPrologue - /// time). - bool targetHandlesStackFrameRounding() const { return true; } - /// requiresRegisterScavenging - We require a register scavenger. /// FIXME (64-bit): Should be inlined. 
bool requiresRegisterScavenging(const MachineFunction &MF) const; - bool hasFP(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; @@ -66,21 +59,9 @@ public: void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - /// determineFrameLayout - Determine the size of the frame and maximum call - /// frame size. - void determineFrameLayout(MachineFunction &MF) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; - - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; - void getInitialFrameState(std::vector<MachineMove> &Moves) const; // Exception handling queries. unsigned getEHExceptionRegister() const; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 8604f54..2639165 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -300,13 +300,14 @@ def GPRC : RegisterClass<"PPC", [i32], 32, // R31 when the FP is not needed. // When using the 32-bit SVR4 ABI, r13 is reserved for the Small Data Area // pointer. - const PPCSubtarget &Subtarget - = MF.getTarget().getSubtarget<PPCSubtarget>(); - + const PPCSubtarget &Subtarget = MF.getTarget().getSubtarget<PPCSubtarget>(); + const PPCFrameLowering *PPCFI = + static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); + if (Subtarget.isPPC64() || Subtarget.isSVR4ABI()) return end()-5; // don't allocate R13, R31, R0, R1, LR - if (needsFP(MF)) + if (PPCFI->needsFP(MF)) return end()-4; // don't allocate R31, R0, R1, LR else return end()-3; // don't allocate R0, R1, LR @@ -331,7 +332,9 @@ def G8RC : RegisterClass<"PPC", [i64], 64, } G8RCClass::iterator G8RCClass::allocation_order_end(const MachineFunction &MF) const { - if (needsFP(MF)) + const PPCFrameLowering *PPCFI = + static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); + if (PPCFI->needsFP(MF)) return end()-5; else return end()-4; diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index 7344763..ad4da1f 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -13,7 +13,7 @@ def G3Itineraries : ProcessorItineraries< - [IU1, IU2, FPU1, BPU, SRU, SLU], [ + [IU1, IU2, FPU1, BPU, SRU, SLU], [], [ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index 7efc693..03c3b29 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// def G4Itineraries : ProcessorItineraries< - [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [ + [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td 
b/lib/Target/PowerPC/PPCScheduleG4Plus.td index 15056c0..00cac3c 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -15,7 +15,7 @@ def IU3 : FuncUnit; // integer unit 3 (7450 simple) def IU4 : FuncUnit; // integer unit 4 (7450 simple) def G4PlusItineraries : ProcessorItineraries< - [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [ + [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index 2dffc48..1671f22 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// def G5Itineraries : ProcessorItineraries< - [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [ + [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [ InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>, InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>, diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 5d46065..72a1dee 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -129,7 +129,7 @@ void PPCSubtarget::SetJITMode() { /// is required to get the address of the global. bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV, const TargetMachine &TM) const { - // We never hae stubs if HasLazyResolverStubs=false or if in static mode. + // We never have stubs if HasLazyResolverStubs=false or if in static mode. if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static) return false; // If symbol visibility is hidden, the extra load is not needed if diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 10cd10b..212b450 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -15,6 +15,7 @@ #include "PPCMCAsmInfo.h" #include "PPCTargetMachine.h" #include "llvm/PassManager.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/FormattedStream.h" @@ -29,6 +30,21 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { } +// This is duplicated code. Refactor this. +static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, + MCContext &Ctx, TargetAsmBackend &TAB, + raw_ostream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll, + bool NoExecStack) { + switch (Triple(TT).getOS()) { + case Triple::Darwin: + return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll); + default: + return NULL; + } +} + extern "C" void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target); @@ -36,6 +52,19 @@ extern "C" void LLVMInitializePowerPCTarget() { RegisterAsmInfoFn C(ThePPC32Target, createMCAsmInfo); RegisterAsmInfoFn D(ThePPC64Target, createMCAsmInfo); + + // Register the MC Code Emitter + TargetRegistry::RegisterCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter); + TargetRegistry::RegisterCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter); + + + // Register the asm backend. 
+ TargetRegistry::RegisterAsmBackend(ThePPC32Target, createPPCAsmBackend); + TargetRegistry::RegisterAsmBackend(ThePPC64Target, createPPCAsmBackend); + + // Register the object streamer. + TargetRegistry::RegisterObjectStreamer(ThePPC32Target, createMCStreamer); + TargetRegistry::RegisterObjectStreamer(ThePPC64Target, createMCStreamer); } @@ -44,7 +73,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT, : LLVMTargetMachine(T, TT), Subtarget(TT, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), - FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), + FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 626ddbb..2d24989 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -14,7 +14,7 @@ #ifndef PPC_TARGETMACHINE_H #define PPC_TARGETMACHINE_H -#include "PPCFrameInfo.h" +#include "PPCFrameLowering.h" #include "PPCSubtarget.h" #include "PPCJITInfo.h" #include "PPCInstrInfo.h" @@ -33,7 +33,7 @@ class PPCTargetMachine : public LLVMTargetMachine { PPCSubtarget Subtarget; const TargetData DataLayout; // Calculates type size & alignment PPCInstrInfo InstrInfo; - PPCFrameInfo FrameInfo; + PPCFrameLowering FrameLowering; PPCJITInfo JITInfo; PPCTargetLowering TLInfo; PPCSelectionDAGInfo TSInfo; @@ -43,23 +43,25 @@ public: PPCTargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool is64Bit); - virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const PPCFrameInfo *getFrameInfo() const { return &FrameInfo; } - virtual PPCJITInfo *getJITInfo() { return &JITInfo; } + virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const PPCFrameLowering *getFrameLowering() const { + return &FrameLowering; + } + virtual PPCJITInfo *getJITInfo() { return &JITInfo; } virtual const PPCTargetLowering *getTargetLowering() const { return &TLInfo; } virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const { return &TSInfo; } - virtual const PPCRegisterInfo *getRegisterInfo() const { + virtual const PPCRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const InstrItineraryData getInstrItineraryData() const { - return InstrItins; + virtual const InstrItineraryData *getInstrItineraryData() const { + return &InstrItins; } // Pass Pipeline Configuration diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 3465779..349cd89 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -37,6 +37,31 @@ _f3: ori r3, r2, 65535 blr +===-------------------------------------------------------------------------=== + +This code: + +unsigned add32carry(unsigned sum, unsigned x) { + unsigned z = sum + x; + if (sum + x < x) + z++; + return z; +} + +Should compile to something like: + + addc r3,r3,r4 + addze r3,r3 + +instead we get: + + add r3, r4, r3 + cmplw cr7, r3, r4 + mfcr r4 ; 1 + rlwinm r4, r4, 29, 31, 31 + add r3, r3, r4 + +Ick. ===-------------------------------------------------------------------------=== @@ -260,8 +285,8 @@ including having this work sanely. 
Fix Darwin FP-In-Integer Registers ABI Darwin passes doubles in structures in integer registers, which is very very -bad. Add something like a BIT_CONVERT to LLVM, then do an i-p transformation -that percolates these things out of functions. +bad. Add something like a BITCAST to LLVM, then do an i-p transformation that +percolates these things out of functions. Check out how horrible this is: http://gcc.gnu.org/ml/gcc/2005-10/msg01036.html diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 4faf8bc..4e14fbb 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -2,29 +2,6 @@ Target Independent Opportunities: //===---------------------------------------------------------------------===// -Dead argument elimination should be enhanced to handle cases when an argument is -dead to an externally visible function. Though the argument can't be removed -from the externally visible function, the caller doesn't need to pass it in. -For example in this testcase: - - void foo(int X) __attribute__((noinline)); - void foo(int X) { sideeffect(); } - void bar(int A) { foo(A+1); } - -We compile bar to: - -define void @bar(i32 %A) nounwind ssp { - %0 = add nsw i32 %A, 1 ; <i32> [#uses=1] - tail call void @foo(i32 %0) nounwind noinline ssp - ret void -} - -The add is dead, we could pass in 'i32 undef' instead. This occurs for C++ -templates etc, which usually have linkonce_odr/weak_odr linkage, not internal -linkage. - -//===---------------------------------------------------------------------===// - With the recent changes to make the implicit def/use set explicit in machineinstrs, we should change the target descriptions for 'call' instructions so that the .td files don't list all the call-clobbered registers as implicit @@ -41,7 +18,17 @@ This has a number of uses: //===---------------------------------------------------------------------===// -Make the PPC branch selector target independant +We should recognize various "overflow detection" idioms and translate them into +llvm.uadd.with.overflow and similar intrinsics. Here is a multiply idiom: + +unsigned int mul(unsigned int a,unsigned int b) { + if ((unsigned long long)a*b>0xffffffff) + exit(0); + return a*b; +} + +The legalization code for mul-with-overflow needs to be made more robust before +this can be implemented though. //===---------------------------------------------------------------------===// @@ -53,41 +40,6 @@ right). //===---------------------------------------------------------------------===// -Solve this DAG isel folding deficiency: - -int X, Y; - -void fn1(void) -{ - X = X | (Y << 3); -} - -compiles to - -fn1: - movl Y, %eax - shll $3, %eax - orl X, %eax - movl %eax, X - ret - -The problem is the store's chain operand is not the load X but rather -a TokenFactor of the load X and load Y, which prevents the folding. - -There are two ways to fix this: - -1. The dag combiner can start using alias analysis to realize that y/x - don't alias, making the store to X not dependent on the load from Y. -2. The generated isel could be made smarter in the case it can't - disambiguate the pointers. - -Number 1 is the preferred solution. - -This has been "fixed" by a TableGen hack. But that is a short term workaround -which will be removed once the proper fix is made.
- -//===---------------------------------------------------------------------===// - On targets with expensive 64-bit multiply, we could LSR this: for (i = ...; ++i) { @@ -300,14 +252,6 @@ unsigned long reverse(unsigned v) { return v ^ (t >> 8); } -Neither is this (very standard idiom): - -int f(int n) -{ - return (((n) << 24) | (((n) & 0xff00) << 8) - | (((n) >> 8) & 0xff00) | ((n) >> 24)); -} - //===---------------------------------------------------------------------===// [LOOP RECOGNITION] @@ -343,8 +287,7 @@ unsigned int popcount(unsigned int input) { return count; } -This is a form of idiom recognition for loops, the same thing that could be -useful for recognizing memset/memcpy. +This sort of thing should be added to the loop idiom pass. //===---------------------------------------------------------------------===// @@ -374,14 +317,6 @@ this construct. //===---------------------------------------------------------------------===// -[LOOP RECOGNITION] - -viterbi speeds up *significantly* if the various "history" related copy loops -are turned into memcpy calls at the source level. We need a "loops to memcpy" -pass. - -//===---------------------------------------------------------------------===// - [LOOP OPTIMIZATION] SingleSource/Benchmarks/Misc/dt.c shows several interesting optimization @@ -607,46 +542,21 @@ struct THotKey { short Key; bool Control; bool Shift; bool Alt; }; extern THotKey m_HotKey; THotKey GetHotKey () { return m_HotKey; } -into (-O3 -fno-exceptions -static -fomit-frame-pointer): - -__Z9GetHotKeyv: - pushl %esi - movl 8(%esp), %eax - movb _m_HotKey+3, %cl - movb _m_HotKey+4, %dl - movb _m_HotKey+2, %ch - movw _m_HotKey, %si - movw %si, (%eax) - movb %ch, 2(%eax) - movb %cl, 3(%eax) - movb %dl, 4(%eax) - popl %esi - ret $4 - -GCC produces: - -__Z9GetHotKeyv: - movl _m_HotKey, %edx - movl 4(%esp), %eax - movl %edx, (%eax) - movzwl _m_HotKey+4, %edx - movw %dx, 4(%eax) - ret $4 - -The LLVM IR contains the needed alignment info, so we should be able to -merge the loads and stores into 4-byte loads: - - %struct.THotKey = type { i16, i8, i8, i8 } -define void @_Z9GetHotKeyv(%struct.THotKey* sret %agg.result) nounwind { -... - %tmp2 = load i16* getelementptr (@m_HotKey, i32 0, i32 0), align 8 - %tmp5 = load i8* getelementptr (@m_HotKey, i32 0, i32 1), align 2 - %tmp8 = load i8* getelementptr (@m_HotKey, i32 0, i32 2), align 1 - %tmp11 = load i8* getelementptr (@m_HotKey, i32 0, i32 3), align 2 - -Alternatively, we should use a small amount of base-offset alias analysis -to make it so the scheduler doesn't need to hold all the loads in regs at -once. +into (-m64 -O3 -fno-exceptions -static -fomit-frame-pointer): + +__Z9GetHotKeyv: ## @_Z9GetHotKeyv + movq _m_HotKey@GOTPCREL(%rip), %rax + movzwl (%rax), %ecx + movzbl 2(%rax), %edx + shlq $16, %rdx + orq %rcx, %rdx + movzbl 3(%rax), %ecx + shlq $24, %rcx + orq %rdx, %rcx + movzbl 4(%rax), %eax + shlq $32, %rax + orq %rcx, %rax + ret //===---------------------------------------------------------------------===// @@ -658,42 +568,35 @@ implementations of ceil/floor/rint. Consider: int test() { - long long input[8] = {1,1,1,1,1,1,1,1}; + long long input[8] = {1,0,1,0,1,0,1,0}; foo(input); } -We currently compile this into a memcpy from a global array since the -initializer is fairly large and not memset'able. This is good, but the memcpy -gets lowered to load/stores in the code generator. This is also ok, except -that the codegen lowering for memcpy doesn't handle the case when the source -is a constant global. 
This gives us atrocious code like this: +Clang compiles this into: - call "L1$pb" -"L1$pb": - popl %eax - movl _C.0.1444-"L1$pb"+32(%eax), %ecx - movl %ecx, 40(%esp) - movl _C.0.1444-"L1$pb"+20(%eax), %ecx - movl %ecx, 28(%esp) - movl _C.0.1444-"L1$pb"+36(%eax), %ecx - movl %ecx, 44(%esp) - movl _C.0.1444-"L1$pb"+44(%eax), %ecx - movl %ecx, 52(%esp) - movl _C.0.1444-"L1$pb"+40(%eax), %ecx - movl %ecx, 48(%esp) - movl _C.0.1444-"L1$pb"+12(%eax), %ecx - movl %ecx, 20(%esp) - movl _C.0.1444-"L1$pb"+4(%eax), %ecx -... + call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 16, i1 false) + %0 = getelementptr [8 x i64]* %input, i64 0, i64 0 + store i64 1, i64* %0, align 16 + %1 = getelementptr [8 x i64]* %input, i64 0, i64 2 + store i64 1, i64* %1, align 16 + %2 = getelementptr [8 x i64]* %input, i64 0, i64 4 + store i64 1, i64* %2, align 16 + %3 = getelementptr [8 x i64]* %input, i64 0, i64 6 + store i64 1, i64* %3, align 16 -instead of: - movl $1, 16(%esp) - movl $0, 20(%esp) - movl $1, 24(%esp) - movl $0, 28(%esp) - movl $1, 32(%esp) - movl $0, 36(%esp) - ... +Which gets codegen'd into: + + pxor %xmm0, %xmm0 + movaps %xmm0, -16(%rbp) + movaps %xmm0, -32(%rbp) + movaps %xmm0, -48(%rbp) + movaps %xmm0, -64(%rbp) + movq $1, -64(%rbp) + movq $1, -48(%rbp) + movq $1, -32(%rbp) + movq $1, -16(%rbp) + +It would be better to have 4 movq's of 0 instead of the movaps's. //===---------------------------------------------------------------------===// @@ -739,20 +642,6 @@ etc. On X86, we miss a bunch of 'rotate by variable' cases because the rotate matching code in dag combine doesn't look through truncates aggressively enough. Here are some testcases reduces from GCC PR17886: -unsigned long long f(unsigned long long x, int y) { - return (x << y) | (x >> 64-y); -} -unsigned f2(unsigned x, int y){ - return (x << y) | (x >> 32-y); -} -unsigned long long f3(unsigned long long x){ - int y = 9; - return (x << y) | (x >> 64-y); -} -unsigned f4(unsigned x){ - int y = 10; - return (x << y) | (x >> 32-y); -} unsigned long long f5(unsigned long long x, unsigned long long y) { return (x << 8) | ((y >> 48) & 0xffull); } @@ -771,10 +660,50 @@ unsigned long long f6(unsigned long long x, unsigned long long y, int z) { } } -On X86-64, we only handle f2/f3/f4 right. On x86-32, a few of these -generate truly horrible code, instead of using shld and friends. On -ARM, we end up with calls to L___lshrdi3/L___ashldi3 in f, which is -badness. PPC64 misses f, f5 and f6. CellSPU aborts in isel. +//===---------------------------------------------------------------------===// + +This (and similar related idioms): + +unsigned int foo(unsigned char i) { + return i | (i<<8) | (i<<16) | (i<<24); +} + +compiles into: + +define i32 @foo(i8 zeroext %i) nounwind readnone ssp noredzone { +entry: + %conv = zext i8 %i to i32 + %shl = shl i32 %conv, 8 + %shl5 = shl i32 %conv, 16 + %shl9 = shl i32 %conv, 24 + %or = or i32 %shl9, %conv + %or6 = or i32 %or, %shl5 + %or10 = or i32 %or6, %shl + ret i32 %or10 +} + +it would be better as: + +unsigned int bar(unsigned char i) { + unsigned int j=i | (i << 8); + return j | (j<<16); +} + +aka: + +define i32 @bar(i8 zeroext %i) nounwind readnone ssp noredzone { +entry: + %conv = zext i8 %i to i32 + %shl = shl i32 %conv, 8 + %or = or i32 %shl, %conv + %shl5 = shl i32 %or, 16 + %or6 = or i32 %shl5, %or + ret i32 %or6 +} + +or even i*0x01010101, depending on the speed of the multiplier. 
The best way to +handle this is to canonicalize it to a multiply in IR and have codegen handle +lowering multiplies to shifts on cpus where shifts are faster. //===---------------------------------------------------------------------===// @@ -804,18 +733,6 @@ codegen badness or something else (haven't investigated). //===---------------------------------------------------------------------===// -We miss some instcombines for stuff like this: -void bar (void); -void foo (unsigned int a) { - /* This one is equivalent to a >= (3 << 2). */ - if ((a >> 2) >= 3) - bar (); -} - -A few other related ones are in GCC PR14753. - -//===---------------------------------------------------------------------===// - Divisibility by constant can be simplified (according to GCC PR12849) from being a mulhi to being a mul lo (cheaper). Testcase: @@ -906,16 +823,6 @@ The expression should optimize to something like //===---------------------------------------------------------------------===// -void a(int variable) -{ - if (variable == 4 || variable == 6) - bar(); -} -This should optimize to "if ((variable | 2) == 6)". Currently not -optimized with "clang -emit-llvm-bc | opt -std-compile-opts | llc". - -//===---------------------------------------------------------------------===// - unsigned int f(unsigned int i, unsigned int n) {++i; if (i == n) ++i; return i;} unsigned int f2(unsigned int i, unsigned int n) {++i; i += i == n; return i;} @@ -966,6 +873,12 @@ rshift_gt (unsigned int a) if ((a >> 2) > 5) bar (); } + +void neg_eq_cst(unsigned int a) { +if (-a == 123) +bar(); +} + All should simplify to a single comparison. All of these are currently not optimized with "clang -emit-llvm-bc | opt -std-compile-opts". @@ -1033,18 +946,6 @@ Should also combine to x | 8. Currently not optimized with "clang //===---------------------------------------------------------------------===// -int a(int x) {return (x & 8) == 0 ? -1 : -9;} -Should combine to (x | -9) ^ 8. Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - -int a(int x) {return (x & 8) == 0 ? -9 : -1;} -Should combine to x | -9. Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - int a(int x) {return ((x | -9) ^ 8) & x;} Should combine to x & -9. Currently not optimized with "clang -emit-llvm-bc | opt -std-compile-opts". @@ -1145,6 +1046,77 @@ int test (int a, int b, int c, int g) { It would be better to do the mul once to reduce codesize above the if. This is GCC PR38204. 
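//===---------------------------------------------------------------------===//

[Editorial sketch, not from the original notes.] To make "a single comparison"
concrete for the rshift_gt and neg_eq_cst entries above, the expected folds
look like this (the _opt names are invented):

void bar(void);

/* rshift_gt: (a >> 2) > 5 holds exactly when a >= 24, i.e. a > 23. */
void rshift_gt_opt(unsigned int a) {
  if (a > 23)
    bar();
}

/* neg_eq_cst: -a == 123 holds exactly when a == -123 modulo 2^32. */
void neg_eq_cst_opt(unsigned int a) {
  if (a == -123u)
    bar();
}

Each test becomes one unsigned compare against a constant, with no shift or
negate left over.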
+ +//===---------------------------------------------------------------------===// +This simple function from 179.art: + +int winner, numf2s; +struct { double y; int reset; } *Y; + +void find_match() { + int i; + winner = 0; + for (i=0;i<numf2s;i++) + if (Y[i].y > Y[winner].y) + winner =i; +} + +Compiles into (with clang TBAA): + +for.body: ; preds = %for.inc, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.inc ] + %i.01718 = phi i32 [ 0, %bb.nph ], [ %i.01719, %for.inc ] + %tmp4 = getelementptr inbounds %struct.anon* %tmp3, i64 %indvar, i32 0 + %tmp5 = load double* %tmp4, align 8, !tbaa !4 + %idxprom7 = sext i32 %i.01718 to i64 + %tmp10 = getelementptr inbounds %struct.anon* %tmp3, i64 %idxprom7, i32 0 + %tmp11 = load double* %tmp10, align 8, !tbaa !4 + %cmp12 = fcmp ogt double %tmp5, %tmp11 + br i1 %cmp12, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %i.017 = trunc i64 %indvar to i32 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %i.01719 = phi i32 [ %i.01718, %for.body ], [ %i.017, %if.then ] + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %tmp22 + br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body + + +It is good that we hoisted the reloads of numf2s and Y out of the loop and +sunk the store to winner out. + +However, this is awful on several levels: the conditional truncate in the loop +(-indvars at fault? why can't we completely promote the IV to i64?). + +Beyond that, we have a partially redundant load in the loop: if "winner" (aka +%i.01718) isn't updated, we reload Y[winner].y the next time through the loop. +Similarly, the addressing that feeds it (including the sext) is redundant. In +the end we get this generated assembly: + +LBB0_2: ## %for.body + ## =>This Inner Loop Header: Depth=1 + movsd (%rdi), %xmm0 + movslq %edx, %r8 + shlq $4, %r8 + ucomisd (%rcx,%r8), %xmm0 + jbe LBB0_4 + movl %esi, %edx +LBB0_4: ## %for.inc + addq $16, %rdi + incq %rsi + cmpq %rsi, %rax + jne LBB0_2 + +All things considered this isn't too bad, but we shouldn't need the movslq or +the shlq instruction, or the load folded into ucomisd every time through the +loop. + +On an x86-specific topic, if the loop can't be restructured, the movl should be a +cmov. + //===---------------------------------------------------------------------===// [STORE SINKING] @@ -1216,6 +1188,29 @@ loadpre14.c loadpre15.c actually a conditional increment: loadpre18.c loadpre19.c +//===---------------------------------------------------------------------===// + +[LOAD PRE / STORE SINKING / SPEC HACK] + +This is a chunk of code from 456.hmmer: + +int f(int M, int *mc, int *mpp, int *tpmm, int *ip, int *tpim, int *dpp, + int *tpdm, int xmb, int *bp, int *ms) { + int k, sc; + for (k = 1; k <= M; k++) { + mc[k] = mpp[k-1] + tpmm[k-1]; + if ((sc = ip[k-1] + tpim[k-1]) > mc[k]) mc[k] = sc; + if ((sc = dpp[k-1] + tpdm[k-1]) > mc[k]) mc[k] = sc; + if ((sc = xmb + bp[k]) > mc[k]) mc[k] = sc; + mc[k] += ms[k]; + } +} + +It is very profitable for this benchmark to turn the conditional stores to mc[k] +into a conditional move (select instr in IR) and allow the final store to do the +store. See GCC PR27313 for more details. Note that this is valid to xform even +with the new C++ memory model, since mc[k] is previously loaded and later +stored.
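//===---------------------------------------------------------------------===//

As a sketch of the transformation described in the previous entry (editorial
addition; f_sel is a hypothetical name, not part of the benchmark), the
source-level equivalent keeps the running maximum in a local so each max is a
select and only the final result is stored:

void f_sel(int M, int *mc, int *mpp, int *tpmm, int *ip, int *tpim, int *dpp,
           int *tpdm, int xmb, int *bp, int *ms) {
  int k, sc, best;
  for (k = 1; k <= M; k++) {
    best = mpp[k-1] + tpmm[k-1];
    sc = ip[k-1] + tpim[k-1];
    best = sc > best ? sc : best;   /* select in IR, not a store */
    sc = dpp[k-1] + tpdm[k-1];
    best = sc > best ? sc : best;
    sc = xmb + bp[k];
    best = sc > best ? sc : best;
    mc[k] = best + ms[k];           /* the single remaining store */
  }
}

At source level this sketch also assumes mc[] does not alias the input arrays;
the compiler version of the rewrite would have to establish that before
dropping the intermediate stores.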
//===---------------------------------------------------------------------===// @@ -1261,26 +1256,6 @@ SingleSource/Benchmarks/Misc/dt.c //===---------------------------------------------------------------------===// -A/B get pinned to the stack because we turn an if/then into a select instead -of PRE'ing the load/store. This may be fixable in instcombine: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37892 - -struct X { int i; }; -int foo (int x) { - struct X a; - struct X b; - struct X *p; - a.i = 1; - b.i = 2; - if (x) - p = &a; - else - p = &b; - return p->i; -} - -//===---------------------------------------------------------------------===// - Interesting missed case because of control flow flattening (should be 2 loads): http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629 With: llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | @@ -1308,12 +1283,6 @@ void foo (int a, struct T b) simplifylibcalls should do several optimizations for strspn/strcspn: -strcspn(x, "") -> strlen(x) -strcspn("", x) -> 0 -strspn("", x) -> 0 -strspn(x, "") -> strlen(x) -strspn(x, "a") -> strchr(x, 'a')-x - strcspn(x, "a") -> inlined loop for up to 3 letters (similarly for strspn): size_t __strcspn_c3 (__const char *__s, int __reject1, int __reject2, @@ -1353,14 +1322,7 @@ Those should be turned into a switch. This is interesting for a couple reasons. First, in this: - %3073 = call i8* @strcpy(i8* %3072, i8* %3071) nounwind - %strlen = call i32 @strlen(i8* %3072) - -The strlen could be replaced with: %strlen = sub %3072, %3073, because the -strcpy call returns a pointer to the end of the string. Based on that, the -endptr GEP just becomes equal to 3073, which eliminates a strlen call and GEP. - -Second, the memcpy+strlen strlen can be replaced with: +The memcpy+strlen strlen can be replaced with: %3074 = call i32 @strlen([5 x i8]* @"\01LC42") nounwind readonly @@ -1436,18 +1398,6 @@ This pattern repeats several times, basically doing: //===---------------------------------------------------------------------===// -186.crafty also contains this code: - -%1906 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0)) -%1907 = getelementptr [32 x i8]* @pgn_event, i32 0, i32 %1906 -%1908 = call i8* @strcpy(i8* %1907, i8* %1905) nounwind align 1 -%1909 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0)) -%1910 = getelementptr [32 x i8]* @pgn_event, i32 0, i32 %1909 - -The last strlen is computable as 1908-@pgn_event, which means 1910=1908. - -//===---------------------------------------------------------------------===// - 186.crafty has this interesting pattern with the "out.4543" variable: call void @llvm.memcpy.i32( @@ -1509,22 +1459,6 @@ the float directly. //===---------------------------------------------------------------------===// -#include <math.h> -double foo(double a) { return sin(a); } - -This compiles into this on x86-64 Linux: -foo: - subq $8, %rsp - call sin - addq $8, %rsp - ret -vs: - -foo: - jmp sin - -//===---------------------------------------------------------------------===// - The arg promotion pass should make use of nocapture to make its alias analysis stuff much more precise. 
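//===---------------------------------------------------------------------===//

An editorial illustration of the nocapture point above (the function names are
invented): if argpromotion and alias analysis trust nocapture, a call that
merely inspects a pointer stops blocking optimization of the caller's locals:

void use(int v);

/* 'p' never escapes: no copy of it is stored anywhere, so the parameter can
   be marked nocapture (and here readonly, since the callee only reads *p). */
void observe(const int *p) { use(*p); }

int caller(void) {
  int local = 42;
  observe(&local);
  /* With nocapture + readonly on 'p', nothing can have modified or retained
     a pointer to 'local', so this load can be folded to 42. */
  return local;
}

//===---------------------------------------------------------------------===//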
@@ -1644,21 +1578,6 @@ int bar() { return foo("abcd"); } //===---------------------------------------------------------------------===// -InstCombine should use SimplifyDemandedBits to remove the or instruction: - -define i1 @test(i8 %x, i8 %y) { - %A = or i8 %x, 1 - %B = icmp ugt i8 %A, 3 - ret i1 %B -} - -Currently instcombine calls SimplifyDemandedBits with either all bits or just -the sign bit, if the comparison is obviously a sign test. In this case, we only -need all but the bottom two bits from %A, and if we gave that mask to SDB it -would delete the or instruction for us. - -//===---------------------------------------------------------------------===// - functionattrs doesn't know much about memcpy/memset. This function should be marked readnone rather than readonly, since it only twiddles local memory, but functionattrs doesn't handle memset/memcpy/memmove aggressively: @@ -1674,6 +1593,10 @@ int foo() { return **p; } +This can be seen at: +$ clang t.c -S -o - -mkernel -O0 -emit-llvm | opt -functionattrs -S + + //===---------------------------------------------------------------------===// Missed instcombine transformation: @@ -1689,14 +1612,6 @@ This should be optimized to a single compare. Testcase derived from gcc. //===---------------------------------------------------------------------===// -Missed instcombine transformation: -void b(); -void a(int x) { if (((1<<x)&8)==0) b(); } - -The shift should be optimized out. Testcase derived from gcc. - -//===---------------------------------------------------------------------===// - Missed instcombine or reassociate transformation: int a(int a, int b) { return (a==12)&(b>47)&(b<58); } @@ -1706,28 +1621,35 @@ from gcc. //===---------------------------------------------------------------------===// Missed instcombine transformation: -define i32 @a(i32 %x) nounwind readnone { -entry: - %rem = srem i32 %x, 32 - %shl = shl i32 1, %rem - ret i32 %shl -} -The srem can be transformed to an and because if x is negative, the shift is -undefined. Testcase derived from gcc. + %382 = srem i32 %tmp14.i, 64 ; [#uses=1] + %383 = zext i32 %382 to i64 ; [#uses=1] + %384 = shl i64 %381, %383 ; [#uses=1] + %385 = icmp slt i32 %tmp14.i, 64 ; [#uses=1] + +The srem can be transformed to an and because if %tmp14.i is negative, the +shift is undefined. Testcase derived from 403.gcc. //===---------------------------------------------------------------------===// -Missed instcombine/dagcombine transformation: -define i32 @a(i32 %x, i32 %y) nounwind readnone { -entry: - %mul = mul i32 %y, -8 - %sub = sub i32 %x, %mul - ret i32 %sub -} +This is a range comparison on a divided result (from 403.gcc): -Should compile to something like x+y*8, but currently compiles to an -inefficient result. Testcase derived from gcc. + %1337 = sdiv i32 %1336, 8 ; [#uses=1] + %.off.i208 = add i32 %1336, 7 ; [#uses=1] + %1338 = icmp ult i32 %.off.i208, 15 ; [#uses=1] + +We already catch this (removing the sdiv) if there isn't an add; we should +handle the 'add' as well. This is a common idiom with its builtin_alloca code. +C testcase: + +int a(int x) { return (unsigned)(x/16+7) < 15; } + +Another similar case involves truncations on 64-bit targets: + + %361 = sdiv i64 %.046, 8 ; [#uses=1] + %362 = trunc i64 %361 to i32 ; [#uses=2] +...
+ %367 = icmp eq i32 %362, 0 ; [#uses=1] //===---------------------------------------------------------------------===// @@ -1855,13 +1777,12 @@ entry: } Generated code: - addq %rcx, %rdx - movl $0, %eax - adcq $0, %rax - addq %r8, %rax - movq %rax, (%rdi) - movq %rdx, (%rsi) - ret + addq %rcx, %rdx + sbbq %rax, %rax + subq %rax, %r8 + movq %r8, (%rdi) + movq %rdx, (%rsi) + ret Expected code: addq %rcx, %rdx @@ -1870,12 +1791,6 @@ Expected code: movq %rdx, (%rsi) ret -The generated SelectionDAG has an ADD of an ADDE, where both operands of the -ADDE are zero. Replacing one of the operands of the ADDE with the other operand -of the ADD, and replacing the ADD with the ADDE, should give the desired result. - -(That said, we are doing a lot better than gcc on this testcase. :) ) - //===---------------------------------------------------------------------===// Switch lowering generates less than ideal code for the following switch: @@ -1919,21 +1834,433 @@ something like the following, which eliminates a branch: ret .LBB0_2: jmp foo # TAILCALL + +//===---------------------------------------------------------------------===// + +We compile this: + +int foo(int a) { return (a & (~15)) / 16; } + +Into: + +define i32 @foo(i32 %a) nounwind readnone ssp { +entry: + %and = and i32 %a, -16 + %div = sdiv i32 %and, 16 + ret i32 %div +} + +but this code (X & -A)/A is X >> log2(A) when A is a power of 2, so this case +should be instcombined into just "a >> 4". + +We do get this at the codegen level, so something knows about it, but +instcombine should catch it earlier: + +_foo: ## @foo +## BB#0: ## %entry + movl %edi, %eax + sarl $4, %eax + ret + +//===---------------------------------------------------------------------===// + +This code (from GCC PR28685): + +int test(int a, int b) { + int lt = a < b; + int eq = a == b; + if (lt) + return 1; + return eq; +} + +Is compiled to: + +define i32 @test(i32 %a, i32 %b) nounwind readnone ssp { +entry: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %cmp5 = icmp eq i32 %a, %b + %conv6 = zext i1 %cmp5 to i32 + ret i32 %conv6 + +return: ; preds = %entry + ret i32 1 +} + +it could be: + +define i32 @test__(i32 %a, i32 %b) nounwind readnone ssp { +entry: + %0 = icmp sle i32 %a, %b + %retval = zext i1 %0 to i32 + ret i32 %retval +} + +//===---------------------------------------------------------------------===// + +This code can be seen in viterbi: + + %64 = call noalias i8* @malloc(i64 %62) nounwind +... + %67 = call i64 @llvm.objectsize.i64(i8* %64, i1 false) nounwind + %68 = call i8* @__memset_chk(i8* %64, i32 0, i64 %62, i64 %67) nounwind + +llvm.objectsize.i64 should be taught about malloc/calloc, allowing it to +fold to %62. This is a security win (overflows of malloc will get caught) +and also a performance win by exposing more memsets to the optimizer. + +This occurs several times in viterbi. + +Note that this would change the semantics of @llvm.objectsize which by its +current definition always folds to a constant. We also should make sure that +we remove checking in code like + + char *p = malloc(strlen(s)+1); + __strcpy_chk(p, s, __builtin_objectsize(p, 0)); + +//===---------------------------------------------------------------------===// + +This code (from Benchmarks/Dhrystone/dry.c): + +define i32 @Func1(i32, i32) nounwind readnone optsize ssp { +entry: + %sext = shl i32 %0, 24 + %conv = ashr i32 %sext, 24 + %sext6 = shl i32 %1, 24 + %conv4 = ashr i32 %sext6, 24 + %cmp = icmp eq i32 %conv, %conv4 + %. 
= select i1 %cmp, i32 10000, i32 0 + ret i32 %. +} + +Should be simplified into something like: + +define i32 @Func1(i32, i32) nounwind readnone optsize ssp { +entry: + %sext = shl i32 %0, 24 + %conv = and i32 %sext, 0xFF000000 + %sext6 = shl i32 %1, 24 + %conv4 = and i32 %sext6, 0xFF000000 + %cmp = icmp eq i32 %conv, %conv4 + %. = select i1 %cmp, i32 10000, i32 0 + ret i32 %. +} + +and then to: + +define i32 @Func1(i32, i32) nounwind readnone optsize ssp { +entry: + %conv = and i32 %0, 0xFF + %conv4 = and i32 %1, 0xFF + %cmp = icmp eq i32 %conv, %conv4 + %. = select i1 %cmp, i32 10000, i32 0 + ret i32 %. +} +//===---------------------------------------------------------------------===// + +clang -O3 currently compiles this code + +int g(unsigned int a) { + unsigned int c[100]; + c[10] = a; + c[11] = a; + unsigned int b = c[10] + c[11]; + if(b > a*2) a = 4; + else a = 8; + return a + 7; +} + +into + +define i32 @g(i32 a) nounwind readnone { + %add = shl i32 %a, 1 + %mul = shl i32 %a, 1 + %cmp = icmp ugt i32 %add, %mul + %a.addr.0 = select i1 %cmp, i32 11, i32 15 + ret i32 %a.addr.0 +} + +The icmp should fold to false. This CSE opportunity is only available +after GVN and InstCombine have run. + +//===---------------------------------------------------------------------===// + +memcpyopt should turn this: + +define i8* @test10(i32 %x) { + %alloc = call noalias i8* @malloc(i32 %x) nounwind + call void @llvm.memset.p0i8.i32(i8* %alloc, i8 0, i32 %x, i32 1, i1 false) + ret i8* %alloc +} + +into a call to calloc. We should make sure that we analyze calloc as +aggressively as malloc though. + +//===---------------------------------------------------------------------===// + +clang -O3 doesn't optimize this: + +void f1(int* begin, int* end) { + std::fill(begin, end, 0); +} + +into a memset. This is PR8942. + //===---------------------------------------------------------------------===// -Given a branch where the two target blocks are identical ("ret i32 %b" in -both), simplifycfg will simplify them away. 
But not so for a switch statement: -define i32 @f(i32 %a, i32 %b) nounwind readnone { +clang -O3 -fno-exceptions currently compiles this code: + +void f(int N) { + std::vector<int> v(N); + + extern void sink(void*); sink(&v); +} + +into + +define void @_Z1fi(i32 %N) nounwind { entry: - switch i32 %a, label %bb3 [ - i32 4, label %bb - i32 6, label %bb - ] + %v2 = alloca [3 x i32*], align 8 + %v2.sub = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 0 + %tmpcast = bitcast [3 x i32*]* %v2 to %"class.std::vector"* + %conv = sext i32 %N to i64 + store i32* null, i32** %v2.sub, align 8, !tbaa !0 + %tmp3.i.i.i.i.i = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 1 + store i32* null, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0 + %tmp4.i.i.i.i.i = getelementptr inbounds [3 x i32*]* %v2, i64 0, i64 2 + store i32* null, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0 + %cmp.i.i.i.i = icmp eq i32 %N, 0 + br i1 %cmp.i.i.i.i, label %_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.thread.i.i, label %cond.true.i.i.i.i + +_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.thread.i.i: ; preds = %entry + store i32* null, i32** %v2.sub, align 8, !tbaa !0 + store i32* null, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0 + %add.ptr.i5.i.i = getelementptr inbounds i32* null, i64 %conv + store i32* %add.ptr.i5.i.i, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0 + br label %_ZNSt6vectorIiSaIiEEC1EmRKiRKS0_.exit + +cond.true.i.i.i.i: ; preds = %entry + %cmp.i.i.i.i.i = icmp slt i32 %N, 0 + br i1 %cmp.i.i.i.i.i, label %if.then.i.i.i.i.i, label %_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.i.i + +if.then.i.i.i.i.i: ; preds = %cond.true.i.i.i.i + call void @_ZSt17__throw_bad_allocv() noreturn nounwind + unreachable + +_ZNSt12_Vector_baseIiSaIiEEC2EmRKS0_.exit.i.i: ; preds = %cond.true.i.i.i.i + %mul.i.i.i.i.i = shl i64 %conv, 2 + %call3.i.i.i.i.i = call noalias i8* @_Znwm(i64 %mul.i.i.i.i.i) nounwind + %0 = bitcast i8* %call3.i.i.i.i.i to i32* + store i32* %0, i32** %v2.sub, align 8, !tbaa !0 + store i32* %0, i32** %tmp3.i.i.i.i.i, align 8, !tbaa !0 + %add.ptr.i.i.i = getelementptr inbounds i32* %0, i64 %conv + store i32* %add.ptr.i.i.i, i32** %tmp4.i.i.i.i.i, align 8, !tbaa !0 + call void @llvm.memset.p0i8.i64(i8* %call3.i.i.i.i.i, i8 0, i64 %mul.i.i.i.i.i, i32 4, i1 false) + br label %_ZNSt6vectorIiSaIiEEC1EmRKiRKS0_.exit + +This is just the handling the construction of the vector. Most surprising here +is the fact that all three null stores in %entry are dead (because we do no +cross-block DSE). + +Also surprising is that %conv isn't simplified to 0 in %....exit.thread.i.i. +This is a because the client of LazyValueInfo doesn't simplify all instruction +operands, just selected ones. + +//===---------------------------------------------------------------------===// + +clang -O3 -fno-exceptions currently compiles this code: + +void f(char* a, int n) { + __builtin_memset(a, 0, n); + for (int i = 0; i < n; ++i) + a[i] = 0; +} + +into: + +define void @_Z1fPci(i8* nocapture %a, i32 %n) nounwind { +entry: + %conv = sext i32 %n to i64 + tail call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 %conv, i32 1, i1 false) + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %tmp10 = add i32 %n, -1 + %tmp11 = zext i32 %tmp10 to i64 + %tmp12 = add i64 %tmp11, 1 + call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 %tmp12, i32 1, i1 false) + ret void + +for.end: ; preds = %entry + ret void +} + +This shouldn't need the ((zext (%n - 1)) + 1) game, and it should ideally fold +the two memset's together. 
The issue with %n seems to stem from poor handling +of the original loop. + +To simplify this, we need SCEV to know that "n != 0" because of the dominating +conditional. That would turn the second memset into a simple memset of 'n'. + +//===---------------------------------------------------------------------===// + +clang -O3 -fno-exceptions currently compiles this code: -bb: ; preds = %entry, %entry - ret i32 %b +struct S { + unsigned short m1, m2; + unsigned char m3, m4; +}; -bb3: ; preds = %entry - ret i32 %b +void f(int N) { + std::vector<S> v(N); + extern void sink(void*); sink(&v); } + +into poor code for zero-initializing 'v' when N is >0. The problem is that +S is only 6 bytes, but each element is 8 byte-aligned. We generate a loop and +4 stores on each iteration. If the struct were 8 bytes, this gets turned into +a memset. + +In order to handle this we have to: + A) Teach clang to generate metadata for memsets of structs that have holes in + them. + B) Teach clang to use such a memset for zero init of this struct (since it has + a hole), instead of doing elementwise zeroing. + //===---------------------------------------------------------------------===// + +clang -O3 currently compiles this code: + +extern const int magic; +double f() { return 0.0 * magic; } + +into + +@magic = external constant i32 + +define double @_Z1fv() nounwind readnone { +entry: + %tmp = load i32* @magic, align 4, !tbaa !0 + %conv = sitofp i32 %tmp to double + %mul = fmul double %conv, 0.000000e+00 + ret double %mul +} + +We should be able to fold away this fmul to 0.0. More generally, fmul(x,0.0) +can be folded to 0.0 if we can prove that the LHS is not -0.0, not a NaN, and +not an INF. The CannotBeNegativeZero predicate in value tracking should be +extended to support general "fpclassify" operations that can return +yes/no/unknown for each of these predicates. + +In this predicate, we know that uitofp is trivially never NaN or -0.0, and +we know that it isn't +/-Inf if the floating point type has enough exponent bits +to represent the largest integer value as < inf. + +//===---------------------------------------------------------------------===// + +When optimizing a transformation that can change the sign of 0.0 (such as the +0.0*val -> 0.0 transformation above), it might be provable that the sign of the +expression doesn't matter. For example, by the above rules, we can't transform +fmul(sitofp(x), 0.0) into 0.0, because x might be -1 and the result of the +expression is defined to be -0.0. + +If we look at the uses of the fmul for example, we might be able to prove that +all uses don't care about the sign of zero. For example, if we have: + + fadd(fmul(sitofp(x), 0.0), 2.0) + +Since we know that x+2.0 doesn't care about the sign of any zeros in X, we can +transform the fmul to 0.0, and then the fadd to 2.0. + +//===---------------------------------------------------------------------===// + +We should enhance memcpy/memcpy/memset to allow a metadata node on them +indicating that some bytes of the transfer are undefined. This is useful for +frontends like clang when lowering struct copies, when some elements of the +struct are undefined. 
Consider something like this: + +struct x { + char a; + int b[4]; +}; +void foo(struct x*P); +struct x testfunc() { + struct x V1, V2; + foo(&V1); + V2 = V1; + + return V2; +} + +We currently compile this to: +$ clang t.c -S -o - -O0 -emit-llvm | opt -scalarrepl -S + + +%struct.x = type { i8, [4 x i32] } + +define void @testfunc(%struct.x* sret %agg.result) nounwind ssp { +entry: + %V1 = alloca %struct.x, align 4 + call void @foo(%struct.x* %V1) + %tmp1 = bitcast %struct.x* %V1 to i8* + %0 = bitcast %struct.x* %V1 to i160* + %srcval1 = load i160* %0, align 4 + %tmp2 = bitcast %struct.x* %agg.result to i8* + %1 = bitcast %struct.x* %agg.result to i160* + store i160 %srcval1, i160* %1, align 4 + ret void +} + +This happens because SRoA sees that the temp alloca is being memcpy'd into +and out of and it has holes and it has to be conservative. If we knew about the +holes, then this could be much much better. + +Having information about these holes would also improve memcpy (etc) lowering at +llc time when it gets inlined, because we can use smaller transfers. This also +avoids partial register stalls in some important cases. + +//===---------------------------------------------------------------------===// + +We don't fold (icmp (add) (add)) unless the two adds only have a single use. +There are a lot of cases that we're refusing to fold in (e.g.) 256.bzip2, for +example: + + %indvar.next90 = add i64 %indvar89, 1 ;; Has 2 uses + %tmp96 = add i64 %tmp95, 1 ;; Has 1 use + %exitcond97 = icmp eq i64 %indvar.next90, %tmp96 + +We don't fold this because we don't want to introduce an overlapped live range +of the ivar. However, we can make this more aggressive without causing +performance issues in two ways: + +1. If *either* the LHS or RHS has a single use, we can definitely do the + transformation. In the overlapping liverange case we're trading one register + use for one fewer operation, which is a reasonable trade. Before doing this + we should verify that the llc output actually shrinks for some benchmarks. +2. If both ops have multiple uses, we can still fold it if the operations are + both sinkable to *after* the icmp (e.g. in a subsequent block) which doesn't + increase register pressure. + +There are a ton of icmp's we aren't simplifying because of the reg pressure +concern. Care is warranted here though because many of these are induction +variables and other cases that matter a lot to performance, like the above. +Here's a blob of code that you can drop into the bottom of visitICmp to see some +missed cases: + + { Value *A, *B, *C, *D; + if (match(Op0, m_Add(m_Value(A), m_Value(B))) && + match(Op1, m_Add(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + errs() << "OP0 = " << *Op0 << " U=" << Op0->getNumUses() << "\n"; + errs() << "OP1 = " << *Op1 << " U=" << Op1->getNumUses() << "\n"; + errs() << "CMP = " << I << "\n\n"; + } + } + +//===---------------------------------------------------------------------===// + + diff --git a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt deleted file mode 100644 index da629f6..0000000 --- a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/..
) - -add_llvm_library(LLVMSparcAsmPrinter - SparcAsmPrinter.cpp - ) -add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen) diff --git a/lib/Target/Sparc/AsmPrinter/Makefile b/lib/Target/Sparc/AsmPrinter/Makefile deleted file mode 100644 index fe47538..0000000 --- a/lib/Target/Sparc/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/Sparc/AsmPrinter/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMSparcAsmPrinter - -# Hack: we need to include 'main' Sparc target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp deleted file mode 100644 index ab948bb..0000000 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ /dev/null @@ -1,249 +0,0 @@ -//===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to GAS-format SPARC assembly language. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "Sparc.h" -#include "SparcInstrInfo.h" -#include "SparcTargetMachine.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - class SparcAsmPrinter : public AsmPrinter { - public: - explicit SparcAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) {} - - virtual const char *getPassName() const { - return "Sparc Assembly Printer"; - } - - void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); - void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, - const char *Modifier = 0); - void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); - - virtual void EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); - } - void printInstruction(const MachineInstr *MI, raw_ostream &OS);// autogen'd. 
- static const char *getRegisterName(unsigned RegNo); - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - - bool printGetPCX(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS); - - virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) - const; - }; -} // end of anonymous namespace - -#include "SparcGenAsmWriter.inc" - -void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand (opNum); - bool CloseParen = false; - if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) { - O << "%hi("; - CloseParen = true; - } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) && - !MO.isReg() && !MO.isImm()) { - O << "%lo("; - CloseParen = true; - } - switch (MO.getType()) { - case MachineOperand::MO_Register: - O << "%" << LowercaseString(getRegisterName(MO.getReg())); - break; - - case MachineOperand::MO_Immediate: - O << (int)MO.getImm(); - break; - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - case MachineOperand::MO_GlobalAddress: - O << *Mang->getSymbol(MO.getGlobal()); - break; - case MachineOperand::MO_ExternalSymbol: - O << MO.getSymbolName(); - break; - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" - << MO.getIndex(); - break; - default: - llvm_unreachable("<unknown operand type>"); - } - if (CloseParen) O << ")"; -} - -void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, - raw_ostream &O, const char *Modifier) { - printOperand(MI, opNum, O); - - // If this is an ADD operand, emit it like normal operands. 
- if (Modifier && !strcmp(Modifier, "arith")) { - O << ", "; - printOperand(MI, opNum+1, O); - return; - } - - if (MI->getOperand(opNum+1).isReg() && - MI->getOperand(opNum+1).getReg() == SP::G0) - return; // don't print "+%g0" - if (MI->getOperand(opNum+1).isImm() && - MI->getOperand(opNum+1).getImm() == 0) - return; // don't print "+0" - - O << "+"; - if (MI->getOperand(opNum+1).isGlobal() || - MI->getOperand(opNum+1).isCPI()) { - O << "%lo("; - printOperand(MI, opNum+1, O); - O << ")"; - } else { - printOperand(MI, opNum+1, O); - } -} - -bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum, - raw_ostream &O) { - std::string operand = ""; - const MachineOperand &MO = MI->getOperand(opNum); - switch (MO.getType()) { - default: assert(0 && "Operand is not a register "); - case MachineOperand::MO_Register: - assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && - "Operand is not a physical register "); - operand = "%" + LowercaseString(getRegisterName(MO.getReg())); - break; - } - - unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber(); - unsigned bbNum = MI->getParent()->getNumber(); - - O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n"; - O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ; - - O << "\t sethi\t" - << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), " - << operand << '\n' ; - - O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ; - O << "\tor\t" << operand - << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum - << ")), " << operand << '\n'; - O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; - - return true; -} - -void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - int CC = (int)MI->getOperand(opNum).getImm(); - O << SPARCCondCodeToString((SPCC::CondCodes)CC); -} - -/// PrintAsmOperand - Print out an operand for an inline asm expression. -/// -bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) return true; // Unknown modifier. - - switch (ExtraCode[0]) { - default: return true; // Unknown modifier. - case 'r': - break; - } - } - - printOperand(MI, OpNo, O); - - return false; -} - -bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier - - O << '['; - printMemOperand(MI, OpNo, O); - O << ']'; - - return false; -} - -/// isBlockOnlyReachableByFallthough - Return true if the basic block has -/// exactly one predecessor and the control transfer mechanism between -/// the predecessor and this block is a fall-through. -/// -/// This overrides AsmPrinter's implementation to handle delay slots. -bool SparcAsmPrinter:: -isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { - // If this is a landing pad, it isn't a fall through. If it has no preds, - // then nothing falls through to it. - if (MBB->isLandingPad() || MBB->pred_empty()) - return false; - - // If there isn't exactly one predecessor, it can't be a fall through. - MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; - ++PI2; - if (PI2 != MBB->pred_end()) - return false; - - // The predecessor has to be immediately before this block. 
- const MachineBasicBlock *Pred = *PI; - - if (!Pred->isLayoutSuccessor(MBB)) - return false; - - // Check if the last terminator is an unconditional branch. - MachineBasicBlock::const_iterator I = Pred->end(); - while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) - ; // Noop - return I == Pred->end() || !I->getDesc().isBarrier(); -} - - - -// Force static initialization. -extern "C" void LLVMInitializeSparcAsmPrinter() { - RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); - RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target); -} diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index 684cadf..6839234 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -13,9 +13,11 @@ tablegen(SparcGenCallingConv.inc -gen-callingconv) add_llvm_target(SparcCodeGen DelaySlotFiller.cpp FPMover.cpp + SparcAsmPrinter.cpp SparcInstrInfo.cpp SparcISelDAGToDAG.cpp SparcISelLowering.cpp + SparcFrameLowering.cpp SparcMCAsmInfo.cpp SparcRegisterInfo.cpp SparcSubtarget.cpp @@ -23,4 +25,4 @@ add_llvm_target(SparcCodeGen SparcSelectionDAGInfo.cpp ) -target_link_libraries (LLVMSparcCodeGen LLVMSelectionDAG) +add_subdirectory(TargetInfo) diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index aae5da8..ee29275 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -7,21 +7,32 @@ // //===----------------------------------------------------------------------===// // -// This is a simple local pass that fills delay slots with NOPs. -// +// This is a simple local pass that attempts to fill delay slots with useful +// instructions. If no instructions can be moved into the delay slot, then a +// NOP is placed. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "delayslotfiller" +#define DEBUG_TYPE "delay-slot-filler" #include "Sparc.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" + using namespace llvm; STATISTIC(FilledSlots, "Number of delay slots filled"); +static cl::opt<bool> DisableDelaySlotFiller( + "disable-sparc-delay-filler", + cl::init(false), + cl::desc("Disable the Sparc delay slot filler."), + cl::Hidden); + namespace { struct Filler : public MachineFunctionPass { /// Target machine description which we query for reg. 
names, data @@ -47,6 +58,28 @@ namespace { return Changed; } + bool isDelayFiller(MachineBasicBlock &MBB, + MachineBasicBlock::iterator candidate); + + void insertCallUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegUses); + + void insertDefsUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegDefs, + SmallSet<unsigned, 32>& RegUses); + + bool IsRegInSet(SmallSet<unsigned, 32>& RegSet, + unsigned Reg); + + bool delayHasHazard(MachineBasicBlock::iterator candidate, + bool &sawLoad, bool &sawStore, + SmallSet<unsigned, 32> &RegDefs, + SmallSet<unsigned, 32> &RegUses); + + MachineBasicBlock::iterator + findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot); + + }; char Filler::ID = 0; } // end of anonymous namespace @@ -59,18 +92,201 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) { } /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. -/// Currently, we fill delay slots with NOPs. We assume there is only one -/// delay slot per delayed instruction. +/// We assume there is only one delay slot per delayed instruction. /// bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) if (I->getDesc().hasDelaySlot()) { + MachineBasicBlock::iterator D = MBB.end(); MachineBasicBlock::iterator J = I; - ++J; - BuildMI(MBB, J, DebugLoc(), TII->get(SP::NOP)); + + if (!DisableDelaySlotFiller) + D = findDelayInstr(MBB, I); + ++FilledSlots; Changed = true; + + if (D == MBB.end()) + BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP)); + else + MBB.splice(++J, &MBB, D); } return Changed; } + +MachineBasicBlock::iterator +Filler::findDelayInstr(MachineBasicBlock &MBB, + MachineBasicBlock::iterator slot) +{ + SmallSet<unsigned, 32> RegDefs; + SmallSet<unsigned, 32> RegUses; + bool sawLoad = false; + bool sawStore = false; + + MachineBasicBlock::iterator I = slot; + + if (slot->getOpcode() == SP::RET) + return MBB.end(); + + if (slot->getOpcode() == SP::RETL) { + --I; + if (I->getOpcode() != SP::RESTORErr) + return MBB.end(); + //change retl to ret + slot->setDesc(TII->get(SP::RET)); + return I; + } + + //Call's delay filler can def some of call's uses. 
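A sketch of the hazard rule that backward scan applies, with std::set standing in for SmallSet and a free function in place of the member (both names are ours, not from the patch):

#include <set>

// A candidate cannot be hoisted into the delay slot if it defines a
// register that the delayed instruction (or anything scanned in between)
// already defines or uses, or if it uses a register defined in that
// range. This is the register half of delayHasHazard in miniature.
static bool regHazard(const std::set<unsigned> &RegDefs,
                      const std::set<unsigned> &RegUses,
                      unsigned Reg, bool IsDef) {
  if (IsDef)
    return RegDefs.count(Reg) || RegUses.count(Reg);
  return RegDefs.count(Reg) != 0;
}

For calls, only the registers that compute the call target are seeded into RegUses (the JMPL address operands); an instruction hoisted into a call's delay slot may still safely define outgoing-argument registers, which is what the comment above is getting at.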
+ if (slot->getDesc().isCall()) + insertCallUses(slot, RegUses); + else + insertDefsUses(slot, RegDefs, RegUses); + + bool done = false; + + while (!done) { + done = (I == MBB.begin()); + + if (!done) + --I; + + // skip debug value + if (I->isDebugValue()) + continue; + + + if (I->hasUnmodeledSideEffects() + || I->isInlineAsm() + || I->isLabel() + || I->getDesc().hasDelaySlot() + || isDelayFiller(MBB, I)) + break; + + if (delayHasHazard(I, sawLoad, sawStore, RegDefs, RegUses)) { + insertDefsUses(I, RegDefs, RegUses); + continue; + } + + return I; + } + return MBB.end(); +} + +bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, + bool &sawLoad, + bool &sawStore, + SmallSet<unsigned, 32> &RegDefs, + SmallSet<unsigned, 32> &RegUses) +{ + + if (candidate->isImplicitDef() || candidate->isKill()) + return true; + + if (candidate->getDesc().mayLoad()) { + sawLoad = true; + if (sawStore) + return true; + } + + if (candidate->getDesc().mayStore()) { + if (sawStore) + return true; + sawStore = true; + if (sawLoad) + return true; + } + + for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) { + const MachineOperand &MO = candidate->getOperand(i); + if (!MO.isReg()) + continue; // skip + + unsigned Reg = MO.getReg(); + + if (MO.isDef()) { + //check whether Reg is defined or used before delay slot. + if (IsRegInSet(RegDefs, Reg) || IsRegInSet(RegUses, Reg)) + return true; + } + if (MO.isUse()) { + //check whether Reg is defined before delay slot. + if (IsRegInSet(RegDefs, Reg)) + return true; + } + } + return false; +} + + +void Filler::insertCallUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegUses) +{ + + switch(MI->getOpcode()) { + default: llvm_unreachable("Unknown opcode."); + case SP::CALL: break; + case SP::JMPLrr: + case SP::JMPLri: + assert(MI->getNumOperands() >= 2); + const MachineOperand &Reg = MI->getOperand(0); + assert(Reg.isReg() && "JMPL first operand is not a register."); + assert(Reg.isUse() && "JMPL first operand is not a use."); + RegUses.insert(Reg.getReg()); + + const MachineOperand &RegOrImm = MI->getOperand(1); + if (RegOrImm.isImm()) + break; + assert(RegOrImm.isReg() && "JMPLrr second operand is not a register."); + assert(RegOrImm.isUse() && "JMPLrr second operand is not a use."); + RegUses.insert(RegOrImm.getReg()); + break; + } +} + +//Insert Defs and Uses of MI into the sets RegDefs and RegUses. +void Filler::insertDefsUses(MachineBasicBlock::iterator MI, + SmallSet<unsigned, 32>& RegDefs, + SmallSet<unsigned, 32>& RegUses) +{ + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + if (MO.isDef()) + RegDefs.insert(Reg); + if (MO.isUse()) + RegUses.insert(Reg); + + } +} + +//returns true if the Reg or its alias is in the RegSet. +bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) +{ + if (RegSet.count(Reg)) + return true; + // check Aliased Registers + for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg); + *Alias; ++ Alias) + if (RegSet.count(*Alias)) + return true; + + return false; +} + +// return true if the candidate is a delay filler. 
+bool Filler::isDelayFiller(MachineBasicBlock &MBB, + MachineBasicBlock::iterator candidate) +{ + if (candidate == MBB.begin()) + return false; + const TargetInstrDesc &prevdesc = (--candidate)->getDesc(); + return prevdesc.hasDelaySlot(); +} diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile index e407848..27942c5 100644 --- a/lib/Target/Sparc/Makefile +++ b/lib/Target/Sparc/Makefile @@ -17,7 +17,7 @@ BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \ SparcGenInstrInfo.inc SparcGenAsmWriter.inc \ SparcGenDAGISel.inc SparcGenSubtarget.inc SparcGenCallingConv.inc -DIRS = AsmPrinter TargetInfo +DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp new file mode 100644 index 0000000..edde842 --- /dev/null +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -0,0 +1,251 @@ +//===-- SparcAsmPrinter.cpp - Sparc LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format SPARC assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "Sparc.h" +#include "SparcInstrInfo.h" +#include "SparcTargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + class SparcAsmPrinter : public AsmPrinter { + public: + explicit SparcAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) {} + + virtual const char *getPassName() const { + return "Sparc Assembly Printer"; + } + + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); + void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, + const char *Modifier = 0); + void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); + + virtual void EmitInstruction(const MachineInstr *MI) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + printInstruction(MI, OS); + OutStreamer.EmitRawText(OS.str()); + } + void printInstruction(const MachineInstr *MI, raw_ostream &OS);// autogen'd. 
+ static const char *getRegisterName(unsigned RegNo); + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + + bool printGetPCX(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS); + + virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) + const; + }; +} // end of anonymous namespace + +#include "SparcGenAsmWriter.inc" + +void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand (opNum); + bool CloseParen = false; + if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) { + O << "%hi("; + CloseParen = true; + } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) && + !MO.isReg() && !MO.isImm()) { + O << "%lo("; + CloseParen = true; + } + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << "%" << LowercaseString(getRegisterName(MO.getReg())); + break; + + case MachineOperand::MO_Immediate: + O << (int)MO.getImm(); + break; + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + case MachineOperand::MO_GlobalAddress: + O << *Mang->getSymbol(MO.getGlobal()); + break; + case MachineOperand::MO_ExternalSymbol: + O << MO.getSymbolName(); + break; + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" + << MO.getIndex(); + break; + default: + llvm_unreachable("<unknown operand type>"); + } + if (CloseParen) O << ")"; +} + +void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, + raw_ostream &O, const char *Modifier) { + printOperand(MI, opNum, O); + + // If this is an ADD operand, emit it like normal operands. 
+ if (Modifier && !strcmp(Modifier, "arith")) { + O << ", "; + printOperand(MI, opNum+1, O); + return; + } + + if (MI->getOperand(opNum+1).isReg() && + MI->getOperand(opNum+1).getReg() == SP::G0) + return; // don't print "+%g0" + if (MI->getOperand(opNum+1).isImm() && + MI->getOperand(opNum+1).getImm() == 0) + return; // don't print "+0" + + O << "+"; + if (MI->getOperand(opNum+1).isGlobal() || + MI->getOperand(opNum+1).isCPI()) { + O << "%lo("; + printOperand(MI, opNum+1, O); + O << ")"; + } else { + printOperand(MI, opNum+1, O); + } +} + +bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum, + raw_ostream &O) { + std::string operand = ""; + const MachineOperand &MO = MI->getOperand(opNum); + switch (MO.getType()) { + default: assert(0 && "Operand is not a register "); + case MachineOperand::MO_Register: + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + "Operand is not a physical register "); + assert(MO.getReg() != SP::O7 && + "%o7 is assigned as destination for getpcx!"); + operand = "%" + LowercaseString(getRegisterName(MO.getReg())); + break; + } + + unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber(); + unsigned bbNum = MI->getParent()->getNumber(); + + O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n"; + O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ; + + O << "\t sethi\t" + << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum + << ")), " << operand << '\n' ; + + O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ; + O << "\tor\t" << operand + << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum + << ")), " << operand << '\n'; + O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; + + return true; +} + +void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + int CC = (int)MI->getOperand(opNum).getImm(); + O << SPARCCondCodeToString((SPCC::CondCodes)CC); +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'r': + break; + } + } + + printOperand(MI, OpNo, O); + + return false; +} + +bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier + + O << '['; + printMemOperand(MI, OpNo, O); + O << ']'; + + return false; +} + +/// isBlockOnlyReachableByFallthough - Return true if the basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +/// +/// This overrides AsmPrinter's implementation to handle delay slots. +bool SparcAsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (MBB->isLandingPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; + ++PI2; + if (PI2 != MBB->pred_end()) + return false; + + // The predecessor has to be immediately before this block. 
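Before the adjacency and terminator checks that follow, it helps to spell out the question this override is really asking. A compact host-side model, with a stand-in Inst struct in place of the TargetInstrDesc queries:

#include <vector>

struct Inst { bool IsTerminator, IsBarrier; };

// Control can fall off the end of a block when it contains no terminator
// at all, or when its last terminator is not a barrier (that is, not an
// unconditional transfer). Because filled delay slots may trail the
// terminator, the scan must walk backwards past non-terminators first.
static bool fallsThroughToNext(const std::vector<Inst> &Block) {
  for (auto I = Block.rbegin(), E = Block.rend(); I != E; ++I)
    if (I->IsTerminator)
      return !I->IsBarrier;
  return true;
}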
+ const MachineBasicBlock *Pred = *PI; + + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // Check if the last terminator is an unconditional branch. + MachineBasicBlock::const_iterator I = Pred->end(); + while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) + ; // Noop + return I == Pred->end() || !I->getDesc().isBarrier(); +} + + + +// Force static initialization. +extern "C" void LLVMInitializeSparcAsmPrinter() { + RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); + RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target); +} diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td index 33ecfdf..856f87a 100644 --- a/lib/Target/Sparc/SparcCallingConv.td +++ b/lib/Target/Sparc/SparcCallingConv.td @@ -24,9 +24,13 @@ def RetCC_Sparc32 : CallingConv<[ // Sparc 32-bit C Calling convention. def CC_Sparc32 : CallingConv<[ - // All arguments get passed in integer registers if there is space. - CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, - + //Custom assign SRet to [sp+64]. + CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>, + // i32 f32 arguments get passed in integer registers if there is space. + CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, + // f64 arguments are split and passed through registers or through stack. + CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>, + // Alternatively, they are assigned to the stack in 4-byte aligned units. CCAssignToStack<4, 4> ]>; diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp new file mode 100644 index 0000000..320c8ca --- /dev/null +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -0,0 +1,80 @@ +//====- SparcFrameLowering.cpp - Sparc Frame Information -------*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Sparc implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "SparcFrameLowering.h" +#include "SparcInstrInfo.h" +#include "SparcMachineFunctionInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const SparcInstrInfo &TII = + *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo()); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Get the number of bytes to allocate from the FrameInfo + int NumBytes = (int) MFI->getStackSize(); + + // Emit the correct save instruction based on the number of bytes in + // the frame. 
Minimum stack frame size according to V8 ABI is: + // 16 words for register window spill + // 1 word for address of returned aggregate-value + // + 6 words for passing parameters on the stack + // ---------- + // 23 words * 4 bytes per word = 92 bytes + NumBytes += 92; + + // Round up to next doubleword boundary -- a double-word boundary + // is required by the ABI. + NumBytes = (NumBytes + 7) & ~7; + NumBytes = -NumBytes; + + if (NumBytes >= -4096) { + BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6) + .addReg(SP::O6).addImm(NumBytes); + } else { + // Emit this the hard way. This clobbers G1 which we always know is + // available here. + unsigned OffHi = (unsigned)NumBytes >> 10U; + BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi); + // Emit G1 = G1 + I6 + BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1) + .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1)); + BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6) + .addReg(SP::O6).addReg(SP::G1); + } +} + +void SparcFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + const SparcInstrInfo &TII = + *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo()); + DebugLoc dl = MBBI->getDebugLoc(); + assert(MBBI->getOpcode() == SP::RETL && + "Can only put epilog before 'retl' instruction!"); + BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0) + .addReg(SP::G0); +} diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h new file mode 100644 index 0000000..9a2ddc8 --- /dev/null +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -0,0 +1,41 @@ +//===- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef SPARC_FRAMEINFO_H +#define SPARC_FRAMEINFO_H + +#include "Sparc.h" +#include "SparcSubtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + class SparcSubtarget; + +class SparcFrameLowering : public TargetFrameLowering { + const SparcSubtarget &STI; +public: + explicit SparcFrameLowering(const SparcSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0), STI(sti) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool hasFP(const MachineFunction &MF) const { return false; } +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 4ea94c4..8c6103d 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -44,9 +44,8 @@ public: SDNode *Select(SDNode *N); // Complex Pattern Selectors. - bool SelectADDRrr(SDNode *Op, SDValue N, SDValue &R1, SDValue &R2); - bool SelectADDRri(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Offset); + bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2); + bool SelectADDRri(SDValue N, SDValue &Base, SDValue &Offset); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. 
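The constants in emitPrologue above are easy to sanity-check in isolation: the V8 fixed-area bias with its doubleword rounding, and the sethi/or split used when the frame size does not fit SAVE's signed 13-bit immediate (the rounded size is negated before being fed to SAVE, since SAVE moves %sp downward). A standalone sketch; the function names are ours:

#include <cassert>
#include <cstdint>

// V8 fixed area: 16 words of register-window spill, 1 word for the
// aggregate-return pointer, and 6 words of outgoing-argument slots,
// i.e. 23 words * 4 bytes = 92 bytes, rounded up to an 8-byte boundary.
static int v8FrameSize(int LocalBytes) {
  return (LocalBytes + 92 + 7) & ~7;
}

// Frames beyond the 13-bit immediate range are materialized in %g1 with
// a sethi of the top 22 bits followed by an or of the low 10 bits.
static uint32_t hi22(uint32_t V) { return V >> 10; }
static uint32_t lo10(uint32_t V) { return V & ((1u << 10) - 1); }

int main() {
  assert(v8FrameSize(0) == 96);    // even an empty frame reserves 96 bytes
  assert(v8FrameSize(100) == 192); // 100 + 92 = 192, already aligned
  uint32_t V = 0x12345678;
  assert(((hi22(V) << 10) | lo10(V)) == V); // sethi/or round-trips the word
  return 0;
}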
@@ -71,7 +70,7 @@ SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } -bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr, +bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, SDValue &Base, SDValue &Offset) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); @@ -112,8 +111,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr, return true; } -bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr, - SDValue &R1, SDValue &R2) { +bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { if (Addr.getOpcode() == ISD::FrameIndex) return false; if (Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress) @@ -160,7 +158,7 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { } else { TopPart = CurDAG->getRegister(SP::G0, MVT::i32); } - TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Flag, TopPart, + TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Glue, TopPart, CurDAG->getRegister(SP::G0, MVT::i32)), 0); // FIXME: Handle div by immediate. @@ -174,7 +172,7 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { SDValue MulLHS = N->getOperand(0); SDValue MulRHS = N->getOperand(1); unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr; - SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Flag, + SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Glue, MulLHS, MulRHS); // The high part is in the Y register. return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1)); @@ -196,8 +194,8 @@ SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, switch (ConstraintCode) { default: return true; case 'm': // memory - if (!SelectADDRrr(Op.getNode(), Op, Op0, Op1)) - SelectADDRri(Op.getNode(), Op, Op0, Op1); + if (!SelectADDRrr(Op, Op0, Op1)) + SelectADDRri(Op, Op0, Op1); break; } diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 4099a62..196b87d 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -1,3 +1,4 @@ + //===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===// // // The LLVM Compiler Infrastructure @@ -32,6 +33,47 @@ using namespace llvm; // Calling Convention Implementation //===----------------------------------------------------------------------===// +static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) +{ + assert (ArgFlags.isSRet()); + + //Assign SRet argument + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + 0, + LocVT, LocInfo)); + return true; +} + +static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) +{ + static const unsigned RegList[] = { + SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 + }; + //Try to get first reg + if (unsigned Reg = State.AllocateReg(RegList, 6)) { + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + } else { + //Assign whole thing in stack + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(8,4), + LocVT, LocInfo)); + return true; + } + + //Try to get second reg + if (unsigned Reg = State.AllocateReg(RegList, 6)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, 
LocInfo)); + else + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(4,4), + LocVT, LocInfo)); + return true; +} + #include "SparcGenCallingConv.inc" SDValue @@ -41,6 +83,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain, const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; @@ -53,10 +97,10 @@ SparcTargetLowering::LowerReturn(SDValue Chain, // If this is the first return lowered for this function, add the regs to the // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + if (MF.getRegInfo().liveout_empty()) { for (unsigned i = 0; i != RVLocs.size(); ++i) if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } SDValue Flag; @@ -66,12 +110,24 @@ SparcTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); } + // If the function returns a struct, copy the SRetReturnReg to I0 + if (MF.getFunction()->hasStructRetAttr()) { + SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>(); + unsigned Reg = SFI->getSRetReturnReg(); + if (!Reg) + llvm_unreachable("sret virtual register not created in the entry block"); + SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); + Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag); + Flag = Chain.getValue(1); + if (MF.getRegInfo().liveout_empty()) + MF.getRegInfo().addLiveOut(SP::I0); + } if (Flag.getNode()) return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, Flag); @@ -100,135 +156,159 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32); - static const unsigned ArgRegs[] = { - SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 - }; - const unsigned *CurArgReg = ArgRegs, *ArgRegEnd = ArgRegs+6; - unsigned ArgOffset = 68; + const unsigned StackOffset = 92; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - SDValue ArgValue; CCValAssign &VA = ArgLocs[i]; - // FIXME: We ignore the register assignments of AnalyzeFormalArguments - // because it doesn't know how to split a double into two i32 registers. - EVT ObjectVT = VA.getValVT(); - switch (ObjectVT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unhandled argument type!"); - case MVT::i1: - case MVT::i8: - case MVT::i16: - case MVT::i32: - if (!Ins[i].Used) { // Argument is dead. 
- if (CurArgReg < ArgRegEnd) ++CurArgReg; - InVals.push_back(DAG.getUNDEF(ObjectVT)); - } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR - unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); - MF.getRegInfo().addLiveIn(*CurArgReg++, VReg); - SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - if (ObjectVT != MVT::i32) { - unsigned AssertOp = ISD::AssertSext; - Arg = DAG.getNode(AssertOp, dl, MVT::i32, Arg, - DAG.getValueType(ObjectVT)); - Arg = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Arg); - } - InVals.push_back(Arg); - } else { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true); - SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - SDValue Load; - if (ObjectVT == MVT::i32) { - Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, - false, false, 0); - } else { - ISD::LoadExtType LoadOp = ISD::SEXTLOAD; - - // Sparc is big endian, so add an offset based on the ObjectVT. - unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8); - FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, - DAG.getConstant(Offset, MVT::i32)); - Load = DAG.getExtLoad(LoadOp, MVT::i32, dl, Chain, FIPtr, - NULL, 0, ObjectVT, false, false, 0); - Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load); - } - InVals.push_back(Load); - } - ArgOffset += 4; - break; - case MVT::f32: - if (!Ins[i].Used) { // Argument is dead. - if (CurArgReg < ArgRegEnd) ++CurArgReg; - InVals.push_back(DAG.getUNDEF(ObjectVT)); - } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR - // FP value is passed in an integer register. - unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); - MF.getRegInfo().addLiveIn(*CurArgReg++, VReg); - SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - - Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg); - InVals.push_back(Arg); - } else { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true); - SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0, - false, false, 0); - InVals.push_back(Load); - } - ArgOffset += 4; - break; + if (i == 0 && Ins[i].Flags.isSRet()) { + //Get SRet from [%fp+64] + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true); + SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); + SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, + MachinePointerInfo(), + false, false, 0); + InVals.push_back(Arg); + continue; + } - case MVT::i64: - case MVT::f64: - if (!Ins[i].Used) { // Argument is dead. 
- if (CurArgReg < ArgRegEnd) ++CurArgReg; - if (CurArgReg < ArgRegEnd) ++CurArgReg; - InVals.push_back(DAG.getUNDEF(ObjectVT)); - } else { - SDValue HiVal; - if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR - unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); - MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi); - HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32); - } else { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true); - SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, - false, false, 0); - } + if (VA.isRegLoc()) { + EVT RegVT = VA.getLocVT(); + + if (VA.needsCustom()) { + assert(VA.getLocVT() == MVT::f64); + unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi); + SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32); + + assert(i+1 < e); + CCValAssign &NextVA = ArgLocs[++i]; SDValue LoVal; - if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR - unsigned VRegLo = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); - MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo); - LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32); - } else { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4, - true); + if (NextVA.isMemLoc()) { + int FrameIdx = MF.getFrameInfo()-> + CreateFixedObject(4, StackOffset+NextVA.getLocMemOffset(),true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, + MachinePointerInfo(), false, false, 0); + } else { + unsigned loReg = MF.addLiveIn(NextVA.getLocReg(), + &SP::IntRegsRegClass, dl); + LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32); } - - // Compose the two halves together into an i64 unit. SDValue WholeValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal); + WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue); + InVals.push_back(WholeValue); + continue; + } + unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg); + SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); + if (VA.getLocVT() == MVT::f32) + Arg = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Arg); + else if (VA.getLocVT() != MVT::i32) { + Arg = DAG.getNode(ISD::AssertSext, dl, MVT::i32, Arg, + DAG.getValueType(VA.getLocVT())); + Arg = DAG.getNode(ISD::TRUNCATE, dl, VA.getLocVT(), Arg); + } + InVals.push_back(Arg); + continue; + } - // If we want a double, do a bit convert. - if (ObjectVT == MVT::f64) - WholeValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, WholeValue); + assert(VA.isMemLoc()); - InVals.push_back(WholeValue); + unsigned Offset = VA.getLocMemOffset()+StackOffset; + + if (VA.needsCustom()) { + assert(VA.getValVT() == MVT::f64); + //If it is double-word aligned, just load. 
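The context for the custom f64 path: SPARC doubleword loads require 8-byte alignment, so an f64 argument whose stack slot is only word-aligned must be reassembled from two i32 loads, which is the decision the branch below makes. A host-side model of the BUILD_PAIR plus BITCAST sequence (the helper name is ours):

#include <cstdint>
#include <cstring>

// Rebuild an f64 from its two stack words. SPARC is big-endian, so the
// word at the lower address (HiWord) holds the high half of the double.
static double rebuildF64(uint32_t HiWord, uint32_t LoWord) {
  uint64_t Bits = (uint64_t(HiWord) << 32) | LoWord;
  double D;
  std::memcpy(&D, &Bits, sizeof D); // the ISD::BITCAST step
  return D;
}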
+ if (Offset % 8 == 0) { + int FI = MF.getFrameInfo()->CreateFixedObject(8, + Offset, + true); + SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, + MachinePointerInfo(), + false,false, 0); + InVals.push_back(Load); + continue; } - ArgOffset += 8; - break; + + int FI = MF.getFrameInfo()->CreateFixedObject(4, + Offset, + true); + SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, + MachinePointerInfo(), + false, false, 0); + int FI2 = MF.getFrameInfo()->CreateFixedObject(4, + Offset+4, + true); + SDValue FIPtr2 = DAG.getFrameIndex(FI2, getPointerTy()); + + SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2, + MachinePointerInfo(), + false, false, 0); + + SDValue WholeValue = + DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal); + WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue); + InVals.push_back(WholeValue); + continue; + } + + int FI = MF.getFrameInfo()->CreateFixedObject(4, + Offset, + true); + SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue Load ; + if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) { + Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, + MachinePointerInfo(), + false, false, 0); + } else { + ISD::LoadExtType LoadOp = ISD::SEXTLOAD; + // Sparc is big endian, so add an offset based on the ObjectVT. + unsigned Offset = 4-std::max(1U, VA.getValVT().getSizeInBits()/8); + FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, + DAG.getConstant(Offset, MVT::i32)); + Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr, + MachinePointerInfo(), + VA.getValVT(), false, false,0); + Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load); } + InVals.push_back(Load); + } + + if (MF.getFunction()->hasStructRetAttr()) { + //Copy the SRet Argument to SRetReturnReg + SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>(); + unsigned Reg = SFI->getSRetReturnReg(); + if (!Reg) { + Reg = MF.getRegInfo().createVirtualRegister(&SP::IntRegsRegClass); + SFI->setSRetReturnReg(Reg); + } + SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); } // Store remaining ArgRegs to the stack if this is a varargs function. if (isVarArg) { + static const unsigned ArgRegs[] = { + SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 + }; + unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6); + const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6; + unsigned ArgOffset = CCInfo.getNextStackOffset(); + if (NumAllocated == 6) + ArgOffset += StackOffset; + else { + assert(!ArgOffset); + ArgOffset = 68+4*NumAllocated; + } + // Remember the vararg offset for the va_start implementation. FuncInfo->setVarArgsFrameOffset(ArgOffset); @@ -243,7 +323,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0, + OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, + MachinePointerInfo(), false, false, 0)); ArgOffset += 4; } @@ -270,191 +351,180 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Sparc target does not yet support tail call optimization. isTailCall = false; -#if 0 // Analyze operands of the call, assigning locations to each operand. 
SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs); + CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs, + *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32); // Get the size of the outgoing arguments stack space requirement. unsigned ArgsSize = CCInfo.getNextStackOffset(); - // FIXME: We can't use this until f64 is known to take two GPRs. -#else - (void)CC_Sparc32; - - // Count the size of the outgoing arguments. - unsigned ArgsSize = 0; - for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - switch (Outs[i].VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unknown value type!"); - case MVT::i1: - case MVT::i8: - case MVT::i16: - case MVT::i32: - case MVT::f32: - ArgsSize += 4; - break; - case MVT::i64: - case MVT::f64: - ArgsSize += 8; - break; - } - } - if (ArgsSize > 4*6) - ArgsSize -= 4*6; // Space for first 6 arguments is prereserved. - else - ArgsSize = 0; -#endif // Keep stack frames 8-byte aligned. ArgsSize = (ArgsSize+7) & ~7; + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + + //Create local copies for byval args. + SmallVector<SDValue, 8> ByValArgs; + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + ISD::ArgFlagsTy Flags = Outs[i].Flags; + if (!Flags.isByVal()) + continue; + + SDValue Arg = OutVals[i]; + unsigned Size = Flags.getByValSize(); + unsigned Align = Flags.getByValAlign(); + + int FI = MFI->CreateStackObject(Size, Align, false); + SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue SizeNode = DAG.getConstant(Size, MVT::i32); + + Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align, + false, //isVolatile, + (Size <= 32), //AlwaysInline if size <= 32 + MachinePointerInfo(), MachinePointerInfo()); + ByValArgs.push_back(FIPtr); + } + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true)); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<SDValue, 8> MemOpChains; -#if 0 + const unsigned StackOffset = 92; // Walk the register/memloc assignments, inserting copies/loads. - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size(); + i != e; + ++i, ++realArgIdx) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = OutVals[i]; + SDValue Arg = OutVals[realArgIdx]; + + ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; + + //Use local copy if it is a byval arg. + if (Flags.isByVal()) + Arg = ByValArgs[byvalArgIdx++]; // Promote the value if needed. 
switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: - Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::ZExt: - Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg); + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::AExt: - Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg); + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); break; } - // Arguments that can be passed on register must be kept at - // RegsToPass vector - if (VA.isRegLoc()) { - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + if (Flags.isSRet()) { + assert(VA.needsCustom()); + // store SRet argument in %sp+64 + SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); + SDValue PtrOff = DAG.getIntPtrConstant(64); + PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), + false, false, 0)); continue; } - assert(VA.isMemLoc()); - - // Create a store off the stack pointer for this argument. - SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); - // FIXME: VERIFY THAT 68 IS RIGHT. - SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68); - PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0, - false, false, 0)); - } - -#else - static const unsigned ArgRegs[] = { - SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 - }; - unsigned ArgOffset = 68; - - for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - SDValue Val = OutVals[i]; - EVT ObjectVT = Outs[i].VT; - SDValue ValToStore(0, 0); - unsigned ObjSize; - switch (ObjectVT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unhandled argument type!"); - case MVT::i32: - ObjSize = 4; - - if (RegsToPass.size() >= 6) { - ValToStore = Val; - } else { - RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val)); - } - break; - case MVT::f32: - ObjSize = 4; - if (RegsToPass.size() >= 6) { - ValToStore = Val; - } else { - // Convert this to a FP value in an int reg. - Val = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Val); - RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val)); - } - break; - case MVT::f64: { - ObjSize = 8; - if (RegsToPass.size() >= 6) { - ValToStore = Val; // Whole thing is passed in memory. - break; + if (VA.needsCustom()) { + assert(VA.getLocVT() == MVT::f64); + + if (VA.isMemLoc()) { + unsigned Offset = VA.getLocMemOffset() + StackOffset; + //if it is double-word aligned, just store. + if (Offset % 8 == 0) { + SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); + SDValue PtrOff = DAG.getIntPtrConstant(Offset); + PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), + false, false, 0)); + continue; + } } - // Break into top and bottom parts by storing to the stack and loading - // out the parts as integers. Top part goes in a reg. SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Val, StackPtr, NULL, 0, + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, + Arg, StackPtr, MachinePointerInfo(), false, false, 0); // Sparc is big-endian, so the high part comes first. 
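On the call side the same convention runs in reverse: an f64 that lands fully or partly in registers is spilled to a stack temporary and read back as two i32 words, high word first, which is exactly the order the loads below use. The equivalent host-side split (helper name ours):

#include <cstdint>
#include <cstring>

// Split an f64 into the two i32 halves that travel in registers or
// stack slots; on big-endian SPARC the high word goes first.
static void splitF64(double D, uint32_t &HiWord, uint32_t &LoWord) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof Bits);
  HiWord = uint32_t(Bits >> 32);
  LoWord = uint32_t(Bits);
}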
- SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, - false, false, 0); + SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, + MachinePointerInfo(), false, false, 0); // Increment the pointer to the other half. StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getIntPtrConstant(4)); // Load the low part. - SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, - false, false, 0); - - RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi)); - - if (RegsToPass.size() >= 6) { - ValToStore = Lo; - ArgOffset += 4; - ObjSize = 4; + SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, + MachinePointerInfo(), false, false, 0); + + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi)); + assert(i+1 != e); + CCValAssign &NextVA = ArgLocs[++i]; + if (NextVA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo)); + } else { + //Store the low part in stack. + unsigned Offset = NextVA.getLocMemOffset() + StackOffset; + SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); + SDValue PtrOff = DAG.getIntPtrConstant(Offset); + PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff, + MachinePointerInfo(), + false, false, 0)); + } } else { - RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo)); + unsigned Offset = VA.getLocMemOffset() + StackOffset; + // Store the high part. + SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); + SDValue PtrOff = DAG.getIntPtrConstant(Offset); + PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff, + MachinePointerInfo(), + false, false, 0)); + // Store the low part. + PtrOff = DAG.getIntPtrConstant(Offset+4); + PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff, + MachinePointerInfo(), + false, false, 0)); } - break; + continue; } - case MVT::i64: { - ObjSize = 8; - if (RegsToPass.size() >= 6) { - ValToStore = Val; // Whole thing is passed in memory. - break; - } - // Split the value into top and bottom part. Top part goes in a reg. - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val, - DAG.getConstant(1, MVT::i32)); - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val, - DAG.getConstant(0, MVT::i32)); - RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi)); - - if (RegsToPass.size() >= 6) { - ValToStore = Lo; - ArgOffset += 4; - ObjSize = 4; - } else { - RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo)); + // Arguments that can be passed on register must be kept at + // RegsToPass vector + if (VA.isRegLoc()) { + if (VA.getLocVT() != MVT::f32) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + continue; } - break; - } + Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + continue; } - if (ValToStore.getNode()) { - SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); - SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32); - PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore, - PtrOff, NULL, 0, - false, false, 0)); - } - ArgOffset += ObjSize; + assert(VA.isMemLoc()); + + // Create a store off the stack pointer for this argument. 
+ SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); + SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+StackOffset); + PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), + false, false, 0)); } -#endif + // Emit all stores, make sure the occur before any copies into physregs. if (!MemOpChains.empty()) @@ -484,11 +554,22 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); - std::vector<EVT> NodeTys; - NodeTys.push_back(MVT::Other); // Returns a chain - NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. - SDValue Ops[] = { Chain, Callee, InFlag }; - Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops, InFlag.getNode() ? 3 : 2); + // Returns a chain & a flag for retval copy to use + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + unsigned Reg = RegsToPass[i].first; + if (Reg >= SP::I0 && Reg <= SP::I7) + Reg = Reg-SP::I0+SP::O0; + + Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType())); + } + if (InFlag.getNode()) + Ops.push_back(InFlag); + + Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true), @@ -610,8 +691,8 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); - setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); - setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); + setOperationAction(ISD::BITCAST, MVT::f32, Expand); + setOperationAction(ISD::BITCAST, MVT::i32, Expand); // Sparc has no select or setcc: expand to SELECT_CC. 
setOperationAction(ISD::SELECT, MVT::i32, Expand); @@ -701,6 +782,8 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const { case SPISD::ITOF: return "SPISD::ITOF"; case SPISD::CALL: return "SPISD::CALL"; case SPISD::RET_FLAG: return "SPISD::RET_FLAG"; + case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG"; + case SPISD::FLUSHW: return "SPISD::FLUSHW"; } } @@ -756,7 +839,7 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, } } -SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, +SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); // FIXME there isn't really any debug info here @@ -765,16 +848,16 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA); SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - + SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl, getPointerTy()); SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, + SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, NULL, 0, false, false, 0); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + AbsAddr, MachinePointerInfo(), false, false, 0); } SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, @@ -786,16 +869,16 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment()); SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP); SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); - SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl, + SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl, getPointerTy()); SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, NULL, 0, false, false, 0); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + AbsAddr, MachinePointerInfo(), false, false, 0); } static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { @@ -803,13 +886,13 @@ static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { // Convert the fp value to integer in an FP register. assert(Op.getValueType() == MVT::i32); Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0)); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); } static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); assert(Op.getOperand(0).getValueType() == MVT::i32); - SDValue Tmp = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); + SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); // Convert the int value to FP in an FP register. 
return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp); } @@ -832,13 +915,13 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) { if (LHS.getValueType() == MVT::i32) { std::vector<EVT> VTs; VTs.push_back(MVT::i32); - VTs.push_back(MVT::Flag); + VTs.push_back(MVT::Glue); SDValue Ops[2] = { LHS, RHS }; CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1); if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC); Opc = SPISD::BRICC; } else { - CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS); + CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS); if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC); Opc = SPISD::BRFCC; } @@ -863,13 +946,13 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { if (LHS.getValueType() == MVT::i32) { std::vector<EVT> VTs; VTs.push_back(LHS.getValueType()); // subcc returns a value - VTs.push_back(MVT::Flag); + VTs.push_back(MVT::Glue); SDValue Ops[2] = { LHS, RHS }; CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1); Opc = SPISD::SELECT_ICC; if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC); } else { - CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS); + CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS); Opc = SPISD::SELECT_FCC; if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC); } @@ -891,8 +974,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, DAG.getConstant(FuncInfo->getVarArgsFrameOffset(), MVT::i32)); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0, - false, false, 0); + return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), + MachinePointerInfo(SV), false, false, 0); } static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { @@ -902,27 +985,28 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); DebugLoc dl = Node->getDebugLoc(); - SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0, - false, false, 0); + SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, + MachinePointerInfo(SV), false, false, 0); // Increment the pointer, VAList, to the next vaarg SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList, DAG.getConstant(VT.getSizeInBits()/8, MVT::i32)); // Store the incremented VAList to the legalized pointer InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr, - VAListPtr, SV, 0, false, false, 0); + VAListPtr, MachinePointerInfo(SV), false, false, 0); // Load the actual argument out of the pointer VAList, unless this is an // f64 load. if (VT != MVT::f64) - return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0, false, false, 0); + return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(), + false, false, 0); // Otherwise, load it as i64, then do a bitconvert. - SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0, + SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(), false, false, 0); // Bit-Convert the value to f64. 
SDValue Ops[2] = { - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, V), + DAG.getNode(ISD::BITCAST, dl, MVT::f64, V), V.getValue(1) }; return DAG.getMergeValues(Ops, 2, dl); @@ -947,13 +1031,82 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { } +static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + SDValue Chain = DAG.getNode(SPISD::FLUSHW, + dl, MVT::Other, DAG.getEntryNode()); + return Chain; +} + +static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned FrameReg = SP::I6; + + uint64_t depth = Op.getConstantOperandVal(0); + + SDValue FrameAddr; + if (depth == 0) + FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); + else { + // flush first to make sure the windowed registers' values are in stack + SDValue Chain = getFLUSHW(Op, DAG); + FrameAddr = DAG.getCopyFromReg(Chain, dl, FrameReg, VT); + + for (uint64_t i = 0; i != depth; ++i) { + SDValue Ptr = DAG.getNode(ISD::ADD, + dl, MVT::i32, + FrameAddr, DAG.getIntPtrConstant(56)); + FrameAddr = DAG.getLoad(MVT::i32, dl, + Chain, + Ptr, + MachinePointerInfo(), false, false, 0); + } + } + return FrameAddr; +} + +static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned RetReg = SP::I7; + + uint64_t depth = Op.getConstantOperandVal(0); + + SDValue RetAddr; + if (depth == 0) + RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT); + else { + // flush first to make sure the windowed registers' values are in stack + SDValue Chain = getFLUSHW(Op, DAG); + RetAddr = DAG.getCopyFromReg(Chain, dl, SP::I6, VT); + + for (uint64_t i = 0; i != depth; ++i) { + SDValue Ptr = DAG.getNode(ISD::ADD, + dl, MVT::i32, + RetAddr, + DAG.getIntPtrConstant((i == depth-1)?60:56)); + RetAddr = DAG.getLoad(MVT::i32, dl, + Chain, + Ptr, + MachinePointerInfo(), false, false, 0); + } + } + return RetAddr; +} + SDValue SparcTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); - // Frame & Return address. Currently unimplemented - case ISD::RETURNADDR: return SDValue(); - case ISD::FRAMEADDR: return SDValue(); + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for Sparc."); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); @@ -1009,6 +1162,8 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, @@ -1021,8 +1176,6 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB->addSuccessor(sinkMBB); BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); // copy0MBB: // %FalseValue = ... 
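
A note on the argument-register rotation in LowerCall above (the Reg = Reg-SP::I0+SP::O0 remapping): the calling convention assigns incoming word arguments to the callee's in registers (%i0 and up), but the SPARC register window rotates at the callee's SAVE instruction, so the caller has to place those values in its own out registers (%o0 and up). A stand-alone sketch of that rotation, using illustrative register numbers (the real SP::* values come from the generated register enums and are likewise contiguous, which is exactly what the range check in LowerCall relies on):

    // Toy model of the register numbering; these values are illustrative
    // only, not the generated SP::* enum values.
    enum { I0 = 64, I7 = I0 + 7, O0 = 80 };

    // The callee's %iN is the caller's %oN once SAVE rotates the window,
    // so an argument assigned to %iN must be written to %oN by the caller.
    static unsigned remapArgRegForCaller(unsigned Reg) {
      if (Reg >= I0 && Reg <= I7)
        return Reg - I0 + O0;
      return Reg; // FP argument registers are not window-rotated
    }
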
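Similarly, the PIC paths in LowerGlobalAddress and LowerConstantPool above build hi/lo address halves, add them to GLOBAL_BASE_REG (the base the GETPCX pseudo materializes), and then load through the sum, whereas the non-PIC path uses hi+lo directly as the absolute address. A rough pseudo-C sketch of the two shapes; reading the loaded slot as holding the symbol's real address (a GOT-style indirection) is an interpretation, and hi/lo stand for the relocated immediate halves:

    #include <cstdint>

    // Non-PIC: %hi(sym) + %lo(sym) is the symbol's address itself.
    static uint32_t absAddr(uint32_t hi, uint32_t lo) { return hi + lo; }

    // PIC: hi + lo relocates to an offset from the PIC base; the word
    // stored at that slot is the symbol's actual address, hence the extra
    // load emitted by the lowering.
    static uint32_t picAddr(const uint32_t *base, uint32_t hi, uint32_t lo) {
      return base[(hi + lo) / sizeof(uint32_t)];
    }
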
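And the constants 56 and 60 in LowerFRAMEADDR and LowerRETURNADDR above follow from the 32-bit SPARC frame layout: the first sixteen words of a frame form the register-window save area (%l0-%l7 at offsets 0-28, %i0-%i7 at offsets 32-60), so a frame's saved parent frame pointer (%i6) lives at offset 56 and its saved return address (%i7) at offset 60. The preceding getFLUSHW call makes sure those windows actually reside in memory first (via FLUSHW, or the ta 3 trap on pre-V9 parts, per the FLUSHW/TA3 definitions later in this patch). A host-side illustration of the walk the generated code performs, assuming 32-bit pointers and an already-flushed stack:

    #include <cstdint>

    // Follow the chain of saved frame pointers 'depth' frames up.
    static const char *frameAddress(const char *fp, uint64_t depth) {
      while (depth--)
        fp = *reinterpret_cast<const char *const *>(fp + 56); // saved %i6
      return fp;
    }

    // Same walk, but the final hop reads the saved return address instead.
    static const char *returnAddress(const char *fp, uint64_t depth) {
      for (uint64_t i = 0; i != depth; ++i) {
        unsigned Off = (i == depth - 1) ? 60 : 56; // last load reads %i7
        fp = *reinterpret_cast<const char *const *>(fp + Off);
      }
      return fp;
    }
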
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index db39e08..849e401 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -36,7 +36,8 @@ namespace llvm { CALL, // A call instruction. RET_FLAG, // Return with a flag operand. - GLOBAL_BASE_REG // Global base reg for PIC + GLOBAL_BASE_REG, // Global base reg for PIC + FLUSHW // FLUSH register windows to stack }; } diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 7ede8e7..afa3c1f 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -66,15 +66,200 @@ unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } +static bool IsIntegerCC(unsigned CC) +{ + return (CC <= SPCC::ICC_VC); +} + + +static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC) +{ + switch(CC) { + default: llvm_unreachable("Unknown condition code"); + case SPCC::ICC_NE: return SPCC::ICC_E; + case SPCC::ICC_E: return SPCC::ICC_NE; + case SPCC::ICC_G: return SPCC::ICC_LE; + case SPCC::ICC_LE: return SPCC::ICC_G; + case SPCC::ICC_GE: return SPCC::ICC_L; + case SPCC::ICC_L: return SPCC::ICC_GE; + case SPCC::ICC_GU: return SPCC::ICC_LEU; + case SPCC::ICC_LEU: return SPCC::ICC_GU; + case SPCC::ICC_CC: return SPCC::ICC_CS; + case SPCC::ICC_CS: return SPCC::ICC_CC; + case SPCC::ICC_POS: return SPCC::ICC_NEG; + case SPCC::ICC_NEG: return SPCC::ICC_POS; + case SPCC::ICC_VC: return SPCC::ICC_VS; + case SPCC::ICC_VS: return SPCC::ICC_VC; + + case SPCC::FCC_U: return SPCC::FCC_O; + case SPCC::FCC_O: return SPCC::FCC_U; + case SPCC::FCC_G: return SPCC::FCC_LE; + case SPCC::FCC_LE: return SPCC::FCC_G; + case SPCC::FCC_UG: return SPCC::FCC_ULE; + case SPCC::FCC_ULE: return SPCC::FCC_UG; + case SPCC::FCC_L: return SPCC::FCC_GE; + case SPCC::FCC_GE: return SPCC::FCC_L; + case SPCC::FCC_UL: return SPCC::FCC_UGE; + case SPCC::FCC_UGE: return SPCC::FCC_UL; + case SPCC::FCC_LG: return SPCC::FCC_UE; + case SPCC::FCC_UE: return SPCC::FCC_LG; + case SPCC::FCC_NE: return SPCC::FCC_E; + case SPCC::FCC_E: return SPCC::FCC_NE; + } +} + + +bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const +{ + + MachineBasicBlock::iterator I = MBB.end(); + MachineBasicBlock::iterator UnCondBrIter = MBB.end(); + while (I != MBB.begin()) { + --I; + + if (I->isDebugValue()) + continue; + + //When we see a non-terminator, we are done + if (!isUnpredicatedTerminator(I)) + break; + + //Terminator is not a branch + if (!I->getDesc().isBranch()) + return true; + + //Handle Unconditional branches + if (I->getOpcode() == SP::BA) { + UnCondBrIter = I; + + if (!AllowModify) { + TBB = I->getOperand(0).getMBB(); + continue; + } + + while (llvm::next(I) != MBB.end()) + llvm::next(I)->eraseFromParent(); + + Cond.clear(); + FBB = 0; + + if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { + TBB = 0; + I->eraseFromParent(); + I = MBB.end(); + UnCondBrIter = MBB.end(); + continue; + } + + TBB = I->getOperand(0).getMBB(); + continue; + } + + unsigned Opcode = I->getOpcode(); + if (Opcode != SP::BCOND && Opcode != SP::FBCOND) + return true; //Unknown Opcode + + SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm(); + + if (Cond.empty()) { + MachineBasicBlock *TargetBB = I->getOperand(0).getMBB(); + if (AllowModify && UnCondBrIter != MBB.end() && + MBB.isLayoutSuccessor(TargetBB)) { + + //Transform 
the code + // + // brCC L1 + // ba L2 + // L1: + // .. + // L2: + // + // into + // + // brnCC L2 + // L1: + // ... + // L2: + // + BranchCode = GetOppositeBranchCondition(BranchCode); + MachineBasicBlock::iterator OldInst = I; + BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(Opcode)) + .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode); + BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA)) + .addMBB(TargetBB); + MBB.addSuccessor(TargetBB); + OldInst->eraseFromParent(); + UnCondBrIter->eraseFromParent(); + + UnCondBrIter = MBB.end(); + I = MBB.end(); + continue; + } + FBB = TBB; + TBB = I->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(BranchCode)); + continue; + } + //FIXME: Handle subsequent conditional branches + //For now, we can't handle multiple conditional branches + return true; + } + return false; +} + unsigned SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL)const{ - // Can only insert uncond branches so far. - assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!"); - BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB); - return 1; + DebugLoc DL) const { + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 1 || Cond.size() == 0) && + "Sparc branch conditions should have one component!"); + + if (Cond.empty()) { + assert(!FBB && "Unconditional branch with multiple successors!"); + BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB); + return 1; + } + + //Conditional branch + unsigned CC = Cond[0].getImm(); + + if (IsIntegerCC(CC)) + BuildMI(&MBB, DL, get(SP::BCOND)).addMBB(TBB).addImm(CC); + else + BuildMI(&MBB, DL, get(SP::FBCOND)).addMBB(TBB).addImm(CC); + if (!FBB) + return 1; + + BuildMI(&MBB, DL, get(SP::BA)).addMBB(FBB); + return 2; +} + +unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const +{ + MachineBasicBlock::iterator I = MBB.end(); + unsigned Count = 0; + while (I != MBB.begin()) { + --I; + + if (I->isDebugValue()) + continue; + + if (I->getOpcode() != SP::BA + && I->getOpcode() != SP::BCOND + && I->getOpcode() != SP::FBCOND) + break; // Not a branch + + I->eraseFromParent(); + I = MBB.end(); + ++Count; + } + return Count; } void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index c00bd21..b2d24f5 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -58,8 +58,15 @@ public: /// any side effects other than storing to the stack slot. 
virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - - + + + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify = false) const ; + + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 467ed48..1072323 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -95,10 +95,10 @@ SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; def SDTSPITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; -def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutFlag]>; -def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutFlag]>; -def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>; -def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>; +def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>; +def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>; +def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>; +def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>; def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>; def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>; @@ -106,8 +106,8 @@ def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>; def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>; def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>; -def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInFlag]>; -def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInFlag]>; +def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>; +def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>; // These are target-independent nodes, but have target-specific formats. 
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; @@ -115,16 +115,20 @@ def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, - [SDNPHasChain, SDNPOutFlag]>; + [SDNPHasChain, SDNPOutGlue]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>; def call : SDNode<"SPISD::CALL", SDT_SPCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def retflag : SDNode<"SPISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInGlue]>; + +def flushw : SDNode<"SPISD::FLUSHW", SDTNone, + [SDNPHasChain]>; def getPCX : Operand<i32> { let PrintMethod = "printGetPCX"; @@ -204,7 +208,7 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern> : InstSP<outs, ins, asmstr, pattern>; // GETPCX for PIC -let Defs = [O7], Uses = [O7] in { +let Defs = [O7] in { def GETPCX : Pseudo<(outs getPCX:$getpcseq), (ins), "$getpcseq", [] >; } @@ -217,6 +221,17 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), [(callseq_end timm:$amt1, timm:$amt2)]>; } +let hasSideEffects = 1, mayStore = 1 in { + let rd = 0, rs1 = 0, rs2 = 0 in + def FLUSHW : F3_1<0b10, 0b101011, (outs), (ins), + "flushw", + [(flushw)]>, Requires<[HasV9]>; + let rd = 0, rs1 = 1, simm13 = 3 in + def TA3 : F3_2<0b10, 0b111010, (outs), (ins), + "ta 3", + [(flushw)]>; +} + // FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the // fpmover pass. let Predicates = [HasNoV9] in { // Only emit these in V8 mode. @@ -233,32 +248,39 @@ let Predicates = [HasNoV9] in { // Only emit these in V8 mode. // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. This has to handle all // permutations of selection between i32/f32/f64 on ICC and FCC. -let usesCustomInserter = 1 in { // Expanded after instruction selection. + // Expanded after instruction selection. 
+let Uses = [ICC], usesCustomInserter = 1 in { def SELECT_CC_Int_ICC : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond), "; SELECT_CC_Int_ICC PSEUDO!", [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F, imm:$Cond))]>; - def SELECT_CC_Int_FCC - : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond), - "; SELECT_CC_Int_FCC PSEUDO!", - [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F, - imm:$Cond))]>; def SELECT_CC_FP_ICC : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond), "; SELECT_CC_FP_ICC PSEUDO!", [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F, imm:$Cond))]>; - def SELECT_CC_FP_FCC - : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond), - "; SELECT_CC_FP_FCC PSEUDO!", - [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F, - imm:$Cond))]>; + def SELECT_CC_DFP_ICC : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond), "; SELECT_CC_DFP_ICC PSEUDO!", [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F, imm:$Cond))]>; +} + +let usesCustomInserter = 1, Uses = [FCC] in { + + def SELECT_CC_Int_FCC + : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond), + "; SELECT_CC_Int_FCC PSEUDO!", + [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F, + imm:$Cond))]>; + + def SELECT_CC_FP_FCC + : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond), + "; SELECT_CC_FP_FCC PSEUDO!", + [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F, + imm:$Cond))]>; def SELECT_CC_DFP_FCC : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond), "; SELECT_CC_DFP_FCC PSEUDO!", @@ -272,6 +294,9 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection. let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in { let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>; + + let rd = I7.Num, rs1 = G0.Num, simm13 = 8 in + def RET: F3_2<2, 0b111000, (outs), (ins), "ret", []>; } // Section B.1 - Load Integer Instructions, p. 90 @@ -436,28 +461,34 @@ def LEA_ADDri : F3_2<2, 0b000000, let Defs = [ICC] in defm ADDCC : F3_12<"addcc", 0b010000, addc>; -defm ADDX : F3_12<"addx", 0b001000, adde>; +let Uses = [ICC] in + defm ADDX : F3_12<"addx", 0b001000, adde>; // Section B.15 - Subtract Instructions, p. 110 defm SUB : F3_12 <"sub" , 0b000100, sub>; -defm SUBX : F3_12 <"subx" , 0b001100, sube>; +let Uses = [ICC] in + defm SUBX : F3_12 <"subx" , 0b001100, sube>; -let Defs = [ICC] in { +let Defs = [ICC] in defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>; +let Uses = [ICC], Defs = [ICC] in def SUBXCCrr: F3_1<2, 0b011100, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), "subxcc $b, $c, $dst", []>; -} -// Section B.18 - Multiply Instructions, p. 113 -defm UMUL : F3_12np<"umul", 0b001010>; -defm SMUL : F3_12 <"smul", 0b001011, mul>; +// Section B.18 - Multiply Instructions, p. 113 +let Defs = [Y] in { + defm UMUL : F3_12np<"umul", 0b001010>; + defm SMUL : F3_12 <"smul", 0b001011, mul>; +} // Section B.19 - Divide Instructions, p. 115 -defm UDIV : F3_12np<"udiv", 0b001110>; -defm SDIV : F3_12np<"sdiv", 0b001111>; +let Defs = [Y] in { + defm UDIV : F3_12np<"udiv", 0b001110>; + defm SDIV : F3_12np<"sdiv", 0b001111>; +} // Section B.20 - SAVE and RESTORE, p. 117 defm SAVE : F3_12np<"save" , 0b111100>; @@ -504,11 +535,12 @@ let Uses = [FCC] in // Section B.24 - Call and Link Instruction, p. 
125 // This is the only Format 1 instruction -let Uses = [O0, O1, O2, O3, O4, O5], +let Uses = [O6], hasDelaySlot = 1, isCall = 1, Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7, - D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15] in { - def CALL : InstSP<(outs), (ins calltarget:$dst), + D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, + ICC, FCC, Y] in { + def CALL : InstSP<(outs), (ins calltarget:$dst, variable_ops), "call $dst", []> { bits<30> disp; let op = 1; @@ -517,28 +549,30 @@ let Uses = [O0, O1, O2, O3, O4, O5], // indirect calls def JMPLrr : F3_1<2, 0b111000, - (outs), (ins MEMrr:$ptr), + (outs), (ins MEMrr:$ptr, variable_ops), "call $ptr", [(call ADDRrr:$ptr)]>; def JMPLri : F3_2<2, 0b111000, - (outs), (ins MEMri:$ptr), + (outs), (ins MEMri:$ptr, variable_ops), "call $ptr", [(call ADDRri:$ptr)]>; } // Section B.28 - Read State Register Instructions -def RDY : F3_1<2, 0b101000, - (outs IntRegs:$dst), (ins), - "rd %y, $dst", []>; +let Uses = [Y] in + def RDY : F3_1<2, 0b101000, + (outs IntRegs:$dst), (ins), + "rd %y, $dst", []>; // Section B.29 - Write State Register Instructions -def WRYrr : F3_1<2, 0b110000, - (outs), (ins IntRegs:$b, IntRegs:$c), - "wr $b, $c, %y", []>; -def WRYri : F3_2<2, 0b110000, - (outs), (ins IntRegs:$b, i32imm:$c), - "wr $b, $c, %y", []>; - +let Defs = [Y] in { + def WRYrr : F3_1<2, 0b110000, + (outs), (ins IntRegs:$b, IntRegs:$c), + "wr $b, $c, %y", []>; + def WRYri : F3_2<2, 0b110000, + (outs), (ins IntRegs:$b, i32imm:$c), + "wr $b, $c, %y", []>; +} // Convert Integer to Floating-point Instructions, p. 141 def FITOS : F3_3<2, 0b110100, 0b011000100, (outs FPRegs:$dst), (ins FPRegs:$src), @@ -660,48 +694,57 @@ let Defs = [FCC] in { let Predicates = [HasV9], Constraints = "$T = $dst" in { // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual. // FIXME: Add instruction encodings for the JIT some day. 
- def MOVICCrr - : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), - "mov$cc %icc, $F, $dst", - [(set IntRegs:$dst, - (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>; - def MOVICCri - : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), - "mov$cc %icc, $F, $dst", - [(set IntRegs:$dst, - (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>; - - def MOVFCCrr - : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), - "mov$cc %fcc0, $F, $dst", - [(set IntRegs:$dst, - (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>; - def MOVFCCri - : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), - "mov$cc %fcc0, $F, $dst", - [(set IntRegs:$dst, - (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>; - - def FMOVS_ICC - : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), - "fmovs$cc %icc, $F, $dst", - [(set FPRegs:$dst, - (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>; - def FMOVD_ICC - : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), - "fmovd$cc %icc, $F, $dst", - [(set DFPRegs:$dst, - (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; - def FMOVS_FCC - : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), - "fmovs$cc %fcc0, $F, $dst", - [(set FPRegs:$dst, - (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>; - def FMOVD_FCC - : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), - "fmovd$cc %fcc0, $F, $dst", - [(set DFPRegs:$dst, - (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; + let Uses = [ICC] in { + def MOVICCrr + : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), + "mov$cc %icc, $F, $dst", + [(set IntRegs:$dst, + (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>; + def MOVICCri + : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), + "mov$cc %icc, $F, $dst", + [(set IntRegs:$dst, + (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>; + } + + let Uses = [FCC] in { + def MOVFCCrr + : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), + "mov$cc %fcc0, $F, $dst", + [(set IntRegs:$dst, + (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>; + def MOVFCCri + : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), + "mov$cc %fcc0, $F, $dst", + [(set IntRegs:$dst, + (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>; + } + + let Uses = [ICC] in { + def FMOVS_ICC + : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), + "fmovs$cc %icc, $F, $dst", + [(set FPRegs:$dst, + (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>; + def FMOVD_ICC + : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), + "fmovd$cc %icc, $F, $dst", + [(set DFPRegs:$dst, + (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; + } + + let Uses = [FCC] in { + def FMOVS_FCC + : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), + "fmovs$cc %fcc0, $F, $dst", + [(set FPRegs:$dst, + (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>; + def FMOVD_FCC + : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), + "fmovd$cc %fcc0, $F, $dst", + [(set DFPRegs:$dst, + (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; + } } diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h index e34c131..0b74308 100644 --- a/lib/Target/Sparc/SparcMachineFunctionInfo.h +++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -24,16 +24,23 @@ namespace llvm { /// VarArgsFrameOffset - Frame offset to start of varargs area. 
int VarArgsFrameOffset; + /// SRetReturnReg - Holds the virtual register into which the sret + /// argument is passed. + unsigned SRetReturnReg; public: - SparcMachineFunctionInfo() : GlobalBaseReg(0), VarArgsFrameOffset(0) {} + SparcMachineFunctionInfo() + : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {} explicit SparcMachineFunctionInfo(MachineFunction &MF) - : GlobalBaseReg(0), VarArgsFrameOffset(0) {} + : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {} unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; } + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } }; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index c85db20..b010d04 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -52,10 +52,6 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -bool SparcRegisterInfo::hasFP(const MachineFunction &MF) const { - return false; -} - void SparcRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { @@ -112,55 +108,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, void SparcRegisterInfo:: processFunctionBeforeFrameFinalized(MachineFunction &MF) const {} -void SparcRegisterInfo::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - - // Get the number of bytes to allocate from the FrameInfo - int NumBytes = (int) MFI->getStackSize(); - - // Emit the correct save instruction based on the number of bytes in - // the frame. Minimum stack frame size according to V8 ABI is: - // 16 words for register window spill - // 1 word for address of returned aggregate-value - // + 6 words for passing parameters on the stack - // ---------- - // 23 words * 4 bytes per word = 92 bytes - NumBytes += 92; - - // Round up to next doubleword boundary -- a double-word boundary - // is required by the ABI. - NumBytes = (NumBytes + 7) & ~7; - NumBytes = -NumBytes; - - if (NumBytes >= -4096) { - BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6) - .addReg(SP::O6).addImm(NumBytes); - } else { - // Emit this the hard way. This clobbers G1 which we always know is - // available here. 
- unsigned OffHi = (unsigned)NumBytes >> 10U; - BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi); - // Emit G1 = G1 + I6 - BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1) - .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1)); - BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6) - .addReg(SP::O6).addReg(SP::G1); - } -} - -void SparcRegisterInfo::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - DebugLoc dl = MBBI->getDebugLoc(); - assert(MBBI->getOpcode() == SP::RETL && - "Can only put epilog before 'retl' instruction!"); - BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0) - .addReg(SP::G0); -} - unsigned SparcRegisterInfo::getRARegister() const { return SP::I7; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 020ce56..d930b53 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -26,16 +26,14 @@ class Type; struct SparcRegisterInfo : public SparcGenRegisterInfo { SparcSubtarget &Subtarget; const TargetInstrInfo &TII; - + SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii); - /// Code Generation virtual methods... + /// Code Generation virtual methods... const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; BitVector getReservedRegs(const MachineFunction &MF) const; - bool hasFP(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; @@ -45,9 +43,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - // Debug information queries. 
unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index fede929..5ef4dae 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -45,6 +45,9 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> { def ICC : SparcCtrlReg<"ICC">; def FCC : SparcCtrlReg<"FCC">; +// Y register +def Y : SparcCtrlReg<"Y">; + // Integer registers def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>; def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>; diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index b58d6ba..b84eab5 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -10,9 +10,9 @@ // //===----------------------------------------------------------------------===// +#include "Sparc.h" #include "SparcMCAsmInfo.h" #include "SparcTargetMachine.h" -#include "Sparc.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetRegistry.h" using namespace llvm; @@ -34,8 +34,8 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT, : LLVMTargetMachine(T, TT), Subtarget(TT, FS, is64bit), DataLayout(Subtarget.getDataLayout()), - TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), - FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { + TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), + FrameLowering(Subtarget) { } bool SparcTargetMachine::addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 322c82a..c4bb6bd 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -14,13 +14,14 @@ #ifndef SPARCTARGETMACHINE_H #define SPARCTARGETMACHINE_H -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "SparcInstrInfo.h" -#include "SparcSubtarget.h" #include "SparcISelLowering.h" +#include "SparcFrameLowering.h" #include "SparcSelectionDAGInfo.h" +#include "SparcSubtarget.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -30,13 +31,15 @@ class SparcTargetMachine : public LLVMTargetMachine { SparcTargetLowering TLInfo; SparcSelectionDAGInfo TSInfo; SparcInstrInfo InstrInfo; - TargetFrameInfo FrameInfo; + SparcFrameLowering FrameLowering; public: SparcTargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool is64bit); virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual const TargetFrameLowering *getFrameLowering() const { + return &FrameLowering; + } virtual const SparcSubtarget *getSubtargetImpl() const{ return &Subtarget; } virtual const SparcRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp index b35190a..3cf95b5 100644 --- a/lib/Target/SubtargetFeature.cpp +++ b/lib/Target/SubtargetFeature.cpp @@ -18,6 +18,7 @@ #include <algorithm> #include <cassert> #include <cctype> +#include <cstdlib> using namespace llvm; //===----------------------------------------------------------------------===// @@ -162,7 +163,7 @@ static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize, errs() << "Use +feature to enable a 
feature, or -feature to disable it.\n" << "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n"; - exit(1); + std::exit(1); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt b/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt deleted file mode 100644 index c6be83a..0000000 --- a/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMSystemZAsmPrinter - SystemZAsmPrinter.cpp - ) -add_dependencies(LLVMSystemZAsmPrinter SystemZCodeGenTable_gen) diff --git a/lib/Target/SystemZ/AsmPrinter/Makefile b/lib/Target/SystemZ/AsmPrinter/Makefile deleted file mode 100644 index 0f90ed3..0000000 --- a/lib/Target/SystemZ/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/SystemZ/AsmPrinter/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMSystemZAsmPrinter - -# Hack: we need to include 'main' SystemZ target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp deleted file mode 100644 index d7ac8f5..0000000 --- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp +++ /dev/null @@ -1,217 +0,0 @@ -//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly writer ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to the SystemZ assembly language. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "SystemZ.h" -#include "SystemZInstrInfo.h" -#include "SystemZTargetMachine.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - class SystemZAsmPrinter : public AsmPrinter { - public: - SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) {} - - virtual const char *getPassName() const { - return "SystemZ Assembly Printer"; - } - - void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, - const char* Modifier = 0); - void printPCRelImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O); - void printRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, - const char* Modifier = 0); - void printRRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, - const char* Modifier = 0); - void printS16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) { - O << (int16_t)MI->getOperand(OpNum).getImm(); - } - void printS32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) { - O << (int32_t)MI->getOperand(OpNum).getImm(); - } - - void printInstruction(const MachineInstr *MI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - - void EmitInstruction(const MachineInstr *MI); - }; -} // end of anonymous namespace - -#include "SystemZGenAsmWriter.inc" - -void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); -} - -void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNum); - switch (MO.getType()) { - case MachineOperand::MO_Immediate: - O << MO.getImm(); - return; - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - case MachineOperand::MO_GlobalAddress: { - const GlobalValue *GV = MO.getGlobal(); - O << *Mang->getSymbol(GV); - - // Assemble calls via PLT for externally visible symbols if PIC. 
- if (TM.getRelocationModel() == Reloc::PIC_ && - !GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() && - !GV->hasLocalLinkage()) - O << "@PLT"; - - printOffset(MO.getOffset(), O); - return; - } - case MachineOperand::MO_ExternalSymbol: { - std::string Name(MAI->getGlobalPrefix()); - Name += MO.getSymbolName(); - O << Name; - - if (TM.getRelocationModel() == Reloc::PIC_) - O << "@PLT"; - - return; - } - default: - assert(0 && "Not implemented yet!"); - } -} - - -void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O, const char *Modifier) { - const MachineOperand &MO = MI->getOperand(OpNum); - switch (MO.getType()) { - case MachineOperand::MO_Register: { - assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && - "Virtual registers should be already mapped!"); - unsigned Reg = MO.getReg(); - if (Modifier && strncmp(Modifier, "subreg", 6) == 0) { - if (strncmp(Modifier + 7, "even", 4) == 0) - Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit); - else if (strncmp(Modifier + 7, "odd", 3) == 0) - Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32); - else - assert(0 && "Invalid subreg modifier"); - } - - O << '%' << getRegisterName(Reg); - return; - } - case MachineOperand::MO_Immediate: - O << MO.getImm(); - return; - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' - << MO.getIndex(); - - return; - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' - << MO.getIndex(); - - printOffset(MO.getOffset(), O); - break; - case MachineOperand::MO_GlobalAddress: - O << *Mang->getSymbol(MO.getGlobal()); - break; - case MachineOperand::MO_ExternalSymbol: { - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - break; - } - default: - assert(0 && "Not implemented yet!"); - } - - switch (MO.getTargetFlags()) { - default: assert(0 && "Unknown target flag on GV operand"); - case SystemZII::MO_NO_FLAG: - break; - case SystemZII::MO_GOTENT: O << "@GOTENT"; break; - case SystemZII::MO_PLT: O << "@PLT"; break; - } - - printOffset(MO.getOffset(), O); -} - -void SystemZAsmPrinter::printRIAddrOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O, - const char *Modifier) { - const MachineOperand &Base = MI->getOperand(OpNum); - - // Print displacement operand. - printOperand(MI, OpNum+1, O); - - // Print base operand (if any) - if (Base.getReg()) { - O << '('; - printOperand(MI, OpNum, O); - O << ')'; - } -} - -void SystemZAsmPrinter::printRRIAddrOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O, - const char *Modifier) { - const MachineOperand &Base = MI->getOperand(OpNum); - const MachineOperand &Index = MI->getOperand(OpNum+2); - - // Print displacement operand. - printOperand(MI, OpNum+1, O); - - // Print base operand (if any) - if (Base.getReg()) { - O << '('; - printOperand(MI, OpNum, O); - if (Index.getReg()) { - O << ','; - printOperand(MI, OpNum+2, O); - } - O << ')'; - } else - assert(!Index.getReg() && "Should allocate base register first!"); -} - -// Force static initialization. 
-extern "C" void LLVMInitializeSystemZAsmPrinter() { - RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget); -} diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 880e56f..1f5d355 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -11,9 +11,11 @@ tablegen(SystemZGenCallingConv.inc -gen-callingconv) tablegen(SystemZGenSubtarget.inc -gen-subtarget) add_llvm_target(SystemZCodeGen + SystemZAsmPrinter.cpp SystemZISelDAGToDAG.cpp SystemZISelLowering.cpp SystemZInstrInfo.cpp + SystemZFrameLowering.cpp SystemZMCAsmInfo.cpp SystemZRegisterInfo.cpp SystemZSubtarget.cpp @@ -21,4 +23,4 @@ add_llvm_target(SystemZCodeGen SystemZSelectionDAGInfo.cpp ) -target_link_libraries (LLVMSystemZCodeGen LLVMSelectionDAG) +add_subdirectory(TargetInfo) diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile index 5b44090..6930e14 100644 --- a/lib/Target/SystemZ/Makefile +++ b/lib/Target/SystemZ/Makefile @@ -17,7 +17,7 @@ BUILT_SOURCES = SystemZGenRegisterInfo.h.inc SystemZGenRegisterNames.inc \ SystemZGenInstrInfo.inc SystemZGenAsmWriter.inc \ SystemZGenDAGISel.inc SystemZGenSubtarget.inc SystemZGenCallingConv.inc -DIRS = AsmPrinter TargetInfo +DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp new file mode 100644 index 0000000..fd4d8b7 --- /dev/null +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -0,0 +1,223 @@ +//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly writer ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to the SystemZ assembly language. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "SystemZ.h" +#include "SystemZInstrInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + class SystemZAsmPrinter : public AsmPrinter { + public: + SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) {} + + virtual const char *getPassName() const { + return "SystemZ Assembly Printer"; + } + + void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, + const char* Modifier = 0); + void printPCRelImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O); + void printRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, + const char* Modifier = 0); + void printRRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, + const char* Modifier = 0); + void printS16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) { + O << (int16_t)MI->getOperand(OpNum).getImm(); + } + void printU16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) { + O << (uint16_t)MI->getOperand(OpNum).getImm(); + } + void printS32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) { + O << (int32_t)MI->getOperand(OpNum).getImm(); + } + void printU32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) { + O << (uint32_t)MI->getOperand(OpNum).getImm(); + } + + void printInstruction(const MachineInstr *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + void EmitInstruction(const MachineInstr *MI); + }; +} // end of anonymous namespace + +#include "SystemZGenAsmWriter.inc" + +void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + printInstruction(MI, OS); + OutStreamer.EmitRawText(OS.str()); +} + +void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNum); + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + case MachineOperand::MO_GlobalAddress: { + const GlobalValue *GV = MO.getGlobal(); + O << *Mang->getSymbol(GV); + + // Assemble calls via PLT for externally visible symbols if PIC. 
+ if (TM.getRelocationModel() == Reloc::PIC_ && + !GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() && + !GV->hasLocalLinkage()) + O << "@PLT"; + + printOffset(MO.getOffset(), O); + return; + } + case MachineOperand::MO_ExternalSymbol: { + std::string Name(MAI->getGlobalPrefix()); + Name += MO.getSymbolName(); + O << Name; + + if (TM.getRelocationModel() == Reloc::PIC_) + O << "@PLT"; + + return; + } + default: + assert(0 && "Not implemented yet!"); + } +} + + +void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O, const char *Modifier) { + const MachineOperand &MO = MI->getOperand(OpNum); + switch (MO.getType()) { + case MachineOperand::MO_Register: { + assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + "Virtual registers should be already mapped!"); + unsigned Reg = MO.getReg(); + if (Modifier && strncmp(Modifier, "subreg", 6) == 0) { + if (strncmp(Modifier + 7, "even", 4) == 0) + Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit); + else if (strncmp(Modifier + 7, "odd", 3) == 0) + Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32); + else + assert(0 && "Invalid subreg modifier"); + } + + O << '%' << getRegisterName(Reg); + return; + } + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' + << MO.getIndex(); + + return; + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << MO.getIndex(); + + printOffset(MO.getOffset(), O); + break; + case MachineOperand::MO_GlobalAddress: + O << *Mang->getSymbol(MO.getGlobal()); + break; + case MachineOperand::MO_ExternalSymbol: { + O << *GetExternalSymbolSymbol(MO.getSymbolName()); + break; + } + default: + assert(0 && "Not implemented yet!"); + } + + switch (MO.getTargetFlags()) { + default: assert(0 && "Unknown target flag on GV operand"); + case SystemZII::MO_NO_FLAG: + break; + case SystemZII::MO_GOTENT: O << "@GOTENT"; break; + case SystemZII::MO_PLT: O << "@PLT"; break; + } + + printOffset(MO.getOffset(), O); +} + +void SystemZAsmPrinter::printRIAddrOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O, + const char *Modifier) { + const MachineOperand &Base = MI->getOperand(OpNum); + + // Print displacement operand. + printOperand(MI, OpNum+1, O); + + // Print base operand (if any) + if (Base.getReg()) { + O << '('; + printOperand(MI, OpNum, O); + O << ')'; + } +} + +void SystemZAsmPrinter::printRRIAddrOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O, + const char *Modifier) { + const MachineOperand &Base = MI->getOperand(OpNum); + const MachineOperand &Index = MI->getOperand(OpNum+2); + + // Print displacement operand. + printOperand(MI, OpNum+1, O); + + // Print base operand (if any) + if (Base.getReg()) { + O << '('; + printOperand(MI, OpNum, O); + if (Index.getReg()) { + O << ','; + printOperand(MI, OpNum+2, O); + } + O << ')'; + } else + assert(!Index.getReg() && "Should allocate base register first!"); +} + +// Force static initialization. 
+extern "C" void LLVMInitializeSystemZAsmPrinter() { + RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget); +} diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp new file mode 100644 index 0000000..2ad84a2 --- /dev/null +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -0,0 +1,386 @@ +//=====- SystemZFrameLowering.cpp - SystemZ Frame Information ------*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SystemZ implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "SystemZFrameLowering.h" +#include "SystemZInstrBuilder.h" +#include "SystemZInstrInfo.h" +#include "SystemZMachineFunctionInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +SystemZFrameLowering::SystemZFrameLowering(const SystemZSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, -160), STI(sti) { + // Fill the spill offsets map + static const unsigned SpillOffsTab[][2] = { + { SystemZ::R2D, 0x10 }, + { SystemZ::R3D, 0x18 }, + { SystemZ::R4D, 0x20 }, + { SystemZ::R5D, 0x28 }, + { SystemZ::R6D, 0x30 }, + { SystemZ::R7D, 0x38 }, + { SystemZ::R8D, 0x40 }, + { SystemZ::R9D, 0x48 }, + { SystemZ::R10D, 0x50 }, + { SystemZ::R11D, 0x58 }, + { SystemZ::R12D, 0x60 }, + { SystemZ::R13D, 0x68 }, + { SystemZ::R14D, 0x70 }, + { SystemZ::R15D, 0x78 } + }; + + RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); + + for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i) + RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1]; +} + +/// needsFP - Return true if the specified function should have a dedicated +/// frame pointer register. This is true if the function has variable sized +/// allocas or if frame pointer elimination is disabled. +bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); +} + +/// emitSPUpdate - Emit a series of instructions to increment / decrement the +/// stack pointer by a constant value. +static +void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + int64_t NumBytes, const TargetInstrInfo &TII) { + unsigned Opc; uint64_t Chunk; + bool isSub = NumBytes < 0; + uint64_t Offset = isSub ? -NumBytes : NumBytes; + + if (Offset >= (1LL << 15) - 1) { + Opc = SystemZ::ADD64ri32; + Chunk = (1LL << 31) - 1; + } else { + Opc = SystemZ::ADD64ri16; + Chunk = (1LL << 15) - 1; + } + + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + while (Offset) { + uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D) + .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal); + // The PSW implicit def is dead. 
+ MI->getOperand(3).setIsDead(); + Offset -= ThisVal; + } +} + +void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB + MachineFrameInfo *MFI = MF.getFrameInfo(); + const SystemZInstrInfo &TII = + *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo()); + SystemZMachineFunctionInfo *SystemZMFI = + MF.getInfo<SystemZMachineFunctionInfo>(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Get the number of bytes to allocate from the FrameInfo. + // Note that the area for callee-saved stuff is already allocated, thus we + // need to 'undo' the stack movement. + uint64_t StackSize = MFI->getStackSize(); + StackSize -= SystemZMFI->getCalleeSavedFrameSize(); + + uint64_t NumBytes = StackSize - getOffsetOfLocalArea(); + + // Skip the callee-saved push instructions. + while (MBBI != MBB.end() && + (MBBI->getOpcode() == SystemZ::MOV64mr || + MBBI->getOpcode() == SystemZ::MOV64mrm)) + ++MBBI; + + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); + + // Adjust the stack pointer: R15 -= NumBytes + if (StackSize || MFI->hasCalls()) { + assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) && + "Invalid stack frame calculation!"); + emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII); + } + + if (hasFP(MF)) { + // Update R11 with the new base value... + BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D) + .addReg(SystemZ::R15D); + + // Mark the FramePtr as live-in in every block except the entry. + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); + I != E; ++I) + I->addLiveIn(SystemZ::R11D); + + } +} + +void SystemZFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + const SystemZInstrInfo &TII = + *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo()); + SystemZMachineFunctionInfo *SystemZMFI = + MF.getInfo<SystemZMachineFunctionInfo>(); + unsigned RetOpcode = MBBI->getOpcode(); + + switch (RetOpcode) { + case SystemZ::RET: break; // These are ok + default: + assert(0 && "Can only insert epilog into returning blocks"); + } + + // Get the number of bytes to allocate from the FrameInfo. + // Note that the area for callee-saved stuff is already allocated, thus we + // need to 'undo' the stack movement. + uint64_t StackSize = + MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize(); + uint64_t NumBytes = StackSize - getOffsetOfLocalArea(); + + // Skip the final terminator instruction. + while (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PI = prior(MBBI); + --MBBI; + if (!PI->getDesc().isTerminator()) + break; + } + + // When the callee-saved restores were emitted, the stack frame was not yet + // finalized (and thus the stack size was unknown). Tune the offset now that + // the full stack size is known.
+
+int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+                                              int FI) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const SystemZMachineFunctionInfo *SystemZMFI =
+    MF.getInfo<SystemZMachineFunctionInfo>();
+  int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment();
+  uint64_t StackSize = MFI->getStackSize();
+
+  // Fixed objects are really located in the "previous" frame.
+  if (FI < 0)
+    StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+  Offset += StackSize - getOffsetOfLocalArea();
+
+  // Skip the register save area if we generated the stack frame.
+  if (StackSize || MFI->hasCalls())
+    Offset -= getOffsetOfLocalArea();
+
+  return Offset;
+}
+
+bool
+SystemZFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                                MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL;
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  unsigned CalleeFrameSize = 0;
+
+  // Scan the callee-saved registers and find the bounds of the register spill
+  // area.
+  unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    if (!SystemZ::FP64RegClass.contains(Reg)) {
+      unsigned Offset = RegSpillOffsets[Reg];
+      CalleeFrameSize += 8;
+      if (StartOffset > Offset) {
+        LowReg = Reg; StartOffset = Offset;
+      }
+      if (EndOffset < Offset) {
+        HighReg = Reg; EndOffset = RegSpillOffsets[Reg];
+      }
+    }
+  }
+
+  // Save information for the epilogue inserter.
+  MFI->setCalleeSavedFrameSize(CalleeFrameSize);
+  MFI->setLowReg(LowReg); MFI->setHighReg(HighReg);
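[Editor's illustration -- not part of the patch.] The scan above brackets the callee-saved GPRs by their fixed save-area offsets so that a single STORE/LOAD MULTIPLE can cover the whole contiguous range. A standalone sketch of that bounds computation over a spill-offset table (plain C++; the register ids and offsets are hypothetical stand-ins):

#include <climits>
#include <cstdio>
#include <map>
#include <vector>

int main() {
  // Hypothetical register ids with their ABI save-area offsets
  // (R2D=2 at 0x10, R6D=6 at 0x30, R14D=14 at 0x70).
  std::map<int, unsigned> SpillOffs = { {2, 0x10}, {6, 0x30}, {14, 0x70} };
  std::vector<int> CSI = { 6, 2, 14 }; // callee-saved regs, arbitrary order

  int LowReg = 0, HighReg = 0;
  unsigned Start = UINT_MAX, End = 0;
  for (int Reg : CSI) {
    unsigned Off = SpillOffs[Reg];
    if (Off < Start) { LowReg = Reg; Start = Off; }
    if (Off > End)   { HighReg = Reg; End = Off; }
  }
  // One STORE MULTIPLE from LowReg..HighReg at displacement Start.
  std::printf("stmg r%d, r%d, %u(r15)\n", LowReg, HighReg, Start);
}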
+
+  // Save GPRs.
+  if (StartOffset) {
+    // Build a store instruction. Use a STORE MULTIPLE instruction when more
+    // than one register needs to be stored, otherwise a plain STORE.
+    MachineInstrBuilder MIB =
+      BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
+                                    SystemZ::MOV64mr : SystemZ::MOV64mrm)));
+
+    // Add store operands.
+    MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+    if (LowReg == HighReg)
+      MIB.addReg(0);
+    MIB.addReg(LowReg, RegState::Kill);
+    if (LowReg != HighReg)
+      MIB.addReg(HighReg, RegState::Kill);
+
+    // Do a second scan adding regs as being killed by the instruction.
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      unsigned Reg = CSI[i].getReg();
+      // Add the callee-saved register as live-in. It's killed at the spill.
+      MBB.addLiveIn(Reg);
+      if (Reg != LowReg && Reg != HighReg)
+        MIB.addReg(Reg, RegState::ImplicitKill);
+    }
+  }
+
+  // Save FPRs.
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    if (SystemZ::FP64RegClass.contains(Reg)) {
+      MBB.addLiveIn(Reg);
+      TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(),
+                              &SystemZ::FP64RegClass, TRI);
+    }
+  }
+
+  return true;
+}
+
+bool
+SystemZFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                                MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL;
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+
+  // Restore FP registers.
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    if (SystemZ::FP64RegClass.contains(Reg))
+      TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+                               &SystemZ::FP64RegClass, TRI);
+  }
+
+  // Restore GP registers.
+  unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg();
+  unsigned StartOffset = RegSpillOffsets[LowReg];
+
+  if (StartOffset) {
+    // Build a load instruction. Use a LOAD MULTIPLE instruction when more than
+    // one register needs to be loaded, otherwise a plain LOAD.
+    MachineInstrBuilder MIB =
+      BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
+                                    SystemZ::MOV64rm : SystemZ::MOV64rmm)));
+    // Add load operands.
+    MIB.addReg(LowReg, RegState::Define);
+    if (LowReg != HighReg)
+      MIB.addReg(HighReg, RegState::Define);
+
+    MIB.addReg(hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
+    MIB.addImm(StartOffset);
+    if (LowReg == HighReg)
+      MIB.addReg(0);
+
+    // Do a second scan adding regs as being defined by the instruction.
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      unsigned Reg = CSI[i].getReg();
+      if (Reg != LowReg && Reg != HighReg)
+        MIB.addReg(Reg, RegState::ImplicitDefine);
+    }
+  }
+
+  return true;
+}
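[Editor's illustration -- not part of the patch.] processFunctionBeforeCalleeSavedScan below conservatively marks R14 (the return-address register) used whenever the function makes calls, and R15 (the stack pointer) used whenever anything touches the frame. A standalone sketch of that predicate, with hypothetical inputs:

#include <cstdio>

struct FrameFacts {
  bool HasCalls;
  bool HighFPRsUsed;       // any of F8-F15 used
  int  ObjectIndexEnd;     // != 0 when the frame has automatic variables
  bool HasVarSizedObjects; // function calls dynamic alloca
};

static bool mustSaveR14(const FrameFacts &F) { return F.HasCalls; }

static bool mustSaveR15(const FrameFacts &F) {
  return F.HighFPRsUsed || F.HasCalls || F.ObjectIndexEnd != 0 ||
         F.HasVarSizedObjects;
}

int main() {
  FrameFacts Leaf   = { false, false, 0, false };
  FrameFacts Caller = { true,  false, 0, false };
  std::printf("leaf: r14=%d r15=%d\n", mustSaveR14(Leaf), mustSaveR15(Leaf));
  std::printf("caller: r14=%d r15=%d\n",
              mustSaveR14(Caller), mustSaveR15(Caller));
}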
+
+void
+SystemZFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                       RegScavenger *RS) const {
+  // Determine whether R14/R15 will ever be clobbered inside the function, and
+  // if so, mark them as callee-saved.
+  MachineFrameInfo *FFI = MF.getFrameInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // Check whether the high FPRs are ever used; if so, we need to save R15 as
+  // well.
+  static const unsigned HighFPRs[] = {
+    SystemZ::F8L,  SystemZ::F9L,  SystemZ::F10L, SystemZ::F11L,
+    SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
+    SystemZ::F8S,  SystemZ::F9S,  SystemZ::F10S, SystemZ::F11S,
+    SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
+  };
+
+  bool HighFPRsUsed = false;
+  for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i)
+    HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]);
+
+  if (FFI->hasCalls())
+    /* FIXME: function is varargs */
+    /* FIXME: function grabs RA */
+    /* FIXME: function calls eh_return */
+    MRI.setPhysRegUsed(SystemZ::R14D);
+
+  if (HighFPRsUsed ||
+      FFI->hasCalls() ||
+      FFI->getObjectIndexEnd() != 0 || // Contains automatic variables
+      FFI->hasVarSizedObjects()        // Function calls dynamic alloca's
+      /* FIXME: function is varargs */)
+    MRI.setPhysRegUsed(SystemZ::R15D);
+}
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
new file mode 100644
index 0000000..1284b68
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -0,0 +1,57 @@
+//===-- SystemZFrameLowering.h - Define frame lowering for SystemZ -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ-specific subclass of TargetFrameLowering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZ_FRAMEINFO_H
+#define SYSTEMZ_FRAMEINFO_H
+
+#include "SystemZ.h"
+#include "SystemZSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/ADT/IndexedMap.h"
+
+namespace llvm {
+  class SystemZSubtarget;
+
+class SystemZFrameLowering : public TargetFrameLowering {
+  IndexedMap<unsigned> RegSpillOffsets;
+protected:
+  const SystemZSubtarget &STI;
+
+public:
+  explicit SystemZFrameLowering(const SystemZSubtarget &sti);
+
+  /// emitPrologue/emitEpilogue - These methods insert prologue and epilogue
+  /// code into the function.
+ void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; + + bool hasReservedCallFrame(const MachineFunction &MF) const { return true; } + bool hasFP(const MachineFunction &MF) const; + int getFrameIndexOffset(const MachineFunction &MF, int FI) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index ed290ca..2186ff1 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -120,18 +120,17 @@ namespace { #include "SystemZGenDAGISel.inc" private: - bool SelectAddrRI12Only(SDNode *Op, SDValue& Addr, + bool SelectAddrRI12Only(SDValue& Addr, SDValue &Base, SDValue &Disp); - bool SelectAddrRI12(SDNode *Op, SDValue& Addr, + bool SelectAddrRI12(SDValue& Addr, SDValue &Base, SDValue &Disp, bool is12BitOnly = false); - bool SelectAddrRI(SDNode *Op, SDValue& Addr, - SDValue &Base, SDValue &Disp); - bool SelectAddrRRI12(SDNode *Op, SDValue Addr, + bool SelectAddrRI(SDValue& Addr, SDValue &Base, SDValue &Disp); + bool SelectAddrRRI12(SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index); - bool SelectAddrRRI20(SDNode *Op, SDValue Addr, + bool SelectAddrRRI20(SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index); - bool SelectLAAddr(SDNode *Op, SDValue Addr, + bool SelectLAAddr(SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index); SDNode *Select(SDNode *Node); @@ -142,8 +141,6 @@ namespace { bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM, bool is12Bit, unsigned Depth = 0); bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM); - bool MatchAddressRI(SDValue N, SystemZRRIAddressMode &AM, - bool is12Bit); }; } // end anonymous namespace @@ -355,12 +352,12 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM, /// Returns true if the address can be represented by a base register plus /// an unsigned 12-bit displacement [r+imm]. -bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDNode *Op, SDValue& Addr, +bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue &Addr, SDValue &Base, SDValue &Disp) { - return SelectAddrRI12(Op, Addr, Base, Disp, /*is12BitOnly*/true); + return SelectAddrRI12(Addr, Base, Disp, /*is12BitOnly*/true); } -bool SystemZDAGToDAGISel::SelectAddrRI12(SDNode *Op, SDValue& Addr, +bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue &Addr, SDValue &Base, SDValue &Disp, bool is12BitOnly) { SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true); @@ -410,7 +407,7 @@ bool SystemZDAGToDAGISel::SelectAddrRI12(SDNode *Op, SDValue& Addr, /// Returns true if the address can be represented by a base register plus /// a signed 20-bit displacement [r+imm]. 
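[Editor's illustration -- not part of the patch; placed mid-diff for proximity to the relevant comment.] The address selectors being rewritten in this hunk classify addresses by displacement range: the RI12/RRI12 variants accept only an unsigned 12-bit displacement, while the RI/RRI20 variants accept a signed 20-bit one. A standalone sketch of the two range checks (plain C++, hypothetical helper names):

#include <cstdint>
#include <cstdio>

// fitsU12/fitsS20 mirror the two displacement ranges used by SystemZ
// RX/RXY-style addressing modes.
static bool fitsU12(int64_t D) { return D >= 0 && D < (1 << 12); }
static bool fitsS20(int64_t D) { return D >= -(1 << 19) && D < (1 << 19); }

int main() {
  const int64_t Disps[] = { 0, 4095, 4096, -1, 524287, 524288 };
  for (int64_t D : Disps)
    std::printf("%lld: u12=%d s20=%d\n", (long long)D, fitsU12(D), fitsS20(D));
}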
-bool SystemZDAGToDAGISel::SelectAddrRI(SDNode *Op, SDValue& Addr, +bool SystemZDAGToDAGISel::SelectAddrRI(SDValue& Addr, SDValue &Base, SDValue &Disp) { SystemZRRIAddressMode AM(/*isRI*/true); bool Done = false; @@ -453,7 +450,7 @@ bool SystemZDAGToDAGISel::SelectAddrRI(SDNode *Op, SDValue& Addr, /// Returns true if the address can be represented by a base register plus /// index register plus an unsigned 12-bit displacement [base + idx + imm]. -bool SystemZDAGToDAGISel::SelectAddrRRI12(SDNode *Op, SDValue Addr, +bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index) { SystemZRRIAddressMode AM20, AM12; bool Done = false; @@ -502,7 +499,7 @@ bool SystemZDAGToDAGISel::SelectAddrRRI12(SDNode *Op, SDValue Addr, /// Returns true if the address can be represented by a base register plus /// index register plus a signed 20-bit displacement [base + idx + imm]. -bool SystemZDAGToDAGISel::SelectAddrRRI20(SDNode *Op, SDValue Addr, +bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index) { SystemZRRIAddressMode AM; bool Done = false; @@ -546,7 +543,7 @@ bool SystemZDAGToDAGISel::SelectAddrRRI20(SDNode *Op, SDValue Addr, /// SelectLAAddr - it calls SelectAddr and determines if the maximal addressing /// mode it matches can be cost effectively emitted as an LA/LAY instruction. -bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr, +bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Addr, SDValue &Base, SDValue &Disp, SDValue &Index) { SystemZRRIAddressMode AM; @@ -583,7 +580,7 @@ bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Disp, SDValue &Index) { if (ISD::isNON_EXTLoad(N.getNode()) && IsLegalToFold(N, P, P, OptLevel)) - return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); + return SelectAddrRRI20(N.getOperand(1), Base, Disp, Index); return false; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 67f739f..d694f2e 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -147,8 +147,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : setOperationAction(ISD::FREM, MVT::f64, Expand); // We have only 64-bit bitconverts - setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); - setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); + setOperationAction(ISD::BITCAST, MVT::f32, Expand); + setOperationAction(ISD::BITCAST, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); @@ -341,7 +341,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, // from this parameter SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0, + MachinePointerInfo::getFixedStack(FI), false, false, 0); } @@ -377,8 +377,8 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); // Offset to first argument stack slot. const unsigned FirstArgOffset = 160; @@ -431,7 +431,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, if (StackPtr.getNode() == 0) StackPtr = DAG.getCopyFromReg(Chain, dl, - (RegInfo->hasFP(MF) ? + (TFI->hasFP(MF) ? 
SystemZ::R11D : SystemZ::R15D), getPointerTy()); @@ -441,7 +441,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, DAG.getIntPtrConstant(Offset)); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), Offset, + MachinePointerInfo(), false, false, 0)); } } @@ -471,7 +471,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy()); // Returns a chain & a flag for retval copy to use. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); Ops.push_back(Callee); @@ -710,7 +710,7 @@ SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, SDValue SystemZCC; SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG); - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SmallVector<SDValue, 4> Ops; Ops.push_back(TrueV); Ops.push_back(FalseV); @@ -747,7 +747,7 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, if (ExtraLoadRequired) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0, false, false, 0); + MachinePointerInfo::getGOT(), false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h index fa87061..2f2ef08 100644 --- a/lib/Target/SystemZ/SystemZInstrBuilder.h +++ b/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -115,9 +115,9 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { if (TID.mayStore()) Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), - Flags, Offset, - MFI.getObjectSize(FI), + MF.getMachineMemOperand(MachinePointerInfo( + PseudoSourceValue::getFixedStack(FI), Offset), + Flags, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); return addOffset(MIB.addFrameIndex(FI), Offset) .addMemOperand(MMO); diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 367bed3..be52803 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -28,28 +28,6 @@ using namespace llvm; SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm) : TargetInstrInfoImpl(SystemZInsts, array_lengthof(SystemZInsts)), RI(tm, *this), TM(tm) { - // Fill the spill offsets map - static const unsigned SpillOffsTab[][2] = { - { SystemZ::R2D, 0x10 }, - { SystemZ::R3D, 0x18 }, - { SystemZ::R4D, 0x20 }, - { SystemZ::R5D, 0x28 }, - { SystemZ::R6D, 0x30 }, - { SystemZ::R7D, 0x38 }, - { SystemZ::R8D, 0x40 }, - { SystemZ::R9D, 0x48 }, - { SystemZ::R10D, 0x50 }, - { SystemZ::R11D, 0x58 }, - { SystemZ::R12D, 0x60 }, - { SystemZ::R13D, 0x68 }, - { SystemZ::R14D, 0x70 }, - { SystemZ::R15D, 0x78 } - }; - - RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); - - for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i) - RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1]; } /// isGVStub - Return true if the GV requires an extra load to get the @@ -211,134 +189,6 @@ unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } -bool -SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const 
TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - - MachineFunction &MF = *MBB.getParent(); - SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); - unsigned CalleeFrameSize = 0; - - // Scan the callee-saved and find the bounds of register spill area. - unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0; - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (!SystemZ::FP64RegClass.contains(Reg)) { - unsigned Offset = RegSpillOffsets[Reg]; - CalleeFrameSize += 8; - if (StartOffset > Offset) { - LowReg = Reg; StartOffset = Offset; - } - if (EndOffset < Offset) { - HighReg = Reg; EndOffset = RegSpillOffsets[Reg]; - } - } - } - - // Save information for epilogue inserter. - MFI->setCalleeSavedFrameSize(CalleeFrameSize); - MFI->setLowReg(LowReg); MFI->setHighReg(HighReg); - - // Save GPRs - if (StartOffset) { - // Build a store instruction. Use STORE MULTIPLE instruction if there are many - // registers to store, otherwise - just STORE. - MachineInstrBuilder MIB = - BuildMI(MBB, MI, DL, get((LowReg == HighReg ? - SystemZ::MOV64mr : SystemZ::MOV64mrm))); - - // Add store operands. - MIB.addReg(SystemZ::R15D).addImm(StartOffset); - if (LowReg == HighReg) - MIB.addReg(0); - MIB.addReg(LowReg, RegState::Kill); - if (LowReg != HighReg) - MIB.addReg(HighReg, RegState::Kill); - - // Do a second scan adding regs as being killed by instruction - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - // Add the callee-saved register as live-in. It's killed at the spill. - MBB.addLiveIn(Reg); - if (Reg != LowReg && Reg != HighReg) - MIB.addReg(Reg, RegState::ImplicitKill); - } - } - - // Save FPRs - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (SystemZ::FP64RegClass.contains(Reg)) { - MBB.addLiveIn(Reg); - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), - &SystemZ::FP64RegClass, &RI); - } - } - - return true; -} - -bool -SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - - MachineFunction &MF = *MBB.getParent(); - const TargetRegisterInfo *RegInfo= MF.getTarget().getRegisterInfo(); - SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); - - // Restore FP registers - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (SystemZ::FP64RegClass.contains(Reg)) - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - &SystemZ::FP64RegClass, &RI); - } - - // Restore GP registers - unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg(); - unsigned StartOffset = RegSpillOffsets[LowReg]; - - if (StartOffset) { - // Build a load instruction. Use LOAD MULTIPLE instruction if there are many - // registers to load, otherwise - just LOAD. - MachineInstrBuilder MIB = - BuildMI(MBB, MI, DL, get((LowReg == HighReg ? - SystemZ::MOV64rm : SystemZ::MOV64rmm))); - // Add store operands. - MIB.addReg(LowReg, RegState::Define); - if (LowReg != HighReg) - MIB.addReg(HighReg, RegState::Define); - - MIB.addReg((RegInfo->hasFP(MF) ? 
SystemZ::R11D : SystemZ::R15D)); - MIB.addImm(StartOffset); - if (LowReg == HighReg) - MIB.addReg(0); - - // Do a second scan adding regs as being defined by instruction - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (Reg != LowReg && Reg != HighReg) - MIB.addReg(Reg, RegState::ImplicitDefine); - } - } - - return true; -} - bool SystemZInstrInfo:: ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 1 && "Invalid Xbranch condition!"); diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index c248f24..6cb7200 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -50,7 +50,6 @@ namespace SystemZII { class SystemZInstrInfo : public TargetInstrInfoImpl { const SystemZRegisterInfo RI; SystemZTargetMachine &TM; - IndexedMap<unsigned> RegSpillOffsets; public: explicit SystemZInstrInfo(SystemZTargetMachine &TM); @@ -80,15 +79,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; virtual bool AnalyzeBranch(MachineBasicBlock &MBB, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 8df07c0..11a39fc 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -46,15 +46,15 @@ def SDT_Address : SDTypeProfile<1, 1, // SystemZ Specific Node Definitions. 
//===----------------------------------------------------------------------===// def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInGlue]>; def SystemZcall : SDNode<"SystemZISD::CALL", SDT_SystemZCall, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>; + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; def SystemZcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart, - [SDNPHasChain, SDNPOutFlag]>; + [SDNPHasChain, SDNPOutGlue]>; def SystemZcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SystemZCallSeqEnd, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def SystemZcmp : SDNode<"SystemZISD::CMP", SDT_CmpTest>; def SystemZucmp : SDNode<"SystemZISD::UCMP", SDT_CmpTest>; def SystemZbrcond : SDNode<"SystemZISD::BRCOND", SDT_BrCond, @@ -229,19 +229,19 @@ def MOV64ri16 : RII<0x9A7, [(set GR64:$dst, immSExt16:$src)]>; def MOV64rill16 : RII<0xFA5, - (outs GR64:$dst), (ins i64imm:$src), + (outs GR64:$dst), (ins u16imm:$src), "llill\t{$dst, $src}", [(set GR64:$dst, i64ll16:$src)]>; def MOV64rilh16 : RII<0xEA5, - (outs GR64:$dst), (ins i64imm:$src), + (outs GR64:$dst), (ins u16imm:$src), "llilh\t{$dst, $src}", [(set GR64:$dst, i64lh16:$src)]>; def MOV64rihl16 : RII<0xDA5, - (outs GR64:$dst), (ins i64imm:$src), + (outs GR64:$dst), (ins u16imm:$src), "llihl\t{$dst, $src}", [(set GR64:$dst, i64hl16:$src)]>; def MOV64rihh16 : RII<0xCA5, - (outs GR64:$dst), (ins i64imm:$src), + (outs GR64:$dst), (ins u16imm:$src), "llihh\t{$dst, $src}", [(set GR64:$dst, i64hh16:$src)]>; @@ -250,10 +250,10 @@ def MOV64ri32 : RILI<0x1C0, "lgfi\t{$dst, $src}", [(set GR64:$dst, immSExt32:$src)]>; def MOV64rilo32 : RILI<0xFC0, - (outs GR64:$dst), (ins i64imm:$src), + (outs GR64:$dst), (ins u32imm:$src), "llilf\t{$dst, $src}", [(set GR64:$dst, i64lo32:$src)]>; -def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins i64imm:$src), +def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins u32imm:$src), "llihf\t{$dst, $src}", [(set GR64:$dst, i64hi32:$src)]>; } @@ -642,42 +642,42 @@ def AND64rm : RXYI<0xE360, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), (implicit PSW)]>; def AND32rill16 : RII<0xA57, - (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2), "nill\t{$dst, $src2}", [(set GR32:$dst, (and GR32:$src1, i32ll16c:$src2))]>; def AND64rill16 : RII<0xA57, - (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), "nill\t{$dst, $src2}", [(set GR64:$dst, (and GR64:$src1, i64ll16c:$src2))]>; def AND32rilh16 : RII<0xA56, - (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2), "nilh\t{$dst, $src2}", [(set GR32:$dst, (and GR32:$src1, i32lh16c:$src2))]>; def AND64rilh16 : RII<0xA56, - (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), "nilh\t{$dst, $src2}", [(set GR64:$dst, (and GR64:$src1, i64lh16c:$src2))]>; def AND64rihl16 : RII<0xA55, - (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), "nihl\t{$dst, $src2}", [(set GR64:$dst, (and GR64:$src1, i64hl16c:$src2))]>; def AND64rihh16 : RII<0xA54, - (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), "nihh\t{$dst, $src2}", [(set GR64:$dst, (and GR64:$src1, i64hh16c:$src2))]>; def AND32ri : RILI<0xC0B, - (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + 
(outs GR32:$dst), (ins GR32:$src1, u32imm:$src2), "nilf\t{$dst, $src2}", [(set GR32:$dst, (and GR32:$src1, imm:$src2))]>; def AND64rilo32 : RILI<0xC0B, - (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2), "nilf\t{$dst, $src2}", [(set GR64:$dst, (and GR64:$src1, i64lo32c:$src2))]>; def AND64rihi32 : RILI<0xC0A, - (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2), "nihf\t{$dst, $src2}", [(set GR64:$dst, (and GR64:$src1, i64hi32c:$src2))]>; @@ -707,41 +707,41 @@ def OR64rm : RXYI<0xE381, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), // FIXME: Provide proper encoding! def OR32ri16 : RII<0xA5B, - (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2), "oill\t{$dst, $src2}", [(set GR32:$dst, (or GR32:$src1, i32ll16:$src2))]>; def OR32ri16h : RII<0xA5A, - (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2), "oilh\t{$dst, $src2}", [(set GR32:$dst, (or GR32:$src1, i32lh16:$src2))]>; def OR32ri : RILI<0xC0D, - (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2), "oilf\t{$dst, $src2}", [(set GR32:$dst, (or GR32:$src1, imm:$src2))]>; def OR64rill16 : RII<0xA5B, - (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), "oill\t{$dst, $src2}", [(set GR64:$dst, (or GR64:$src1, i64ll16:$src2))]>; def OR64rilh16 : RII<0xA5A, - (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), "oilh\t{$dst, $src2}", [(set GR64:$dst, (or GR64:$src1, i64lh16:$src2))]>; def OR64rihl16 : RII<0xA59, - (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), "oihl\t{$dst, $src2}", [(set GR64:$dst, (or GR64:$src1, i64hl16:$src2))]>; def OR64rihh16 : RII<0xA58, - (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), "oihh\t{$dst, $src2}", [(set GR64:$dst, (or GR64:$src1, i64hh16:$src2))]>; def OR64rilo32 : RILI<0xC0D, - (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2), "oilf\t{$dst, $src2}", [(set GR64:$dst, (or GR64:$src1, i64lo32:$src2))]>; def OR64rihi32 : RILI<0xC0C, - (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2), "oihf\t{$dst, $src2}", [(set GR64:$dst, (or GR64:$src1, i64hi32:$src2))]>; diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp index 4f7f70b..2dc7e7b 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp @@ -14,6 +14,7 @@ #include "SystemZMCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/Support/ELF.h" using namespace llvm; SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) { @@ -24,6 +25,6 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) { const MCSection *SystemZMCAsmInfo:: getNonexecutableStackSection(MCContext &Ctx) const{ - return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, - 0, SectionKind::getMetadata(), false); + return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, + 0, SectionKind::getMetadata()); } diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 0de50fd..8b835cc 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ 
b/lib/Target/SystemZ/SystemZOperands.td @@ -246,6 +246,14 @@ def s16imm : Operand<i32> { def s16imm64 : Operand<i64> { let PrintMethod = "printS16ImmOperand"; } +// Unsigned i16 +def u16imm : Operand<i32> { + let PrintMethod = "printU16ImmOperand"; +} +def u16imm64 : Operand<i64> { + let PrintMethod = "printU16ImmOperand"; +} + // Signed i20 def s20imm : Operand<i32> { let PrintMethod = "printS20ImmOperand"; @@ -260,6 +268,13 @@ def s32imm : Operand<i32> { def s32imm64 : Operand<i64> { let PrintMethod = "printS32ImmOperand"; } +// Unsigned i32 +def u32imm : Operand<i32> { + let PrintMethod = "printU32ImmOperand"; +} +def u32imm64 : Operand<i64> { + let PrintMethod = "printU32ImmOperand"; +} def imm_pcrel : Operand<i64> { let PrintMethod = "printPCRelImmOperand"; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index f8d3e6a..28f94f4 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -20,7 +20,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -49,49 +49,21 @@ SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - if (hasFP(MF)) + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (TFI->hasFP(MF)) Reserved.set(SystemZ::R11D); Reserved.set(SystemZ::R14D); Reserved.set(SystemZ::R15D); return Reserved; } -/// needsFP - Return true if the specified function should have a dedicated -/// frame pointer register. This is true if the function has variable sized -/// allocas or if frame pointer elimination is disabled. -bool SystemZRegisterInfo::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); -} - void SystemZRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { MBB.erase(I); } -int SystemZRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const SystemZMachineFunctionInfo *SystemZMFI = - MF.getInfo<SystemZMachineFunctionInfo>(); - int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment(); - uint64_t StackSize = MFI->getStackSize(); - - // Fixed objects are really located in the "previous" frame. - if (FI < 0) - StackSize -= SystemZMFI->getCalleeSavedFrameSize(); - - Offset += StackSize - TFI.getOffsetOfLocalArea(); - - // Skip the register save area if we generated the stack frame. 
- if (StackSize || MFI->hasCalls()) - Offset -= TFI.getOffsetOfLocalArea(); - - return Offset; -} - void SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { @@ -100,6 +72,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned i = 0; MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + while (!MI.getOperand(i).isFI()) { ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); @@ -107,7 +81,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(i).getIndex(); - unsigned BasePtr = (hasFP(MF) ? SystemZ::R11D : SystemZ::R15D); + unsigned BasePtr = (TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D); // This must be part of a rri or ri operand memory reference. Replace the // FrameIndex with base register with BasePtr. Add an offset to the @@ -117,7 +91,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Offset is a either 12-bit unsigned or 20-bit signed integer. // FIXME: handle "too long" displacements. int Offset = - getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm(); + TFI->getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm(); // Check whether displacement is too long to fit into 12 bit zext field. MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset)); @@ -125,178 +99,6 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i+1).ChangeToImmediate(Offset); } -void -SystemZRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - // Determine whether R15/R14 will ever be clobbered inside the function. And - // if yes - mark it as 'callee' saved. - MachineFrameInfo *FFI = MF.getFrameInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - // Check whether high FPRs are ever used, if yes - we need to save R15 as - // well. - static const unsigned HighFPRs[] = { - SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L, - SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L, - SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, - SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S, - }; - - bool HighFPRsUsed = false; - for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i) - HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]); - - if (FFI->hasCalls()) - /* FIXME: function is varargs */ - /* FIXME: function grabs RA */ - /* FIXME: function calls eh_return */ - MRI.setPhysRegUsed(SystemZ::R14D); - - if (HighFPRsUsed || - FFI->hasCalls() || - FFI->getObjectIndexEnd() != 0 || // Contains automatic variables - FFI->hasVarSizedObjects() // Function calls dynamic alloca's - /* FIXME: function is varargs */) - MRI.setPhysRegUsed(SystemZ::R15D); -} - -/// emitSPUpdate - Emit a series of instructions to increment / decrement the -/// stack pointer by a constant value. -static -void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - int64_t NumBytes, const TargetInstrInfo &TII) { - unsigned Opc; uint64_t Chunk; - bool isSub = NumBytes < 0; - uint64_t Offset = isSub ? -NumBytes : NumBytes; - - if (Offset >= (1LL << 15) - 1) { - Opc = SystemZ::ADD64ri32; - Chunk = (1LL << 31) - 1; - } else { - Opc = SystemZ::ADD64ri16; - Chunk = (1LL << 15) - 1; - } - - DebugLoc DL = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc(); - - while (Offset) { - uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D) - .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal); - // The PSW implicit def is dead. - MI->getOperand(3).setIsDead(); - Offset -= ThisVal; - } -} - -void SystemZRegisterInfo::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB - const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - SystemZMachineFunctionInfo *SystemZMFI = - MF.getInfo<SystemZMachineFunctionInfo>(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - - // Get the number of bytes to allocate from the FrameInfo. - // Note that area for callee-saved stuff is already allocated, thus we need to - // 'undo' the stack movement. - uint64_t StackSize = MFI->getStackSize(); - StackSize -= SystemZMFI->getCalleeSavedFrameSize(); - - uint64_t NumBytes = StackSize - TFI.getOffsetOfLocalArea(); - - // Skip the callee-saved push instructions. - while (MBBI != MBB.end() && - (MBBI->getOpcode() == SystemZ::MOV64mr || - MBBI->getOpcode() == SystemZ::MOV64mrm)) - ++MBBI; - - if (MBBI != MBB.end()) - DL = MBBI->getDebugLoc(); - - // adjust stack pointer: R15 -= numbytes - if (StackSize || MFI->hasCalls()) { - assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) && - "Invalid stack frame calculation!"); - emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII); - } - - if (hasFP(MF)) { - // Update R11 with the new base value... - BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D) - .addReg(SystemZ::R15D); - - // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); - I != E; ++I) - I->addLiveIn(SystemZ::R11D); - - } -} - -void SystemZRegisterInfo::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - SystemZMachineFunctionInfo *SystemZMFI = - MF.getInfo<SystemZMachineFunctionInfo>(); - unsigned RetOpcode = MBBI->getOpcode(); - - switch (RetOpcode) { - case SystemZ::RET: break; // These are ok - default: - assert(0 && "Can only insert epilog into returning blocks"); - } - - // Get the number of bytes to allocate from the FrameInfo - // Note that area for callee-saved stuff is already allocated, thus we need to - // 'undo' the stack movement. - uint64_t StackSize = - MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize(); - uint64_t NumBytes = StackSize - TFI.getOffsetOfLocalArea(); - - // Skip the final terminator instruction. - while (MBBI != MBB.begin()) { - MachineBasicBlock::iterator PI = prior(MBBI); - --MBBI; - if (!PI->getDesc().isTerminator()) - break; - } - - // During callee-saved restores emission stack frame was not yet finialized - // (and thus - the stack size was unknown). Tune the offset having full stack - // size in hands. 
- if (StackSize || MFI->hasCalls()) { - assert((MBBI->getOpcode() == SystemZ::MOV64rmm || - MBBI->getOpcode() == SystemZ::MOV64rm) && - "Expected to see callee-save register restore code"); - assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) && - "Invalid stack frame calculation!"); - - unsigned i = 0; - MachineInstr &MI = *MBBI; - while (!MI.getOperand(i).isImm()) { - ++i; - assert(i < MI.getNumOperands() && "Unexpected restore code!"); - } - - uint64_t Offset = NumBytes + MI.getOperand(i).getImm(); - // If Offset does not fit into 20-bit signed displacement field we need to - // emit some additional code... - if (Offset > 524287) { - // Fold the displacement into load instruction as much as possible. - NumBytes = Offset - 524287; - Offset = 524287; - emitSPUpdate(MBB, MBBI, NumBytes, TII); - } - - MI.getOperand(i).ChangeToImmediate(Offset); - } -} - unsigned SystemZRegisterInfo::getRARegister() const { assert(0 && "What is the return address register"); return 0; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 5dae865..b450798 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -34,11 +34,6 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const; - bool hasReservedCallFrame(const MachineFunction &MF) const { return true; } - bool hasFP(const MachineFunction &MF) const; - - int getFrameIndexOffset(const MachineFunction &MF, int FI) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; @@ -46,13 +41,6 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const; - - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - // Debug information queries. 
unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index 33be8dd..0028c85 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -190,8 +190,8 @@ def GR32 : RegisterClass<"SystemZ", [i32], 32, GR32Class::iterator GR32Class::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) + const TargetFrameLowering *TFI = TM.getFrameLowering(); + if (TFI->hasFP(MF)) return SystemZ_REG32_nofp; else return SystemZ_REG32; @@ -199,8 +199,8 @@ def GR32 : RegisterClass<"SystemZ", [i32], 32, GR32Class::iterator GR32Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) + const TargetFrameLowering *TFI = TM.getFrameLowering(); + if (TFI->hasFP(MF)) return SystemZ_REG32_nofp + (sizeof(SystemZ_REG32_nofp) / sizeof(unsigned)); else return SystemZ_REG32 + (sizeof(SystemZ_REG32) / sizeof(unsigned)); @@ -237,8 +237,8 @@ def ADDR32 : RegisterClass<"SystemZ", [i32], 32, ADDR32Class::iterator ADDR32Class::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) + const TargetFrameLowering *TFI = TM.getFrameLowering(); + if (TFI->hasFP(MF)) return SystemZ_ADDR32_nofp; else return SystemZ_ADDR32; @@ -246,8 +246,8 @@ def ADDR32 : RegisterClass<"SystemZ", [i32], 32, ADDR32Class::iterator ADDR32Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) + const TargetFrameLowering *TFI = TM.getFrameLowering(); + if (TFI->hasFP(MF)) return SystemZ_ADDR32_nofp + (sizeof(SystemZ_ADDR32_nofp) / sizeof(unsigned)); else return SystemZ_ADDR32 + (sizeof(SystemZ_ADDR32) / sizeof(unsigned)); @@ -284,8 +284,8 @@ def GR64 : RegisterClass<"SystemZ", [i64], 64, GR64Class::iterator GR64Class::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) + const TargetFrameLowering *TFI = TM.getFrameLowering(); + if (TFI->hasFP(MF)) return SystemZ_REG64_nofp; else return SystemZ_REG64; @@ -293,8 +293,8 @@ def GR64 : RegisterClass<"SystemZ", [i64], 64, GR64Class::iterator GR64Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) + const TargetFrameLowering *TFI = TM.getFrameLowering(); + if (TFI->hasFP(MF)) return SystemZ_REG64_nofp + (sizeof(SystemZ_REG64_nofp) / sizeof(unsigned)); else return SystemZ_REG64 + (sizeof(SystemZ_REG64) / sizeof(unsigned)); @@ -331,8 +331,8 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64, ADDR64Class::iterator ADDR64Class::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) + const TargetFrameLowering *TFI = TM.getFrameLowering(); + if (TFI->hasFP(MF)) return SystemZ_ADDR64_nofp; else return SystemZ_ADDR64; @@ -340,8 +340,8 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64, 
ADDR64Class::iterator ADDR64Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) + const TargetFrameLowering *TFI = TM.getFrameLowering(); + if (TFI->hasFP(MF)) return SystemZ_ADDR64_nofp + (sizeof(SystemZ_ADDR64_nofp) / sizeof(unsigned)); else return SystemZ_ADDR64 + (sizeof(SystemZ_ADDR64) / sizeof(unsigned)); @@ -368,8 +368,8 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64, GR64PClass::iterator GR64PClass::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) + const TargetFrameLowering *TFI = TM.getFrameLowering(); + if (TFI->hasFP(MF)) return SystemZ_REG64P_nofp; else return SystemZ_REG64P; @@ -377,8 +377,8 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64, GR64PClass::iterator GR64PClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) + const TargetFrameLowering *TFI = TM.getFrameLowering(); + if (TFI->hasFP(MF)) return SystemZ_REG64P_nofp + (sizeof(SystemZ_REG64P_nofp) / sizeof(unsigned)); else return SystemZ_REG64P + (sizeof(SystemZ_REG64P) / sizeof(unsigned)); @@ -405,8 +405,8 @@ def GR128 : RegisterClass<"SystemZ", [v2i64], 128, GR128Class::iterator GR128Class::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) + const TargetFrameLowering *TFI = TM.getFrameLowering(); + if (TFI->hasFP(MF)) return SystemZ_REG128_nofp; else return SystemZ_REG128; @@ -414,8 +414,8 @@ def GR128 : RegisterClass<"SystemZ", [v2i64], 128, GR128Class::iterator GR128Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) + const TargetFrameLowering *TFI = TM.getFrameLowering(); + if (TFI->hasFP(MF)) return SystemZ_REG128_nofp + (sizeof(SystemZ_REG128_nofp) / sizeof(unsigned)); else return SystemZ_REG128 + (sizeof(SystemZ_REG128) / sizeof(unsigned)); diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index f45827b..1603899 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -30,7 +30,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32" "-f64:64:64-f128:128:128-a0:16:16-n32:64"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) { + FrameLowering(Subtarget) { if (getRelocationModel() == Reloc::Default) setRelocationModel(Reloc::Static); diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 6af829b..524f83d 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -17,11 +17,12 @@ #include "SystemZInstrInfo.h" #include "SystemZISelLowering.h" +#include "SystemZFrameLowering.h" #include "SystemZSelectionDAGInfo.h" #include "SystemZRegisterInfo.h" #include "SystemZSubtarget.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace 
llvm { @@ -34,15 +35,14 @@ class SystemZTargetMachine : public LLVMTargetMachine { SystemZInstrInfo InstrInfo; SystemZTargetLowering TLInfo; SystemZSelectionDAGInfo TSInfo; - - // SystemZ does not have any call stack frame, therefore not having - // any SystemZ specific FrameInfo class. - TargetFrameInfo FrameInfo; + SystemZFrameLowering FrameLowering; public: SystemZTargetMachine(const Target &T, const std::string &TT, const std::string &FS); - virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual const TargetFrameLowering *getFrameLowering() const { + return &FrameLowering; + } virtual const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetData *getTargetData() const { return &DataLayout;} virtual const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; } diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index f5c969a..0919fe4 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -7,12 +7,14 @@ // //===----------------------------------------------------------------------===// // -// This file implements the C bindings for libLLVMTarget.a, which implements -// target information. +// This file implements the common infrastructure (including C bindings) for +// libLLVMTarget.a, which implements target information. // //===----------------------------------------------------------------------===// #include "llvm-c/Target.h" +#include "llvm-c/Initialization.h" +#include "llvm/InitializePasses.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetData.h" #include "llvm/LLVMContext.h" @@ -20,6 +22,15 @@ using namespace llvm; +void llvm::initializeTarget(PassRegistry &Registry) { + initializeTargetDataPass(Registry); + initializeTargetLibraryInfoPass(Registry); +} + +void LLVMInitializeTarget(LLVMPassRegistryRef R) { + initializeTarget(*unwrap(R)); +} + LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) { return wrap(new TargetData(StringRep)); } diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp new file mode 100644 index 0000000..6fa5420 --- /dev/null +++ b/lib/Target/TargetAsmInfo.cpp @@ -0,0 +1,27 @@ +//===-- llvm/Target/TargetAsmInfo.cpp - Target Assembly Info --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +TargetAsmInfo::TargetAsmInfo(const TargetMachine &TM) { + TLOF = &TM.getTargetLowering()->getObjFileLowering(); + const TargetData &TD = *TM.getTargetData(); + IsLittleEndian = TD.isLittleEndian(); + PointerSize = TD.getPointerSize(); + const TargetFrameLowering &TFI = *TM.getFrameLowering(); + StackDir = TFI.getStackGrowthDirection(); + TRI = TM.getRegisterInfo(); + TFI.getInitialFrameState(InitialFrameState); +} diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index f35c96d..c628df0 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -25,7 +25,7 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/System/Mutex.h" +#include "llvm/Support/Mutex.h" #include "llvm/ADT/DenseMap.h" #include <algorithm> #include <cstdlib> @@ -34,7 +34,7 @@ using namespace llvm; // Handle the Pass registration stuff necessary to use TargetData's. // Register the default SparcV9 implementation... -INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true); +INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true) char TargetData::ID = 0; //===----------------------------------------------------------------------===// @@ -83,7 +83,7 @@ unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const { assert((SI == &MemberOffsets[0] || *(SI-1) <= Offset) && (SI+1 == &MemberOffsets[NumElements] || *(SI+1) > Offset) && "Upper bound didn't work!"); - + // Multiple fields can have the same offset if any of them are zero sized. // For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop // at the i32 element, because it is the last element at that offset. 
This is @@ -131,6 +131,8 @@ static unsigned getInt(StringRef R) { } void TargetData::init(StringRef Desc) { + initializeTargetDataPass(*PassRegistry::getPassRegistry()); + LayoutMap = 0; LittleEndian = false; PointerMemSize = 8; @@ -153,16 +155,16 @@ void TargetData::init(StringRef Desc) { std::pair<StringRef, StringRef> Split = Desc.split('-'); StringRef Token = Split.first; Desc = Split.second; - + if (Token.empty()) continue; - + Split = Token.split(':'); StringRef Specifier = Split.first; Token = Split.second; - + assert(!Specifier.empty() && "Can't be empty here"); - + switch (Specifier[0]) { case 'E': LittleEndian = false; @@ -197,7 +199,7 @@ void TargetData::init(StringRef Desc) { unsigned Size = getInt(Specifier.substr(1)); Split = Token.split(':'); unsigned ABIAlign = getInt(Split.first) / 8; - + Split = Split.second.split(':'); unsigned PrefAlign = getInt(Split.first) / 8; if (PrefAlign == 0) @@ -215,7 +217,7 @@ void TargetData::init(StringRef Desc) { Token = Split.second; } while (!Specifier.empty() || !Token.empty()); break; - + default: break; } @@ -231,7 +233,7 @@ TargetData::TargetData() : ImmutablePass(ID) { "Tool did not specify a TargetData to use?"); } -TargetData::TargetData(const Module *M) +TargetData::TargetData(const Module *M) : ImmutablePass(ID) { init(M->getDataLayout()); } @@ -249,14 +251,14 @@ TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align, return; } } - + Alignments.push_back(TargetAlignElem::get(align_type, abi_align, pref_align, bit_width)); } -/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or +/// getAlignmentInfo - Return the alignment (either ABI if ABIInfo = true or /// preferred if ABIInfo = false) the target wants for the specified datatype. -unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, +unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, uint32_t BitWidth, bool ABIInfo, const Type *Ty) const { // Check to see if we have an exact match and remember the best match we see. @@ -266,18 +268,18 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, if (Alignments[i].AlignType == AlignType && Alignments[i].TypeBitWidth == BitWidth) return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign; - + // The best match so far depends on what we're looking for. - if (AlignType == INTEGER_ALIGN && + if (AlignType == INTEGER_ALIGN && Alignments[i].AlignType == INTEGER_ALIGN) { // The "best match" for integers is the smallest size that is larger than // the BitWidth requested. - if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 || + if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 || Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth)) BestMatchIdx = i; // However, if there isn't one that's larger, then we must use the // largest one we have (see below) - if (LargestInt == -1 || + if (LargestInt == -1 || Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth) LargestInt = i; } @@ -322,8 +324,8 @@ class StructLayoutMap : public AbstractTypeUser { I->first->removeAbstractTypeUser(this); LayoutInfo.erase(I); } - - + + /// refineAbstractType - The callback method invoked when an abstract type is /// resolved to another type. An object must override this method to update /// its internal state to reference NewType instead of OldType. 
@@ -385,21 +387,21 @@ TargetData::~TargetData() { const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { if (!LayoutMap) LayoutMap = new StructLayoutMap(); - + StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap); StructLayout *&SL = (*STM)[Ty]; if (SL) return SL; - // Otherwise, create the struct layout. Because it is variable length, we + // Otherwise, create the struct layout. Because it is variable length, we // malloc it, then use placement new. int NumElts = Ty->getNumElements(); StructLayout *L = (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t)); - + // Set SL before calling StructLayout's ctor. The ctor could cause other // entries to be added to TheMap, invalidating our reference. SL = L; - + new (L) StructLayout(Ty, *this); if (Ty->isAbstract()) @@ -414,14 +416,14 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { /// avoid a dangling pointer in this cache. void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const { if (!LayoutMap) return; // No cache. - + static_cast<StructLayoutMap*>(LayoutMap)->InvalidateEntry(Ty); } std::string TargetData::getStringRepresentation() const { std::string Result; raw_string_ostream OS(Result); - + OS << (LittleEndian ? "e" : "E") << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8 << ':' << PointerPrefAlign*8; @@ -430,10 +432,10 @@ std::string TargetData::getStringRepresentation() const { OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':' << AI.ABIAlign*8 << ':' << AI.PrefAlign*8; } - + if (!LegalIntWidths.empty()) { OS << "-n" << (unsigned)LegalIntWidths[0]; - + for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i) OS << ':' << (unsigned)LegalIntWidths[i]; } @@ -461,6 +463,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { case Type::FloatTyID: return 32; case Type::DoubleTyID: + case Type::X86_MMXTyID: return 64; case Type::PPC_FP128TyID: case Type::FP128TyID: @@ -523,6 +526,7 @@ unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { case Type::X86_FP80TyID: AlignType = FLOAT_ALIGN; break; + case Type::X86_MMXTyID: case Type::VectorTyID: AlignType = VECTOR_ALIGN; break; diff --git a/lib/Target/TargetELFWriterInfo.cpp b/lib/Target/TargetELFWriterInfo.cpp index 3631b35..a661ee9 100644 --- a/lib/Target/TargetELFWriterInfo.cpp +++ b/lib/Target/TargetELFWriterInfo.cpp @@ -17,9 +17,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; -TargetELFWriterInfo::TargetELFWriterInfo(TargetMachine &tm) : TM(tm) { - is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; - isLittleEndian = TM.getTargetData()->isLittleEndian(); +TargetELFWriterInfo::TargetELFWriterInfo(bool is64Bit_, bool isLittleEndian_) : + is64Bit(is64Bit_), isLittleEndian(isLittleEndian_) { } TargetELFWriterInfo::~TargetELFWriterInfo() {} diff --git a/lib/Target/TargetFrameInfo.cpp b/lib/Target/TargetFrameInfo.cpp deleted file mode 100644 index 873d60a..0000000 --- a/lib/Target/TargetFrameInfo.cpp +++ /dev/null @@ -1,19 +0,0 @@ -//===-- TargetFrameInfo.cpp - Implement machine frame interface -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implements the layout of a stack frame on the target machine. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Target/TargetFrameInfo.h" -#include <cstdlib> -using namespace llvm; - -TargetFrameInfo::~TargetFrameInfo() { -} diff --git a/lib/Target/TargetFrameLowering.cpp b/lib/Target/TargetFrameLowering.cpp new file mode 100644 index 0000000..19fd581 --- /dev/null +++ b/lib/Target/TargetFrameLowering.cpp @@ -0,0 +1,53 @@ +//===----- TargetFrameLowering.cpp - Implement target frame interface ------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the layout of a stack frame on the target machine. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <cstdlib> +using namespace llvm; + +TargetFrameLowering::~TargetFrameLowering() { +} + +/// getInitialFrameState - Returns a list of machine moves that are assumed +/// on entry to a function. +void +TargetFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) + const { + // Default is to do nothing. +} + +/// getFrameIndexOffset - Returns the displacement from the frame register to +/// the stack frame of the specified index. This is the default implementation +/// which is overridden for some targets. +int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->getObjectOffset(FI) + MFI->getStackSize() - + getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); +} + +int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg) const { + const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo(); + + // By default, assume all frame indices are referenced via whatever + // getFrameRegister() says. The target can override this if it's doing + // something different. + FrameReg = RI->getFrameRegister(MF); + return getFrameIndexOffset(MF, FI); +} diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index c099a7e..97f3bf6 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -12,9 +12,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/ErrorHandling.h" +#include <cctype> using namespace llvm; //===----------------------------------------------------------------------===// @@ -47,9 +50,85 @@ TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc, TargetInstrInfo::~TargetInstrInfo() { } +unsigned +TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, + const MachineInstr *MI) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + unsigned Class = MI->getDesc().getSchedClass(); + unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; + if (UOps) + return UOps; + + // The # of u-ops is dynamically determined. 
The specific target should + // override this function to return the right number. + return 1; +} + +int +TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + if (!ItinData || ItinData->isEmpty()) + return -1; + + unsigned DefClass = DefMI->getDesc().getSchedClass(); + unsigned UseClass = UseMI->getDesc().getSchedClass(); + return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); +} + +int +TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const { + if (!ItinData || ItinData->isEmpty()) + return -1; + + if (!DefNode->isMachineOpcode()) + return -1; + + unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass(); + if (!UseNode->isMachineOpcode()) + return ItinData->getOperandCycle(DefClass, DefIdx); + unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass(); + return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); +} + +int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + return ItinData->getStageLatency(MI->getDesc().getSchedClass()); +} + +int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + SDNode *N) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + if (!N->isMachineOpcode()) + return 1; + + return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass()); +} + +bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, + unsigned DefIdx) const { + if (!ItinData || ItinData->isEmpty()) + return false; + + unsigned DefClass = DefMI->getDesc().getSchedClass(); + int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); + return (DefCycle != -1 && DefCycle <= 1); +} + /// insertNoop - Insert a noop into the instruction stream at the specified /// point. -void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, +void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { llvm_unreachable("Target didn't implement insertNoop!"); } @@ -58,7 +137,7 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { const TargetInstrDesc &TID = MI->getDesc(); if (!TID.isTerminator()) return false; - + // Conditional branch is a special case. if (TID.isBranch() && !TID.isBarrier()) return true; @@ -78,15 +157,15 @@ bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { /// may be overloaded in the target code to do that. unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, const MCAsmInfo &MAI) const { - - + + // Count the number of instructions in the asm. 
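  // Illustrative example, not in the original source: with
  // MAI.getMaxInstLength() == 4 and '\n' as the only separator, the
  // string "mov r0, r1\nadd r0, r0, #1" contains two instruction starts,
  // so the loop below reports a conservative size of 2 * 4 = 8 bytes.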
bool atInsnStart = true; unsigned Length = 0; for (; *Str; ++Str) { if (*Str == '\n' || *Str == MAI.getSeparatorChar()) atInsnStart = true; - if (atInsnStart && !isspace(*Str)) { + if (atInsnStart && !std::isspace(*Str)) { Length += MAI.getMaxInstLength(); atInsnStart = false; } @@ -94,6 +173,6 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, strlen(MAI.getCommentString())) == 0) atInsnStart = false; } - + return Length; } diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp new file mode 100644 index 0000000..c8bed18 --- /dev/null +++ b/lib/Target/TargetLibraryInfo.cpp @@ -0,0 +1,55 @@ +//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TargetLibraryInfo class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/ADT/Triple.h" +using namespace llvm; + +// Register the default implementation. +INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo", + "Target Library Information", false, true) +char TargetLibraryInfo::ID = 0; + +/// initialize - Initialize the set of available library functions based on the +/// specified target triple. This should be carefully written so that a missing +/// target triple gets a sane set of defaults. +static void initialize(TargetLibraryInfo &TLI, const Triple &T) { + initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); + + + // memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later. + if (T.getOS() != Triple::Darwin || T.getDarwinMajorNumber() < 9) + TLI.setUnavailable(LibFunc::memset_pattern16); + +} + + +TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) { + // Default to everything being available. + memset(AvailableArray, -1, sizeof(AvailableArray)); + + initialize(*this, Triple()); +} + +TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) { + // Default to everything being available. + memset(AvailableArray, -1, sizeof(AvailableArray)); + + initialize(*this, T); +} + +/// disableAllFunctions - This disables all builtins, which is used for options +/// like -fno-builtin. +void TargetLibraryInfo::disableAllFunctions() { + memset(AvailableArray, 0, sizeof(AvailableArray)); +} diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index dd7b532..5d34c7d 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -43,8 +43,8 @@ TargetLoweringObjectFile::TargetLoweringObjectFile() : Ctx(0) { StaticCtorSection = 0; StaticDtorSection = 0; LSDASection = 0; - EHFrameSection = 0; + CommDirectiveSupportsAlignment = true; DwarfAbbrevSection = 0; DwarfInfoSection = 0; DwarfLineSection = 0; @@ -168,6 +168,12 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, switch (C->getRelocationInfo()) { default: assert(0 && "unknown relocation info kind"); case Constant::NoRelocation: + // If the global is required to have a unique address, it can't be put + // into a mergable section: just drop it into the general read-only + // section instead. 
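  // Illustrative note, not in the original patch: in IR,
  //   @s = private unnamed_addr constant [4 x i8] c"abc\00"
  // may go into a mergeable string section because its address is not
  // significant; the same global without unnamed_addr must keep a unique
  // address, so the check below routes it to the plain read-only section.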
+ if (!GVar->hasUnnamedAddr()) + return SectionKind::getReadOnly(); + // If initializer is a null-terminated string, put it in a "cstring" // section of the right width. if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 705b1c0..d579d95 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -219,7 +219,9 @@ FunctionSections("ffunction-sections", TargetMachine::TargetMachine(const Target &T) : TheTarget(T), AsmInfo(0), - MCRelaxAll(false) { + MCRelaxAll(false), + MCNoExecStack(false), + MCUseLoc(true) { // Typically it will be subtargets that will adjust FloatABIType from Default // to Soft or Hard. if (UseSoftFloat) diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index 55f222c..4811ba5 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -13,10 +13,10 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -30,7 +30,7 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR, AliasesHash(aliases), AliasesHashSize(aliasessize), Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR), RegClassBegin(RCB), RegClassEnd(RCE) { - assert(NumRegs < FirstVirtualRegister && + assert(isPhysicalRegister(NumRegs) && "Target has too many physical registers!"); CallFrameSetupOpcode = CFSO; @@ -39,6 +39,25 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR, TargetRegisterInfo::~TargetRegisterInfo() {} +void PrintReg::print(raw_ostream &OS) const { + if (!Reg) + OS << "%noreg"; + else if (TargetRegisterInfo::isStackSlot(Reg)) + OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); + else if (TargetRegisterInfo::isVirtualRegister(Reg)) + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); + else if (TRI && Reg < TRI->getNumRegs()) + OS << '%' << TRI->getName(Reg); + else + OS << "%physreg" << Reg; + if (SubIdx) { + if (TRI) + OS << ':' << TRI->getSubRegIndexName(SubIdx); + else + OS << ":sub(" << SubIdx << ')'; + } +} + /// getMinimalPhysRegClass - Returns the Register Class of a physical /// register of the given type, picking the most sub register class of /// the right type that contains this physreg. @@ -82,29 +101,11 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, // Mask out the reserved registers BitVector Reserved = getReservedRegs(MF); - Allocatable ^= Reserved & Allocatable; + Allocatable &= Reserved.flip(); return Allocatable; } -/// getFrameIndexOffset - Returns the displacement from the frame register to -/// the stack frame of the specified index. This is the default implementation -/// which is overridden for some targets. -int TargetRegisterInfo::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->getObjectOffset(FI) + MFI->getStackSize() - - TFI.getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); -} - -/// getInitialFrameState - Returns a list of machine moves that are assumed -/// on entry to a function. -void -TargetRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const{ - // Default is to do nothing. 
-} - const TargetRegisterClass * llvm::getCommonSubClass(const TargetRegisterClass *A, const TargetRegisterClass *B) { diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp index 26797ab..ec73087 100644 --- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp +++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp @@ -65,9 +65,10 @@ public: } }; -} +} // end anonymous namespace -static unsigned MatchRegisterName(StringRef Name); +#define GET_REGISTER_MATCHER +#include "X86GenAsmMatcher.inc" AsmToken X86AsmLexer::LexTokenATT() { AsmToken lexedToken = lexDefinite(); @@ -162,7 +163,3 @@ extern "C" void LLVMInitializeX86AsmLexer() { RegisterAsmLexer<X86AsmLexer> X(TheX86_32Target); RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target); } - -#define REGISTERS_ONLY -#include "X86GenAsmMatcher.inc" -#undef REGISTERS_ONLY diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index f8588d8..1cac07a 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -10,20 +10,21 @@ #include "llvm/Target/TargetAsmParser.h" #include "X86.h" #include "X86Subtarget.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/Target/TargetAsmParser.h" using namespace llvm; namespace { @@ -43,35 +44,32 @@ private: bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } - bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - X86Operand *ParseOperand(); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); bool ParseDirectiveWord(unsigned Size, SMLoc L); - bool MatchInstruction(SMLoc IDLoc, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCInst &Inst); + bool MatchAndEmitInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out); /// @name Auto-generated Matcher Functions /// { - unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const; - - bool MatchInstructionImpl( - const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst); +#define GET_ASSEMBLER_HEADER +#include "X86GenAsmMatcher.inc" /// } public: - X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) - : TargetAsmParser(T), Parser(_Parser), TM(TM) { + X86ATTAsmParser(const Target &T, MCAsmParser &parser, TargetMachine &TM) + : TargetAsmParser(T), Parser(parser), TM(TM) { // Initialize the set of available features. 
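  // Note added for exposition: ComputeAvailableFeatures is generated by
  // tblgen from the subtarget feature predicates in the .td files;
  // seeding the matcher with it up front is what later lets
  // MatchInstructionImpl report Match_MissingFeature for encodings the
  // selected X86 subtarget cannot support.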
setAvailableFeatures(ComputeAvailableFeatures( &TM.getSubtarget<X86Subtarget>())); } + virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands); @@ -81,16 +79,16 @@ public: class X86_32ATTAsmParser : public X86ATTAsmParser { public: - X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) - : X86ATTAsmParser(T, _Parser, TM) { + X86_32ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM) + : X86ATTAsmParser(T, Parser, TM) { Is64Bit = false; } }; class X86_64ATTAsmParser : public X86ATTAsmParser { public: - X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM) - : X86ATTAsmParser(T, _Parser, TM) { + X86_64ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM) + : X86ATTAsmParser(T, Parser, TM) { Is64Bit = true; } }; @@ -375,14 +373,18 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, // validation later, so maybe there is no need for this here. RegNo = MatchRegisterName(Tok.getString()); + // If the match failed, try the register name as lowercase. + if (RegNo == 0) + RegNo = MatchRegisterName(LowercaseString(Tok.getString())); + // FIXME: This should be done using Requires<In32BitMode> and // Requires<In64BitMode> so "eiz" usage in 64-bit instructions // can be also checked. if (RegNo == X86::RIZ && !Is64Bit) return Error(Tok.getLoc(), "riz register in 64-bit mode only"); - // Parse %st(1) and "%st" as "%st(0)" - if (RegNo == 0 && Tok.getString() == "st") { + // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. + if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) { RegNo = X86::ST0; EndLoc = Tok.getLoc(); Parser.Lex(); // Eat 'st' @@ -617,88 +619,13 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { bool X86ATTAsmParser:: ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // The various flavors of pushf and popf use Requires<In32BitMode> and - // Requires<In64BitMode>, but the assembler doesn't yet implement that. - // For now, just do a manual check to prevent silent misencoding. - if (Is64Bit) { - if (Name == "popfl") - return Error(NameLoc, "popfl cannot be encoded in 64-bit mode"); - else if (Name == "pushfl") - return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode"); - else if (Name == "pusha") - return Error(NameLoc, "pusha cannot be encoded in 64-bit mode"); - } else { - if (Name == "popfq") - return Error(NameLoc, "popfq cannot be encoded in 32-bit mode"); - else if (Name == "pushfq") - return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode"); - } - - // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and - // the form jrcxz is not allowed in 32-bit mode. - if (Is64Bit) { - if (Name == "jcxz") - return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode"); - } else { - if (Name == "jrcxz") - return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode"); - } - - // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to - // represent alternative syntaxes in the .td file, without requiring - // instruction duplication. - StringRef PatchedName = StringSwitch<StringRef>(Name) - .Case("sal", "shl") - .Case("salb", "shlb") - .Case("sall", "shll") - .Case("salq", "shlq") - .Case("salw", "shlw") - .Case("repe", "rep") - .Case("repz", "rep") - .Case("repnz", "repne") - .Case("pushf", Is64Bit ? 
"pushfq" : "pushfl") - .Case("popf", Is64Bit ? "popfq" : "popfl") - .Case("retl", Is64Bit ? "retl" : "ret") - .Case("retq", Is64Bit ? "ret" : "retq") - .Case("setz", "sete") - .Case("setnz", "setne") - .Case("jz", "je") - .Case("jnz", "jne") - .Case("jc", "jb") - // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode - // jecxz requires an AdSize prefix but jecxz does not have a prefix in - // 32-bit mode. - .Case("jecxz", "jcxz") - .Case("jrcxz", "jcxz") - .Case("jna", "jbe") - .Case("jnae", "jb") - .Case("jnb", "jae") - .Case("jnbe", "ja") - .Case("jnc", "jae") - .Case("jng", "jle") - .Case("jnge", "jl") - .Case("jnl", "jge") - .Case("jnle", "jg") - .Case("jpe", "jp") - .Case("jpo", "jnp") - .Case("cmovcl", "cmovbl") - .Case("cmovcl", "cmovbl") - .Case("cmovnal", "cmovbel") - .Case("cmovnbl", "cmovael") - .Case("cmovnbel", "cmoval") - .Case("cmovncl", "cmovael") - .Case("cmovngl", "cmovlel") - .Case("cmovnl", "cmovgel") - .Case("cmovngl", "cmovlel") - .Case("cmovngel", "cmovll") - .Case("cmovnll", "cmovgel") - .Case("cmovnlel", "cmovgl") - .Case("cmovnzl", "cmovnel") - .Case("cmovzl", "cmovel") - .Case("fwait", "wait") - .Case("movzx", "movzb") - .Default(Name); + StringRef PatchedName = Name; + // FIXME: Hack to recognize setneb as setne. + if (PatchedName.startswith("set") && PatchedName.endswith("b") && + PatchedName != "setb" && PatchedName != "setnb") + PatchedName = PatchedName.substr(0, Name.size()-1); + // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. const MCExpr *ExtraImmOp = 0; if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && @@ -773,12 +700,26 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, PatchedName = "vpclmulqdq"; } } + Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); if (ExtraImmOp) Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); - if (getLexer().isNot(AsmToken::EndOfStatement)) { + + // Determine whether this is an instruction prefix. + bool isPrefix = + Name == "lock" || Name == "rep" || + Name == "repe" || Name == "repz" || + Name == "repne" || Name == "repnz" || + Name == "rex64" || Name == "data16"; + + + // This does the actual operand parsing. Don't parse any more if we have a + // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we + // just want to parse the "lock" as the first instruction and the "incl" as + // the next one. + if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { // Parse '*' modifier. if (getLexer().is(AsmToken::Star)) { @@ -790,8 +731,10 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Read the first operand. if (X86Operand *Op = ParseOperand()) Operands.push_back(Op); - else + else { + Parser.EatToEndOfStatement(); return true; + } while (getLexer().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. @@ -799,23 +742,27 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Parse and remember the operand. if (X86Operand *Op = ParseOperand()) Operands.push_back(Op); - else + else { + Parser.EatToEndOfStatement(); return true; + } } - } - // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1. 
- if ((Name.startswith("shr") || Name.startswith("sar") || - Name.startswith("shl")) && - Operands.size() == 3 && - static_cast<X86Operand*>(Operands[1])->isImm() && - isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) && - cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) { - delete Operands[1]; - Operands.erase(Operands.begin() + 1); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + Parser.EatToEndOfStatement(); + return Error(Loc, "unexpected token in argument list"); + } } - // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx". + if (getLexer().is(AsmToken::EndOfStatement)) + Parser.Lex(); // Consume the EndOfStatement + else if (isPrefix && getLexer().is(AsmToken::Slash)) + Parser.Lex(); // Consume the prefix separator Slash + + // This is a terrible hack to handle "out[bwl]? %al, (%dx)" -> + // "outb %al, %dx". Out doesn't take a memory form, but this is a widely + // documented form in various unofficial manuals, so a lot of code uses it. if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && Operands.size() == 3) { X86Operand &Op = *(X86Operand*)Operands.back(); @@ -829,76 +776,80 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, } } - // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as - // "f{mul*,add*,sub*,div*} $op" - if ((Name.startswith("fmul") || Name.startswith("fadd") || - Name.startswith("fsub") || Name.startswith("fdiv")) && - Operands.size() == 3 && - static_cast<X86Operand*>(Operands[2])->isReg() && - static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) { - delete Operands[2]; - Operands.erase(Operands.begin() + 2); - } - - // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B, - // B". - if (Name.startswith("imul") && Operands.size() == 3 && - static_cast<X86Operand*>(Operands[1])->isImm() && - static_cast<X86Operand*>(Operands.back())->isReg()) { - X86Operand *Op = static_cast<X86Operand*>(Operands.back()); - Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(), - Op->getEndLoc())); - } - - return false; -} - -bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { - StringRef IDVal = DirectiveID.getIdentifier(); - if (IDVal == ".word") - return ParseDirectiveWord(2, DirectiveID.getLoc()); - return true; -} - -/// ParseDirectiveWord -/// ::= .word [ expression (, expression)* ] -bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { - if (getLexer().isNot(AsmToken::EndOfStatement)) { - for (;;) { - const MCExpr *Value; - if (getParser().ParseExpression(Value)) - return true; - - getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); - - if (getLexer().is(AsmToken::EndOfStatement)) - break; - - // FIXME: Improve diagnostic. - if (getLexer().isNot(AsmToken::Comma)) - return Error(L, "unexpected token in directive"); - Parser.Lex(); + // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to + // "shift <op>". 
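  // Illustrative example, not in the original source: in AT&T syntax
  //   shll $1, %eax
  // encodes the same operation as the one-operand form
  //   shll %eax
  // so when the immediate is exactly 1 it is deleted below and the
  // one-operand instruction pattern is matched instead.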
+ if ((Name.startswith("shr") || Name.startswith("sar") || + Name.startswith("shl") || Name.startswith("sal") || + Name.startswith("rcl") || Name.startswith("rcr") || + Name.startswith("rol") || Name.startswith("ror")) && + Operands.size() == 3) { + X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); + if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && + cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { + delete Operands[1]; + Operands.erase(Operands.begin() + 1); } } - Parser.Lex(); return false; } - -bool -X86ATTAsmParser::MatchInstruction(SMLoc IDLoc, - const SmallVectorImpl<MCParsedAsmOperand*> - &Operands, - MCInst &Inst) { +bool X86ATTAsmParser:: +MatchAndEmitInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + MCStreamer &Out) { assert(!Operands.empty() && "Unexpect empty operand list!"); - X86Operand *Op = static_cast<X86Operand*>(Operands[0]); assert(Op->isToken() && "Leading operand should always be a mnemonic!"); + // First, handle aliases that expand to multiple instructions. + // FIXME: This should be replaced with a real .td file alias mechanism. + // Also, MatchInstructionImpl should do actually *do* the EmitInstruction + // call. + if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" || + Op->getToken() == "fstsww" || Op->getToken() == "fstcww" || + Op->getToken() == "finit" || Op->getToken() == "fsave" || + Op->getToken() == "fstenv" || Op->getToken() == "fclex") { + MCInst Inst; + Inst.setOpcode(X86::WAIT); + Out.EmitInstruction(Inst); + + const char *Repl = + StringSwitch<const char*>(Op->getToken()) + .Case("finit", "fninit") + .Case("fsave", "fnsave") + .Case("fstcw", "fnstcw") + .Case("fstcww", "fnstcw") + .Case("fstenv", "fnstenv") + .Case("fstsw", "fnstsw") + .Case("fstsww", "fnstsw") + .Case("fclex", "fnclex") + .Default(0); + assert(Repl && "Unknown wait-prefixed instruction"); + delete Operands[0]; + Operands[0] = X86Operand::CreateToken(Repl, IDLoc); + } + + bool WasOriginallyInvalidOperand = false; + unsigned OrigErrorInfo; + MCInst Inst; + // First, try a direct match. - if (!MatchInstructionImpl(Operands, Inst)) + switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) { + case Match_Success: + Out.EmitInstruction(Inst); return false; + case Match_MissingFeature: + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + case Match_ConversionFail: + return Error(IDLoc, "unable to convert operands to instruction"); + case Match_InvalidOperand: + WasOriginallyInvalidOperand = true; + break; + case Match_MnemonicFail: + break; + } // FIXME: Ideally, we would only attempt suffix matches for things which are // valid prefixes, and we could just infer the right unambiguous @@ -912,15 +863,26 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc, Tmp += ' '; Op->setTokenValue(Tmp.str()); + // If this instruction starts with an 'f', then it is a floating point stack + // instruction. These come in up to three forms for 32-bit, 64-bit, and + // 80-bit floating point, which use the suffixes s,l,t respectively. + // + // Otherwise, we assume that this may be an integer instruction, which comes + // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. + const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; + // Check for the various suffix matches. 
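  // Illustrative example, not in the original source: "inc (%rax)"
  // carries no size suffix, so the matcher retries it as "incb", "incw",
  // "incl" and "incq" and accepts the result only if exactly one of the
  // four forms matches; an 'f'-prefixed mnemonic such as "fadd" is
  // retried with the floating-point suffixes 's', 'l' and 't' instead.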
- Tmp[Base.size()] = 'b'; - bool MatchB = MatchInstructionImpl(Operands, Inst); - Tmp[Base.size()] = 'w'; - bool MatchW = MatchInstructionImpl(Operands, Inst); - Tmp[Base.size()] = 'l'; - bool MatchL = MatchInstructionImpl(Operands, Inst); - Tmp[Base.size()] = 'q'; - bool MatchQ = MatchInstructionImpl(Operands, Inst); + Tmp[Base.size()] = Suffixes[0]; + unsigned ErrorInfoIgnore; + MatchResultTy Match1, Match2, Match3, Match4; + + Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Tmp[Base.size()] = Suffixes[1]; + Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Tmp[Base.size()] = Suffixes[2]; + Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); + Tmp[Base.size()] = Suffixes[3]; + Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); // Restore the old token. Op->setTokenValue(Base); @@ -928,24 +890,25 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc, // If exactly one matched, then we treat that as a successful match (and the // instruction will already have been filled in correctly, since the failing // matches won't have modified it). - if (MatchB + MatchW + MatchL + MatchQ == 3) + unsigned NumSuccessfulMatches = + (Match1 == Match_Success) + (Match2 == Match_Success) + + (Match3 == Match_Success) + (Match4 == Match_Success); + if (NumSuccessfulMatches == 1) { + Out.EmitInstruction(Inst); return false; + } - // Otherwise, the match failed. + // Otherwise, the match failed, try to produce a decent error message. // If we had multiple suffix matches, then identify this as an ambiguous // match. - if (MatchB + MatchW + MatchL + MatchQ != 4) { + if (NumSuccessfulMatches > 1) { char MatchChars[4]; unsigned NumMatches = 0; - if (!MatchB) - MatchChars[NumMatches++] = 'b'; - if (!MatchW) - MatchChars[NumMatches++] = 'w'; - if (!MatchL) - MatchChars[NumMatches++] = 'l'; - if (!MatchQ) - MatchChars[NumMatches++] = 'q'; + if (Match1 == Match_Success) MatchChars[NumMatches++] = Suffixes[0]; + if (Match2 == Match_Success) MatchChars[NumMatches++] = Suffixes[1]; + if (Match3 == Match_Success) MatchChars[NumMatches++] = Suffixes[2]; + if (Match4 == Match_Success) MatchChars[NumMatches++] = Suffixes[3]; SmallString<126> Msg; raw_svector_ostream OS(Msg); @@ -959,14 +922,90 @@ X86ATTAsmParser::MatchInstruction(SMLoc IDLoc, } OS << ")"; Error(IDLoc, OS.str()); - } else { - // FIXME: We should give nicer diagnostics about the exact failure. - Error(IDLoc, "unrecognized instruction"); + return true; } + // Okay, we know that none of the variants matched successfully. + + // If all of the instructions reported an invalid mnemonic, then the original + // mnemonic was invalid. + if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && + (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { + if (!WasOriginallyInvalidOperand) { + Error(IDLoc, "invalid instruction mnemonic '" + Base + "'"); + return true; + } + + // Recover location info for the operand if we know which was the problem. + SMLoc ErrorLoc = IDLoc; + if (OrigErrorInfo != ~0U) { + if (OrigErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + + // If one instruction matched with a missing feature, report this as a + // missing feature. 
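  // Note added for exposition: summing the boolean comparisons below
  // counts how many suffix variants failed with Match_MissingFeature; a
  // total of exactly 1 means a single otherwise-valid encoding was
  // rejected only for lacking a CPU feature, which makes that the most
  // precise diagnostic to emit.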
+ if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + + (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + + // If one instruction matched with an invalid operand, report this as an + // operand failure. + if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + + (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ + Error(IDLoc, "invalid operand for instruction"); + return true; + } + + // If all of these were an outright failure, report it in a useless way. + // FIXME: We should give nicer diagnostics about the exact failure. + Error(IDLoc, "unknown use of instruction mnemonic without a size suffix"); + return true; +} + + +bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal == ".word") + return ParseDirectiveWord(2, DirectiveID.getLoc()); return true; } +/// ParseDirectiveWord +/// ::= .word [ expression (, expression)* ] +bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + if (getParser().ParseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. + if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + } + } + + Parser.Lex(); + return false; +} + + + extern "C" void LLVMInitializeX86AsmLexer(); @@ -977,4 +1016,6 @@ extern "C" void LLVMInitializeX86AsmParser() { LLVMInitializeX86AsmLexer(); } +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION #include "X86GenAsmMatcher.inc" diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 033973e..0000000 --- a/lib/Target/X86/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMX86AsmPrinter - X86ATTInstPrinter.cpp - X86IntelInstPrinter.cpp - X86InstComments.cpp - ) -add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen) diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/AsmPrinter/Makefile deleted file mode 100644 index c82aa33..0000000 --- a/lib/Target/X86/AsmPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMX86AsmPrinter - -# Hack: we need to include 'main' x86 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp deleted file mode 100644 index 554b96c..0000000 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ /dev/null @@ -1,129 +0,0 @@ -//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file includes code for rendering MCInst instances as AT&T-style -// assembly. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "X86ATTInstPrinter.h" -#include "X86InstComments.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/FormattedStream.h" -#include "X86GenInstrNames.inc" -using namespace llvm; - -// Include the auto-generated portion of the assembly writer. -#define MachineInstr MCInst -#define GET_INSTRUCTION_NAME -#include "X86GenAsmWriter.inc" -#undef MachineInstr - -void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { - printInstruction(MI, OS); - - // If verbose assembly is enabled, we can print some informative comments. - if (CommentStream) - EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); -} -StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - - -void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op, - raw_ostream &O) { - switch (MI->getOperand(Op).getImm()) { - default: assert(0 && "Invalid ssecc argument!"); - case 0: O << "eq"; break; - case 1: O << "lt"; break; - case 2: O << "le"; break; - case 3: O << "unord"; break; - case 4: O << "neq"; break; - case 5: O << "nlt"; break; - case 6: O << "nle"; break; - case 7: O << "ord"; break; - } -} - -/// print_pcrel_imm - This is used to print an immediate value that ends up -/// being encoded as a pc-relative value (e.g. for jumps and calls). These -/// print slightly differently than normal immediates. For example, a $ is not -/// emitted. -void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isImm()) - // Print this as a signed 32-bit value. - O << (int)Op.getImm(); - else { - assert(Op.isExpr() && "unknown pcrel immediate operand"); - O << *Op.getExpr(); - } -} - -void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - O << '%' << getRegisterName(Op.getReg()); - } else if (Op.isImm()) { - O << '$' << Op.getImm(); - - if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) - *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm()); - - } else { - assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << '$' << *Op.getExpr(); - } -} - -void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { - const MCOperand &BaseReg = MI->getOperand(Op); - const MCOperand &IndexReg = MI->getOperand(Op+2); - const MCOperand &DispSpec = MI->getOperand(Op+3); - const MCOperand &SegReg = MI->getOperand(Op+4); - - // If this has a segment register, print it. 
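  // Note added for exposition: the AT&T memory operand printed here has
  // the shape  segment:displacement(base,index,scale), for example
  //   %gs:8(%ebx,%esi,4)
  // The scale is omitted when it is 1, and the parenthesized part is
  // omitted entirely when there is neither a base nor an index register.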
- if (SegReg.getReg()) { - printOperand(MI, Op+4, O); - O << ':'; - } - - if (DispSpec.isImm()) { - int64_t DispVal = DispSpec.getImm(); - if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) - O << DispVal; - } else { - assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); - O << *DispSpec.getExpr(); - } - - if (IndexReg.getReg() || BaseReg.getReg()) { - O << '('; - if (BaseReg.getReg()) - printOperand(MI, Op, O); - - if (IndexReg.getReg()) { - O << ','; - printOperand(MI, Op+2, O); - unsigned ScaleVal = MI->getOperand(Op+1).getImm(); - if (ScaleVal != 1) - O << ',' << ScaleVal; - } - O << ')'; - } -} diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h deleted file mode 100644 index eb98664..0000000 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ /dev/null @@ -1,81 +0,0 @@ -//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an X86 MCInst to AT&T style .s file syntax. -// -//===----------------------------------------------------------------------===// - -#ifndef X86_ATT_INST_PRINTER_H -#define X86_ATT_INST_PRINTER_H - -#include "llvm/MC/MCInstPrinter.h" - -namespace llvm { - class MCOperand; - -class X86ATTInstPrinter : public MCInstPrinter { -public: - X86ATTInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} - - - virtual void printInst(const MCInst *MI, raw_ostream &OS); - virtual StringRef getOpcodeName(unsigned Opcode) const; - - // Autogenerated by tblgen. - void printInstruction(const MCInst *MI, raw_ostream &OS); - static const char *getRegisterName(unsigned RegNo); - static const char *getInstructionName(unsigned Opcode); - - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS); - void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS); - void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS); - void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS); - - void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - - void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } - void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printMemReference(MI, OpNo, O); - } -}; - -} - 
-#endif diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.cpp b/lib/Target/X86/AsmPrinter/X86InstComments.cpp deleted file mode 100644 index da9d5a3..0000000 --- a/lib/Target/X86/AsmPrinter/X86InstComments.cpp +++ /dev/null @@ -1,232 +0,0 @@ -//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This defines functionality used to emit comments about X86 instructions to -// an output stream for -fverbose-asm. -// -//===----------------------------------------------------------------------===// - -#include "X86InstComments.h" -#include "X86GenInstrNames.inc" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/raw_ostream.h" -#include "../X86ShuffleDecode.h" -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Top Level Entrypoint -//===----------------------------------------------------------------------===// - -/// EmitAnyX86InstComments - This function decodes x86 instructions and prints -/// newline terminated strings to the specified string if desired. This -/// information is shown in disassembly dumps when verbose assembly is enabled. -void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, - const char *(*getRegName)(unsigned)) { - // If this is a shuffle operation, the switch should fill in this state. - SmallVector<unsigned, 8> ShuffleMask; - const char *DestName = 0, *Src1Name = 0, *Src2Name = 0; - - switch (MI->getOpcode()) { - case X86::INSERTPSrr: - Src1Name = getRegName(MI->getOperand(1).getReg()); - Src2Name = getRegName(MI->getOperand(2).getReg()); - DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask); - break; - - case X86::MOVLHPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeMOVLHPSMask(2, ShuffleMask); - break; - - case X86::MOVHLPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodeMOVHLPSMask(2, ShuffleMask); - break; - - case X86::PSHUFDri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. - case X86::PSHUFDmi: - DestName = getRegName(MI->getOperand(0).getReg()); - DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); - break; - - case X86::PSHUFHWri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. - case X86::PSHUFHWmi: - DestName = getRegName(MI->getOperand(0).getReg()); - DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); - break; - case X86::PSHUFLWri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. - case X86::PSHUFLWmi: - DestName = getRegName(MI->getOperand(0).getReg()); - DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), - ShuffleMask); - break; - - case X86::PUNPCKHBWrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHBWrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKHMask(16, ShuffleMask); - break; - case X86::PUNPCKHWDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. 
- case X86::PUNPCKHWDrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKHMask(8, ShuffleMask); - break; - case X86::PUNPCKHDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKHMask(4, ShuffleMask); - break; - case X86::PUNPCKHQDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHQDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKHMask(2, ShuffleMask); - break; - - case X86::PUNPCKLBWrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLBWrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(16, ShuffleMask); - break; - case X86::PUNPCKLWDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLWDrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(8, ShuffleMask); - break; - case X86::PUNPCKLDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(4, ShuffleMask); - break; - case X86::PUNPCKLQDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLQDQrm: - Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(2, ShuffleMask); - break; - - case X86::SHUFPDrri: - DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - Src2Name = getRegName(MI->getOperand(2).getReg()); - break; - - case X86::SHUFPSrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::SHUFPSrmi: - DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; - - case X86::UNPCKLPDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKLPDrm: - DecodeUNPCKLPMask(2, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; - case X86::UNPCKLPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKLPSrm: - DecodeUNPCKLPMask(4, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; - case X86::UNPCKHPDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKHPDrm: - DecodeUNPCKHPMask(2, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; - case X86::UNPCKHPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKHPSrm: - DecodeUNPCKHPMask(4, ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); - break; - } - - - // If this was a shuffle operation, print the shuffle mask. - if (!ShuffleMask.empty()) { - if (DestName == 0) DestName = Src1Name; - OS << (DestName ? DestName : "mem") << " = "; - - // If the two sources are the same, canonicalize the input elements to be - // from the first src so that we get larger element spans. - if (Src1Name == Src2Name) { - for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) { - if ((int)ShuffleMask[i] >= 0 && // Not sentinel. - ShuffleMask[i] >= e) // From second mask. - ShuffleMask[i] -= e; - } - } - - // The shuffle mask specifies which elements of the src1/src2 fill in the - // destination, with a few sentinel values. Loop through and print them - // out. 
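  // Illustrative example, not in the original source: for
  //   unpcklps %xmm1, %xmm0
  // the loop below emits a verbose-asm comment of the form
  //   xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
  // with "zero" printed for SM_SentinelZero elements and "mem" when a
  // source register name is not available.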
- for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) { - if (i != 0) - OS << ','; - if (ShuffleMask[i] == SM_SentinelZero) { - OS << "zero"; - continue; - } - - // Otherwise, it must come from src1 or src2. Print the span of elements - // that comes from this src. - bool isSrc1 = ShuffleMask[i] < ShuffleMask.size(); - const char *SrcName = isSrc1 ? Src1Name : Src2Name; - OS << (SrcName ? SrcName : "mem") << '['; - bool IsFirst = true; - while (i != e && - (int)ShuffleMask[i] >= 0 && - (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) { - if (!IsFirst) - OS << ','; - else - IsFirst = false; - OS << ShuffleMask[i] % ShuffleMask.size(); - ++i; - } - OS << ']'; - --i; // For loop increments element #. - } - //MI->print(OS, 0); - OS << "\n"; - } - -} diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.h b/lib/Target/X86/AsmPrinter/X86InstComments.h deleted file mode 100644 index 6b86db4..0000000 --- a/lib/Target/X86/AsmPrinter/X86InstComments.h +++ /dev/null @@ -1,25 +0,0 @@ -//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This defines functionality used to emit comments about X86 instructions to -// an output stream for -fverbose-asm. -// -//===----------------------------------------------------------------------===// - -#ifndef X86_INST_COMMENTS_H -#define X86_INST_COMMENTS_H - -namespace llvm { - class MCInst; - class raw_ostream; - void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, - const char *(*getRegName)(unsigned)); -} - -#endif diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp deleted file mode 100644 index 5625b0e..0000000 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp +++ /dev/null @@ -1,140 +0,0 @@ -//===-- X86IntelInstPrinter.cpp - AT&T assembly instruction printing ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file includes code for rendering MCInst instances as AT&T-style -// assembly. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "X86IntelInstPrinter.h" -#include "X86InstComments.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" -#include "X86GenInstrNames.inc" -using namespace llvm; - -// Include the auto-generated portion of the assembly writer. -#define MachineInstr MCInst -#define GET_INSTRUCTION_NAME -#include "X86GenAsmWriter1.inc" -#undef MachineInstr - -void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { - printInstruction(MI, OS); - - // If verbose assembly is enabled, we can print some informative comments. 
- if (CommentStream) - EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); -} -StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - -void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op, - raw_ostream &O) { - switch (MI->getOperand(Op).getImm()) { - default: assert(0 && "Invalid ssecc argument!"); - case 0: O << "eq"; break; - case 1: O << "lt"; break; - case 2: O << "le"; break; - case 3: O << "unord"; break; - case 4: O << "neq"; break; - case 5: O << "nlt"; break; - case 6: O << "nle"; break; - case 7: O << "ord"; break; - } -} - -/// print_pcrel_imm - This is used to print an immediate value that ends up -/// being encoded as a pc-relative value. -void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isImm()) - O << Op.getImm(); - else { - assert(Op.isExpr() && "unknown pcrel immediate operand"); - O << *Op.getExpr(); - } -} - -static void PrintRegName(raw_ostream &O, StringRef RegName) { - for (unsigned i = 0, e = RegName.size(); i != e; ++i) - O << (char)toupper(RegName[i]); -} - -void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - PrintRegName(O, getRegisterName(Op.getReg())); - } else if (Op.isImm()) { - O << Op.getImm(); - } else { - assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); - } -} - -void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { - const MCOperand &BaseReg = MI->getOperand(Op); - unsigned ScaleVal = MI->getOperand(Op+1).getImm(); - const MCOperand &IndexReg = MI->getOperand(Op+2); - const MCOperand &DispSpec = MI->getOperand(Op+3); - const MCOperand &SegReg = MI->getOperand(Op+4); - - // If this has a segment register, print it. - if (SegReg.getReg()) { - printOperand(MI, Op+4, O); - O << ':'; - } - - O << '['; - - bool NeedPlus = false; - if (BaseReg.getReg()) { - printOperand(MI, Op, O); - NeedPlus = true; - } - - if (IndexReg.getReg()) { - if (NeedPlus) O << " + "; - if (ScaleVal != 1) - O << ScaleVal << '*'; - printOperand(MI, Op+2, O); - NeedPlus = true; - } - - - if (!DispSpec.isImm()) { - if (NeedPlus) O << " + "; - assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); - O << *DispSpec.getExpr(); - } else { - int64_t DispVal = DispSpec.getImm(); - if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { - if (NeedPlus) { - if (DispVal > 0) - O << " + "; - else { - O << " - "; - DispVal = -DispVal; - } - } - O << DispVal; - } - } - - O << ']'; -} diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h deleted file mode 100644 index 6f12032..0000000 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ /dev/null @@ -1,95 +0,0 @@ -//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an X86 MCInst to intel style .s file syntax. 
-// -//===----------------------------------------------------------------------===// - -#ifndef X86_INTEL_INST_PRINTER_H -#define X86_INTEL_INST_PRINTER_H - -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { - class MCOperand; - -class X86IntelInstPrinter : public MCInstPrinter { -public: - X86IntelInstPrinter(const MCAsmInfo &MAI) - : MCInstPrinter(MAI) {} - - virtual void printInst(const MCInst *MI, raw_ostream &OS); - virtual StringRef getOpcodeName(unsigned Opcode) const; - - // Autogenerated by tblgen. - void printInstruction(const MCInst *MI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - static const char *getInstructionName(unsigned Opcode); - - - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O); - void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O); - void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O); - - void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "OPAQUE PTR "; - printMemReference(MI, OpNo, O); - } - - void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "BYTE PTR "; - printMemReference(MI, OpNo, O); - } - void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "WORD PTR "; - printMemReference(MI, OpNo, O); - } - void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "DWORD PTR "; - printMemReference(MI, OpNo, O); - } - void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "QWORD PTR "; - printMemReference(MI, OpNo, O); - } - void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "XMMWORD PTR "; - printMemReference(MI, OpNo, O); - } - void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "YMMWORD PTR "; - printMemReference(MI, OpNo, O); - } - void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "DWORD PTR "; - printMemReference(MI, OpNo, O); - } - void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "QWORD PTR "; - printMemReference(MI, OpNo, O); - } - void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "XWORD PTR "; - printMemReference(MI, OpNo, O); - } - void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "XMMWORD PTR "; - printMemReference(MI, OpNo, O); - } - void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "YMMWORD PTR "; - printMemReference(MI, OpNo, O); - } -}; - -} - -#endif diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index e9399f5..b5fa94f 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -24,10 +24,12 @@ set(sources X86ELFWriterInfo.cpp X86FastISel.cpp X86FloatingPoint.cpp + X86FrameLowering.cpp X86ISelDAGToDAG.cpp X86ISelLowering.cpp X86InstrInfo.cpp X86JITInfo.cpp + X86MachObjectWriter.cpp X86MCAsmInfo.cpp X86MCCodeEmitter.cpp X86MCInstLower.cpp @@ -39,14 +41,24 @@ set(sources ) if( CMAKE_CL_64 ) + # A workaround for a bug in cmake 2.8.3. See PR 8885. + if( CMAKE_VERSION STREQUAL "2.8.3" ) + include(CMakeDetermineCompilerId) + endif() + # end of workaround. 
enable_language(ASM_MASM) ADD_CUSTOM_COMMAND( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj + MAIN_DEPENDENCY X86CompilationCallback_Win64.asm COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm ) set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj) endif() add_llvm_target(X86CodeGen ${sources}) +add_subdirectory(AsmParser) +add_subdirectory(Disassembler) +add_subdirectory(InstPrinter) +add_subdirectory(TargetInfo) +add_subdirectory(Utils) diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt index 97589c0..972a0d9 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -5,7 +5,7 @@ add_llvm_library(LLVMX86Disassembler X86DisassemblerDecoder.c ) # workaround for hanging compilation on MSVC9 and 10 -if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) +if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) set_property( SOURCE X86Disassembler.cpp PROPERTY COMPILE_FLAGS "/Od" diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 09f1584..691e2d7 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -157,9 +157,8 @@ static void translateRegister(MCInst &mcInst, Reg reg) { /// @param immediate - The immediate value to append. /// @param operand - The operand, as stored in the descriptor table. /// @param insn - The internal instruction. -static void translateImmediate(MCInst &mcInst, - uint64_t immediate, - OperandSpecifier &operand, +static void translateImmediate(MCInst &mcInst, uint64_t immediate, + const OperandSpecifier &operand, InternalInstruction &insn) { // Sign-extend the immediate if necessary. @@ -392,9 +391,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { /// @param insn - The instruction to extract Mod, R/M, and SIB fields /// from. /// @return - 0 on success; nonzero otherwise -static bool translateRM(MCInst &mcInst, - OperandSpecifier &operand, - InternalInstruction &insn) { +static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, + InternalInstruction &insn) { switch (operand.type) { default: debug("Unexpected type for a R/M operand"); @@ -461,9 +459,8 @@ static bool translateFPRegister(MCInst &mcInst, /// @param operand - The operand, as stored in the descriptor table. /// @param insn - The internal instruction. /// @return - false on success; true otherwise. 
-static bool translateOperand(MCInst &mcInst, - OperandSpecifier &operand, - InternalInstruction &insn) { +static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, + InternalInstruction &insn) { switch (operand.encoding) { default: debug("Unhandled operand encoding during translation"); diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index 9c54262..550cf9d 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -78,7 +78,7 @@ const char* name; #define INSTRUCTION_IDS \ - InstrUID* instructionIDs; + const InstrUID *instructionIDs; #include "X86DisassemblerDecoderCommon.h" diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 6c3ff6b..b6546fc 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -27,12 +27,6 @@ typedef int8_t bool; -#ifdef __GNUC__ -#define NORETURN __attribute__((noreturn)) -#else -#define NORETURN -#endif - #ifndef NDEBUG #define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) #else @@ -103,7 +97,7 @@ static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM) { - struct ModRMDecision* dec; + const struct ModRMDecision* dec; switch (type) { default: @@ -147,7 +141,7 @@ static InstrUID decode(OpcodeType type, * decode(); specifierForUID will not check bounds. * @return - A pointer to the specification for that instruction. */ -static struct InstructionSpecifier* specifierForUID(InstrUID uid) { +static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { return &INSTRUCTIONS_SYM[uid]; } @@ -296,7 +290,7 @@ static int readPrefixes(struct InternalInstruction* insn) { BOOL isPrefix = TRUE; BOOL prefixGroups[4] = { FALSE }; uint64_t prefixLocation; - uint8_t byte; + uint8_t byte = 0; BOOL hasAdSize = FALSE; BOOL hasOpSize = FALSE; @@ -394,6 +388,7 @@ static int readPrefixes(struct InternalInstruction* insn) { } } else { unconsumeByte(insn); + insn->necessaryPrefixLocation = insn->readerCursor - 1; } if (insn->mode == MODE_16BIT) { @@ -405,7 +400,7 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->registerSize = (hasOpSize ? 2 : 4); insn->addressSize = (hasAdSize ? 2 : 4); insn->displacementSize = (hasAdSize ? 2 : 4); - insn->immediateSize = (hasAdSize ? 2 : 4); + insn->immediateSize = (hasOpSize ? 2 : 4); } else if (insn->mode == MODE_64BIT) { if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { insn->registerSize = 8; @@ -517,7 +512,8 @@ static int getIDWithAttrMask(uint16_t* instructionID, insn->opcode); if (hasModRMExtension) { - readModRM(insn); + if (readModRM(insn)) + return -1; *instructionID = decode(insn->opcodeType, instructionClass, @@ -632,9 +628,9 @@ static int getID(struct InternalInstruction* insn) { * instead of F2 changes a 32 to a 64, we adopt the new encoding. */ - struct InstructionSpecifier* spec; + const struct InstructionSpecifier *spec; uint16_t instructionIDWithREXw; - struct InstructionSpecifier* specWithREXw; + const struct InstructionSpecifier *specWithREXw; spec = specifierForUID(instructionID); @@ -672,9 +668,9 @@ static int getID(struct InternalInstruction* insn) { * in the right place we check if there's a 16-bit operation. 
*/ - struct InstructionSpecifier* spec; + const struct InstructionSpecifier *spec; uint16_t instructionIDWithOpsize; - struct InstructionSpecifier* specWithOpsize; + const struct InstructionSpecifier *specWithOpsize; spec = specifierForUID(instructionID); @@ -866,7 +862,8 @@ static int readModRM(struct InternalInstruction* insn) { if (insn->consumedModRM) return 0; - consumeByte(insn, &insn->modRM); + if (consumeByte(insn, &insn->modRM)) + return -1; insn->consumedModRM = TRUE; mod = modFromModRM(insn->modRM); @@ -1067,7 +1064,7 @@ GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) * invalid for its class. */ static int fixupReg(struct InternalInstruction *insn, - struct OperandSpecifier *op) { + const struct OperandSpecifier *op) { uint8_t valid; dbgprintf(insn, "fixupReg()"); diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 28ba86b..4f4fbcd 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -24,7 +24,7 @@ extern "C" { const char* name; #define INSTRUCTION_IDS \ - InstrUID* instructionIDs; + const InstrUID *instructionIDs; #include "X86DisassemblerDecoderCommon.h" @@ -423,7 +423,7 @@ struct InternalInstruction { /* The instruction ID, extracted from the decode table */ uint16_t instructionID; /* The specifier for the instruction, from the instruction info table */ - struct InstructionSpecifier* spec; + const struct InstructionSpecifier *spec; /* state for additional bytes, consumed during operand decode. Pattern: consumed___ indicates that the byte was already consumed and does not diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 0f33f52..1425b86 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -22,7 +22,7 @@ #ifndef X86DISASSEMBLERDECODERCOMMON_H #define X86DISASSEMBLERDECODERCOMMON_H -#include "llvm/System/DataTypes.h" +#include "llvm/Support/DataTypes.h" #define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers #define CONTEXTS_SYM x86DisassemblerContexts @@ -248,6 +248,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_M64, "8-byte") \ ENUM_ENTRY(TYPE_LEA, "Effective address") \ ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \ + ENUM_ENTRY(TYPE_M256, "32-byte (AVX)") \ ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \ ENUM_ENTRY(TYPE_M1632, "2+4-byte") \ ENUM_ENTRY(TYPE_M1664, "2+8-byte") \ diff --git a/lib/Target/X86/InstPrinter/CMakeLists.txt b/lib/Target/X86/InstPrinter/CMakeLists.txt new file mode 100644 index 0000000..033973e --- /dev/null +++ b/lib/Target/X86/InstPrinter/CMakeLists.txt @@ -0,0 +1,8 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMX86AsmPrinter + X86ATTInstPrinter.cpp + X86IntelInstPrinter.cpp + X86InstComments.cpp + ) +add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen) diff --git a/lib/Target/X86/InstPrinter/Makefile b/lib/Target/X86/InstPrinter/Makefile new file mode 100644 index 0000000..c82aa33 --- /dev/null +++ b/lib/Target/X86/InstPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/X86/InstPrinter/Makefile -----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details.
+# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMX86AsmPrinter + +# Hack: we need to include 'main' x86 target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp new file mode 100644 index 0000000..d6950f4 --- /dev/null +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -0,0 +1,127 @@ +//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file includes code for rendering MCInst instances as AT&T-style +// assembly. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "X86ATTInstPrinter.h" +#include "X86InstComments.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include "X86GenInstrNames.inc" +using namespace llvm; + +// Include the auto-generated portion of the assembly writer. +#define GET_INSTRUCTION_NAME +#include "X86GenAsmWriter.inc" + +void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { + printInstruction(MI, OS); + + // If verbose assembly is enabled, we can print some informative comments. + if (CommentStream) + EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); +} +StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + + +void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op, + raw_ostream &O) { + switch (MI->getOperand(Op).getImm()) { + default: assert(0 && "Invalid ssecc argument!"); + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + } +} + +/// print_pcrel_imm - This is used to print an immediate value that ends up +/// being encoded as a pc-relative value (e.g. for jumps and calls). These +/// print slightly differently than normal immediates. For example, a $ is not +/// emitted. +void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) + // Print this as a signed 32-bit value. 
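+    // Truncating to 'int' prints the value in signed decimal; e.g. an +    // immediate of 0xFFFFFFF0 comes out as -16 rather than 4294967280.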
+ O << (int)Op.getImm(); + else { + assert(Op.isExpr() && "unknown pcrel immediate operand"); + O << *Op.getExpr(); + } +} + +void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + O << '%' << getRegisterName(Op.getReg()); + } else if (Op.isImm()) { + O << '$' << Op.getImm(); + + if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) + *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm()); + + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + O << '$' << *Op.getExpr(); + } +} + +void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &BaseReg = MI->getOperand(Op); + const MCOperand &IndexReg = MI->getOperand(Op+2); + const MCOperand &DispSpec = MI->getOperand(Op+3); + const MCOperand &SegReg = MI->getOperand(Op+4); + + // If this has a segment register, print it. + if (SegReg.getReg()) { + printOperand(MI, Op+4, O); + O << ':'; + } + + if (DispSpec.isImm()) { + int64_t DispVal = DispSpec.getImm(); + if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) + O << DispVal; + } else { + assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); + O << *DispSpec.getExpr(); + } + + if (IndexReg.getReg() || BaseReg.getReg()) { + O << '('; + if (BaseReg.getReg()) + printOperand(MI, Op, O); + + if (IndexReg.getReg()) { + O << ','; + printOperand(MI, Op+2, O); + unsigned ScaleVal = MI->getOperand(Op+1).getImm(); + if (ScaleVal != 1) + O << ',' << ScaleVal; + } + O << ')'; + } +} diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h new file mode 100644 index 0000000..eb98664 --- /dev/null +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -0,0 +1,81 @@ +//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an X86 MCInst to AT&T style .s file syntax. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_ATT_INST_PRINTER_H +#define X86_ATT_INST_PRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + class MCOperand; + +class X86ATTInstPrinter : public MCInstPrinter { +public: + X86ATTInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} + + + virtual void printInst(const MCInst *MI, raw_ostream &OS); + virtual StringRef getOpcodeName(unsigned Opcode) const; + + // Autogenerated by tblgen. 
+ void printInstruction(const MCInst *MI, raw_ostream &OS); + static const char *getRegisterName(unsigned RegNo); + static const char *getInstructionName(unsigned Opcode); + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS); + void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS); + void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS); + void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS); + + void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } + + void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } + void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } + void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } + void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } + void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } + void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } + void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } + void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } + void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } + void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } + void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); + } +}; + +} + +#endif diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp new file mode 100644 index 0000000..12144e3 --- /dev/null +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -0,0 +1,232 @@ +//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This defines functionality used to emit comments about X86 instructions to +// an output stream for -fverbose-asm. +// +//===----------------------------------------------------------------------===// + +#include "X86InstComments.h" +#include "X86GenInstrNames.inc" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +#include "../Utils/X86ShuffleDecode.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Top Level Entrypoint +//===----------------------------------------------------------------------===// + +/// EmitAnyX86InstComments - This function decodes x86 instructions and prints +/// newline terminated strings to the specified string if desired. This +/// information is shown in disassembly dumps when verbose assembly is enabled. +void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, + const char *(*getRegName)(unsigned)) { + // If this is a shuffle operation, the switch should fill in this state. 
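+  // For example, 'pshufd $0x1b, %xmm0, %xmm1' decodes below (via +  // DecodePSHUFMask) to the mask <3,2,1,0> and yields the comment: +  //   xmm1 = xmm0[3,2,1,0]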
+ SmallVector<unsigned, 8> ShuffleMask; + const char *DestName = 0, *Src1Name = 0, *Src2Name = 0; + + switch (MI->getOpcode()) { + case X86::INSERTPSrr: + Src1Name = getRegName(MI->getOperand(1).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask); + break; + + case X86::MOVLHPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodeMOVLHPSMask(2, ShuffleMask); + break; + + case X86::MOVHLPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodeMOVHLPSMask(2, ShuffleMask); + break; + + case X86::PSHUFDri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::PSHUFDmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + + case X86::PSHUFHWri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::PSHUFHWmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + case X86::PSHUFLWri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::PSHUFLWmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + + case X86::PUNPCKHBWrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKHBWrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKHMask(16, ShuffleMask); + break; + case X86::PUNPCKHWDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKHWDrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKHMask(8, ShuffleMask); + break; + case X86::PUNPCKHDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKHDQrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKHMask(4, ShuffleMask); + break; + case X86::PUNPCKHQDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKHQDQrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKHMask(2, ShuffleMask); + break; + + case X86::PUNPCKLBWrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKLBWrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKLMask(16, ShuffleMask); + break; + case X86::PUNPCKLWDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKLWDrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKLMask(8, ShuffleMask); + break; + case X86::PUNPCKLDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::PUNPCKLDQrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKLMask(4, ShuffleMask); + break; + case X86::PUNPCKLQDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
+ case X86::PUNPCKLQDQrm: + Src1Name = getRegName(MI->getOperand(0).getReg()); + DecodePUNPCKLMask(2, ShuffleMask); + break; + + case X86::SHUFPDrri: + DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + break; + + case X86::SHUFPSrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::SHUFPSrmi: + DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + + case X86::UNPCKLPDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::UNPCKLPDrm: + DecodeUNPCKLPMask(2, ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::UNPCKLPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::UNPCKLPSrm: + DecodeUNPCKLPMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::UNPCKHPDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::UNPCKHPDrm: + DecodeUNPCKHPMask(2, ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + case X86::UNPCKHPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::UNPCKHPSrm: + DecodeUNPCKHPMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(0).getReg()); + break; + } + + + // If this was a shuffle operation, print the shuffle mask. + if (!ShuffleMask.empty()) { + if (DestName == 0) DestName = Src1Name; + OS << (DestName ? DestName : "mem") << " = "; + + // If the two sources are the same, canonicalize the input elements to be + // from the first src so that we get larger element spans. + if (Src1Name == Src2Name) { + for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) { + if ((int)ShuffleMask[i] >= 0 && // Not sentinel. + ShuffleMask[i] >= e) // From second mask. + ShuffleMask[i] -= e; + } + } + + // The shuffle mask specifies which elements of the src1/src2 fill in the + // destination, with a few sentinel values. Loop through and print them + // out. + for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) { + if (i != 0) + OS << ','; + if (ShuffleMask[i] == SM_SentinelZero) { + OS << "zero"; + continue; + } + + // Otherwise, it must come from src1 or src2. Print the span of elements + // that comes from this src. + bool isSrc1 = ShuffleMask[i] < ShuffleMask.size(); + const char *SrcName = isSrc1 ? Src1Name : Src2Name; + OS << (SrcName ? SrcName : "mem") << '['; + bool IsFirst = true; + while (i != e && + (int)ShuffleMask[i] >= 0 && + (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) { + if (!IsFirst) + OS << ','; + else + IsFirst = false; + OS << ShuffleMask[i] % ShuffleMask.size(); + ++i; + } + OS << ']'; + --i; // For loop increments element #. + } + //MI->print(OS, 0); + OS << "\n"; + } + +} diff --git a/lib/Target/X86/InstPrinter/X86InstComments.h b/lib/Target/X86/InstPrinter/X86InstComments.h new file mode 100644 index 0000000..6b86db4 --- /dev/null +++ b/lib/Target/X86/InstPrinter/X86InstComments.h @@ -0,0 +1,25 @@ +//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This defines functionality used to emit comments about X86 instructions to +// an output stream for -fverbose-asm. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_INST_COMMENTS_H +#define X86_INST_COMMENTS_H + +namespace llvm { + class MCInst; + class raw_ostream; + void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, + const char *(*getRegName)(unsigned)); +} + +#endif diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp new file mode 100644 index 0000000..0484529 --- /dev/null +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -0,0 +1,139 @@ +//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file includes code for rendering MCInst instances as Intel-style +// assembly. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "X86IntelInstPrinter.h" +#include "X86InstComments.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "X86GenInstrNames.inc" +#include <cctype> +using namespace llvm; + +// Include the auto-generated portion of the assembly writer. +#define GET_INSTRUCTION_NAME +#include "X86GenAsmWriter1.inc" + +void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { + printInstruction(MI, OS); + + // If verbose assembly is enabled, we can print some informative comments. + if (CommentStream) + EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); +} +StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + +void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op, + raw_ostream &O) { + switch (MI->getOperand(Op).getImm()) { + default: assert(0 && "Invalid ssecc argument!"); + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + } +} + +/// print_pcrel_imm - This is used to print an immediate value that ends up +/// being encoded as a pc-relative value.
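+/// In Intel syntax the immediate is printed bare: no '$' sigil and, unlike +/// the AT&T printer, no truncation to a signed 32-bit value.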
+void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) + O << Op.getImm(); + else { + assert(Op.isExpr() && "unknown pcrel immediate operand"); + O << *Op.getExpr(); + } +} + +static void PrintRegName(raw_ostream &O, StringRef RegName) { + for (unsigned i = 0, e = RegName.size(); i != e; ++i) + O << (char)toupper(RegName[i]); +} + +void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + PrintRegName(O, getRegisterName(Op.getReg())); + } else if (Op.isImm()) { + O << Op.getImm(); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + O << *Op.getExpr(); + } +} + +void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &BaseReg = MI->getOperand(Op); + unsigned ScaleVal = MI->getOperand(Op+1).getImm(); + const MCOperand &IndexReg = MI->getOperand(Op+2); + const MCOperand &DispSpec = MI->getOperand(Op+3); + const MCOperand &SegReg = MI->getOperand(Op+4); + + // If this has a segment register, print it. + if (SegReg.getReg()) { + printOperand(MI, Op+4, O); + O << ':'; + } + + O << '['; + + bool NeedPlus = false; + if (BaseReg.getReg()) { + printOperand(MI, Op, O); + NeedPlus = true; + } + + if (IndexReg.getReg()) { + if (NeedPlus) O << " + "; + if (ScaleVal != 1) + O << ScaleVal << '*'; + printOperand(MI, Op+2, O); + NeedPlus = true; + } + + + if (!DispSpec.isImm()) { + if (NeedPlus) O << " + "; + assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); + O << *DispSpec.getExpr(); + } else { + int64_t DispVal = DispSpec.getImm(); + if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { + if (NeedPlus) { + if (DispVal > 0) + O << " + "; + else { + O << " - "; + DispVal = -DispVal; + } + } + O << DispVal; + } + } + + O << ']'; +} diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h new file mode 100644 index 0000000..6f12032 --- /dev/null +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -0,0 +1,95 @@ +//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an X86 MCInst to intel style .s file syntax. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_INTEL_INST_PRINTER_H +#define X86_INTEL_INST_PRINTER_H + +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + class MCOperand; + +class X86IntelInstPrinter : public MCInstPrinter { +public: + X86IntelInstPrinter(const MCAsmInfo &MAI) + : MCInstPrinter(MAI) {} + + virtual void printInst(const MCInst *MI, raw_ostream &OS); + virtual StringRef getOpcodeName(unsigned Opcode) const; + + // Autogenerated by tblgen. 
+ void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + static const char *getInstructionName(unsigned Opcode); + + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O); + void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O); + void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "OPAQUE PTR "; + printMemReference(MI, OpNo, O); + } + + void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "BYTE PTR "; + printMemReference(MI, OpNo, O); + } + void printi16mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "WORD PTR "; + printMemReference(MI, OpNo, O); + } + void printi32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "DWORD PTR "; + printMemReference(MI, OpNo, O); + } + void printi64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "QWORD PTR "; + printMemReference(MI, OpNo, O); + } + void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "XMMWORD PTR "; + printMemReference(MI, OpNo, O); + } + void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo, O); + } + void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "DWORD PTR "; + printMemReference(MI, OpNo, O); + } + void printf64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "QWORD PTR "; + printMemReference(MI, OpNo, O); + } + void printf80mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "XWORD PTR "; + printMemReference(MI, OpNo, O); + } + void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "XMMWORD PTR "; + printMemReference(MI, OpNo, O); + } + void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo, O); + } +}; + +} + +#endif diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index f4ff894..12fb090 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -20,6 +20,6 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ X86GenCallingConv.inc X86GenSubtarget.inc \ X86GenEDInfo.inc -DIRS = AsmPrinter AsmParser Disassembler TargetInfo +DIRS = InstPrinter AsmParser Disassembler TargetInfo Utils include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index f96b22f..f16ec02 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -20,7 +20,28 @@ __m128i shift_right(__m128i value, unsigned long offset) { //===---------------------------------------------------------------------===// SSE has instructions for doing operations on complex numbers, we should pattern -match them. Compiling this: +match them. For example, this should turn into a horizontal add: + +typedef float __attribute__((vector_size(16))) v4f32; +float f32(v4f32 A) { + return A[0]+A[1]+A[2]+A[3]; +} + +Instead we get this: + +_f32: ## @f32 + pshufd $1, %xmm0, %xmm1 ## xmm1 = xmm0[1,0,0,0] + addss %xmm0, %xmm1 + pshufd $3, %xmm0, %xmm2 ## xmm2 = xmm0[3,0,0,0] + movhlps %xmm0, %xmm0 ## xmm0 = xmm0[1,1] + movaps %xmm0, %xmm3 + addss %xmm1, %xmm3 + movdqa %xmm2, %xmm0 + addss %xmm3, %xmm0 + ret + +Also, there are cases where some simple local SLP would improve codegen a bit. 
+compiling this: _Complex float f32(_Complex float A, _Complex float B) { return A+B; @@ -28,19 +49,17 @@ _Complex float f32(_Complex float A, _Complex float B) { into: -_f32: +_f32: ## @f32 movdqa %xmm0, %xmm2 addss %xmm1, %xmm2 - pshufd $16, %xmm2, %xmm2 - pshufd $1, %xmm1, %xmm1 - pshufd $1, %xmm0, %xmm0 - addss %xmm1, %xmm0 - pshufd $16, %xmm0, %xmm1 - movdqa %xmm2, %xmm0 - unpcklps %xmm1, %xmm0 + pshufd $1, %xmm1, %xmm1 ## xmm1 = xmm1[1,0,0,0] + pshufd $1, %xmm0, %xmm3 ## xmm3 = xmm0[1,0,0,0] + addss %xmm1, %xmm3 + movaps %xmm2, %xmm0 + unpcklps %xmm3, %xmm0 ## xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] ret -seems silly. +seems silly when it could just be one addps. //===---------------------------------------------------------------------===// @@ -904,4 +923,15 @@ The insertps's of $0 are pointless complex copies. //===---------------------------------------------------------------------===// +If SSE4.1 is available we should inline rounding functions instead of emitting +a libcall. + +floor: roundsd $0x01, %xmm, %xmm +ceil: roundsd $0x02, %xmm, %xmm +and likewise for the single precision versions. + +Currently, SelectionDAGBuilder doesn't turn calls to these functions into the +corresponding nodes and some targets (including X86) aren't ready for them. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt index 78c4dc0..e21d69a 100644 --- a/lib/Target/X86/README-X86-64.txt +++ b/lib/Target/X86/README-X86-64.txt @@ -41,50 +41,6 @@ saved a few instructions. //===---------------------------------------------------------------------===// -Poor codegen: - -int X[2]; -int b; -void test(void) { - memset(X, b, 2*sizeof(X[0])); -} - -llc: - movq _b@GOTPCREL(%rip), %rax - movzbq (%rax), %rax - movq %rax, %rcx - shlq $8, %rcx - orq %rax, %rcx - movq %rcx, %rax - shlq $16, %rax - orq %rcx, %rax - movq %rax, %rcx - shlq $32, %rcx - movq _X@GOTPCREL(%rip), %rdx - orq %rax, %rcx - movq %rcx, (%rdx) - ret - -gcc: - movq _b@GOTPCREL(%rip), %rax - movabsq $72340172838076673, %rdx - movzbq (%rax), %rax - imulq %rdx, %rax - movq _X@GOTPCREL(%rip), %rdx - movq %rax, (%rdx) - ret - -And the codegen is even worse for the following -(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103): - void fill1(char *s, int a) - { - __builtin_memset(s, a, 15); - } - -For this version, we duplicate the computation of the constant to store. - -//===---------------------------------------------------------------------===// - It's not possible to reference AH, BH, CH, and DH registers in an instruction requiring REX prefix. However, divb and mulb both produce results in AH. If isel emits a CopyFromReg which gets turned into a movb and that can be allocated a diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index a305ae6..c10e170 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -67,19 +67,6 @@ cmovs, we should expand to a conditional branch like GCC produces. //===---------------------------------------------------------------------===// -Compile this: -_Bool f(_Bool a) { return a!=1; } - -into: - movzbl %dil, %eax - xorl $1, %eax - ret - -(Although note that this isn't a legal way to express the code that llvm-gcc -currently generates for that function.) - -//===---------------------------------------------------------------------===// - Some isel ideas: 1. 
Dynamic programming based approach when compile time if not an @@ -109,6 +96,37 @@ It appears icc use push for parameter passing. Need to investigate. //===---------------------------------------------------------------------===// +This: + +void foo(void); +void bar(int x, int *P) { + x >>= 2; + if (x) + foo(); + *P = x; +} + +compiles into: + + movq %rsi, %rbx + movl %edi, %r14d + sarl $2, %r14d + testl %r14d, %r14d + je LBB0_2 + +Instead of doing an explicit test, we can use the flags off the sar. This +occurs in a bigger testcase like this, which is pretty common: + +#include <vector> +int test1(std::vector<int> &X) { + int Sum = 0; + for (long i = 0, e = X.size(); i != e; ++i) + X[i] = 0; + return Sum; +} + +//===---------------------------------------------------------------------===// + Only use inc/neg/not instructions on processors where they are faster than add/sub/xor. They are slower on the P4 due to only updating some processor flags. @@ -394,72 +412,8 @@ boundary to improve performance. //===---------------------------------------------------------------------===// -Codegen: - -int f(int a, int b) { - if (a == 4 || a == 6) - b++; - return b; -} - - -as: - -or eax, 2 -cmp eax, 6 -jz label - -//===---------------------------------------------------------------------===// - GCC's ix86_expand_int_movcc function (in i386.c) has a ton of interesting -simplifications for integer "x cmp y ? a : b". For example, instead of: - -int G; -void f(int X, int Y) { - G = X < 0 ? 14 : 13; -} - -compiling to: - -_f: - movl $14, %eax - movl $13, %ecx - movl 4(%esp), %edx - testl %edx, %edx - cmovl %eax, %ecx - movl %ecx, _G - ret - -it could be: -_f: - movl 4(%esp), %eax - sarl $31, %eax - notl %eax - addl $14, %eax - movl %eax, _G - ret - -etc. - -Another is: -int usesbb(unsigned int a, unsigned int b) { - return (a < b ? -1 : 0); -} -to: -_usesbb: - movl 8(%esp), %eax - cmpl %eax, 4(%esp) - sbbl %eax, %eax - ret - -instead of: -_usesbb: - xorl %eax, %eax - movl 8(%esp), %ecx - cmpl %ecx, 4(%esp) - movl $4294967295, %ecx - cmovb %ecx, %eax - ret +simplifications for integer "x cmp y ? a : b". //===---------------------------------------------------------------------===// @@ -756,23 +710,17 @@ This: { return !full_add(a, b).second; } Should compile to: + addl %esi, %edi + setae %al + movzbl %al, %eax + ret - - _Z11no_overflowjj: - addl %edi, %esi - setae %al - ret - -FIXME: That code looks wrong; bool return is normally defined as zext. 
- -on x86-64, not: - -__Z11no_overflowjj: - addl %edi, %esi - cmpl %edi, %esi - setae %al - movzbl %al, %eax - ret +on x86-64, instead of the rather stupid-looking: + addl %esi, %edi + setb %al + xorb $1, %al + movzbl %al, %eax + ret //===---------------------------------------------------------------------===// @@ -1040,10 +988,10 @@ _foo: instead of: _foo: - movl $255, %eax - orl 4(%esp), %eax - andl $65535, %eax - ret + movl $65280, %eax + andl 4(%esp), %eax + orl $255, %eax + ret //===---------------------------------------------------------------------===// @@ -1165,58 +1113,6 @@ abs: //===---------------------------------------------------------------------===// -Consider: -int test(unsigned long a, unsigned long b) { return -(a < b); } - -We currently compile this to: - -define i32 @test(i32 %a, i32 %b) nounwind { - %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1] - %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - %tmp5 = sub i32 0, %tmp34 ; <i32> [#uses=1] - ret i32 %tmp5 -} - -and - -_test: - movl 8(%esp), %eax - cmpl %eax, 4(%esp) - setb %al - movzbl %al, %eax - negl %eax - ret - -Several deficiencies here. First, we should instcombine zext+neg into sext: - -define i32 @test2(i32 %a, i32 %b) nounwind { - %tmp3 = icmp ult i32 %a, %b ; <i1> [#uses=1] - %tmp34 = sext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp34 -} - -However, before we can do that, we have to fix the bad codegen that we get for -sext from bool: - -_test2: - movl 8(%esp), %eax - cmpl %eax, 4(%esp) - setb %al - movzbl %al, %eax - shll $31, %eax - sarl $31, %eax - ret - -This code should be at least as good as the code above. Once this is fixed, we -can optimize this specific case even more to: - - movl 8(%esp), %eax - xorl %ecx, %ecx - cmpl %eax, 4(%esp) - sbbl %ecx, %ecx - -//===---------------------------------------------------------------------===// - Take the following code (from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16541): @@ -1605,6 +1501,8 @@ loop, the value comes into the loop as two values, and RegsForValue::getCopyFromRegs doesn't know how to put an AssertSext on the constructed BUILD_PAIR which represents the cast value. +This can be handled by making CodeGenPrepare sink the cast. + //===---------------------------------------------------------------------===// Test instructions can be eliminated by using EFLAGS values from arithmetic @@ -1736,46 +1634,6 @@ Ideal output: //===---------------------------------------------------------------------===// -Testcase: -int x(int a) { return (a & 0x80) ? 0x100 : 0; } -int y(int a) { return (a & 0x80) *2; } - -Current: - testl $128, 4(%esp) - setne %al - movzbl %al, %eax - shll $8, %eax - ret - -Better: - movl 4(%esp), %eax - addl %eax, %eax - andl $256, %eax - ret - -This is another general instcombine transformation that is profitable on all -targets. In LLVM IR, these functions look like this: - -define i32 @x(i32 %a) nounwind readnone { -entry: - %0 = and i32 %a, 128 - %1 = icmp eq i32 %0, 0 - %iftmp.0.0 = select i1 %1, i32 0, i32 256 - ret i32 %iftmp.0.0 -} - -define i32 @y(i32 %a) nounwind readnone { -entry: - %0 = shl i32 %a, 1 - %1 = and i32 %0, 256 - ret i32 %1 -} - -Replacing an icmp+select with a shift should always be considered profitable in -instcombine. - -//===---------------------------------------------------------------------===// - Re-implement atomic builtins __sync_add_and_fetch() and __sync_sub_and_fetch properly. 
@@ -1960,3 +1818,100 @@ load, making it non-trivial to determine if there's anything between the load and the store which would prohibit narrowing. //===---------------------------------------------------------------------===// + +This code: +void foo(unsigned x) { + if (x == 0) bar(); + else if (x == 1) qux(); +} + +currently compiles into: +_foo: + movl 4(%esp), %eax + cmpl $1, %eax + je LBB0_3 + testl %eax, %eax + jne LBB0_4 + +the testl could be removed: +_foo: + movl 4(%esp), %eax + cmpl $1, %eax + je LBB0_3 + jb LBB0_4 + +0 is the only unsigned number < 1. + +//===---------------------------------------------------------------------===// + +This code: + +%0 = type { i32, i1 } + +define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp { +entry: + %uadd = tail call %0 @llvm.uadd.with.overflow.i32(i32 %sum, i32 %x) + %cmp = extractvalue %0 %uadd, 1 + %inc = zext i1 %cmp to i32 + %add = add i32 %x, %sum + %z.0 = add i32 %add, %inc + ret i32 %z.0 +} + +declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + +compiles to: + +_add32carry: ## @add32carry + addl %esi, %edi + sbbl %ecx, %ecx + movl %edi, %eax + subl %ecx, %eax + ret + +But it could be: + +_add32carry: + leal (%rsi,%rdi), %eax + cmpl %esi, %eax + adcl $0, %eax + ret + +//===---------------------------------------------------------------------===// + +This: +char t(char c) { + return c/3; +} + +Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer + +_t: ## @t + movslq %edi, %rax + imulq $-1431655765, %rax, %rcx ## imm = 0xFFFFFFFFAAAAAAAB + shrq $32, %rcx + addl %ecx, %eax + movl %eax, %ecx + shrl $31, %ecx + shrl %eax + addl %ecx, %eax + movsbl %al, %eax + ret + +GCC gets: + +_t: + movl $86, %eax + imulb %dil + shrw $8, %ax + sarb $7, %dil + subb %dil, %al + movsbl %al,%eax + ret + +which is nicer. This also happens for int, not just char. + +//===---------------------------------------------------------------------===// + + + diff --git a/lib/Target/X86/Utils/CMakeLists.txt b/lib/Target/X86/Utils/CMakeLists.txt new file mode 100644 index 0000000..3ad5f99 --- /dev/null +++ b/lib/Target/X86/Utils/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMX86Utils + X86ShuffleDecode.cpp + ) +add_dependencies(LLVMX86Utils X86CodeGenTable_gen) diff --git a/lib/Target/X86/Utils/Makefile b/lib/Target/X86/Utils/Makefile new file mode 100644 index 0000000..1df6f0f --- /dev/null +++ b/lib/Target/X86/Utils/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/X86/Utils/Makefile -----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMX86Utils + +# Hack: we need to include 'main' x86 target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp new file mode 100644 index 0000000..1287977 --- /dev/null +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -0,0 +1,148 @@ +//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Define several functions to decode x86 specific shuffle semantics into a +// generic vector mask. +// +//===----------------------------------------------------------------------===// + +#include "X86ShuffleDecode.h" + +//===----------------------------------------------------------------------===// +// Vector Mask Decoding +//===----------------------------------------------------------------------===// + +namespace llvm { + +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { + // Default to copying the dest value. + ShuffleMask.push_back(0); + ShuffleMask.push_back(1); + ShuffleMask.push_back(2); + ShuffleMask.push_back(3); + + // Decode the immediate. + unsigned ZMask = Imm & 15; + unsigned CountD = (Imm >> 4) & 3; + unsigned CountS = (Imm >> 6) & 3; + + // CountS selects which input element to use. + unsigned InVal = 4+CountS; + // CountD specifies which element of destination to update. + ShuffleMask[CountD] = InVal; + // ZMask zaps values, potentially overriding the CountD elt. + if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; + if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; + if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; + if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; +} + +// <3,1> or <6,7,2,3> +void DecodeMOVHLPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = NElts/2; i != NElts; ++i) + ShuffleMask.push_back(NElts+i); + + for (unsigned i = NElts/2; i != NElts; ++i) + ShuffleMask.push_back(i); +} + +// <0,2> or <0,1,4,5> +void DecodeMOVLHPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) + ShuffleMask.push_back(i); + + for (unsigned i = 0; i != NElts/2; ++i) + ShuffleMask.push_back(NElts+i); + } + +void DecodePSHUFMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts; ++i) { + ShuffleMask.push_back(Imm % NElts); + Imm /= NElts; + } +} + +void DecodePSHUFHWMask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + ShuffleMask.push_back(0); + ShuffleMask.push_back(1); + ShuffleMask.push_back(2); + ShuffleMask.push_back(3); + for (unsigned i = 0; i != 4; ++i) { + ShuffleMask.push_back(4+(Imm & 3)); + Imm >>= 2; + } +} + +void DecodePSHUFLWMask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != 4; ++i) { + ShuffleMask.push_back((Imm & 3)); + Imm >>= 2; + } + ShuffleMask.push_back(4); + ShuffleMask.push_back(5); + ShuffleMask.push_back(6); + ShuffleMask.push_back(7); +} + +void DecodePUNPCKLMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(i); + ShuffleMask.push_back(i+NElts); + } +} + +void DecodePUNPCKHMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(i+NElts/2); + ShuffleMask.push_back(i+NElts+NElts/2); + } +} + +void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + // Part that reads from dest. + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(Imm % NElts); + Imm /= NElts; + } + // Part that reads from src.
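+  // (Src elements are offset by NElts; e.g. NElts == 4 with Imm == 0xE4 +  // gives the combined mask <0,1,6,7>.)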
+ for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(Imm % NElts + NElts); + Imm /= NElts; + } +} + +void DecodeUNPCKHPMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(i+NElts/2); // Reads from dest + ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src + } +} + + +/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd +/// etc. NElts indicates the number of elements in the vector allowing it to +/// handle different datatypes and vector widths. +void DecodeUNPCKLPMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask) { + for (unsigned i = 0; i != NElts/2; ++i) { + ShuffleMask.push_back(i); // Reads from dest + ShuffleMask.push_back(i+NElts); // Reads from src + } +} + +} // llvm namespace diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h new file mode 100644 index 0000000..50d9ccb --- /dev/null +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -0,0 +1,69 @@ +//===-- X86ShuffleDecode.h - X86 shuffle decode logic -----------*-C++-*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Define several functions to decode x86 specific shuffle semantics into a +// generic vector mask. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_SHUFFLE_DECODE_H +#define X86_SHUFFLE_DECODE_H + +#include "llvm/ADT/SmallVector.h" + +//===----------------------------------------------------------------------===// +// Vector Mask Decoding +//===----------------------------------------------------------------------===// + +namespace llvm { +enum { + SM_SentinelZero = ~0U +}; + +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); + +// <3,1> or <6,7,2,3> +void DecodeMOVHLPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +// <0,2> or <0,1,4,5> +void DecodeMOVLHPSMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePSHUFMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePSHUFHWMask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePSHUFLWMask(unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePUNPCKLMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodePUNPCKHMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); + +void DecodeUNPCKHPMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + + +/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd +/// etc. NElts indicates the number of elements in the vector allowing it to +/// handle different datatypes and vector widths. 
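+/// For example, NElts == 4 produces <0,4,1,5>, interleaving the low halves +/// of the two inputs.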
+void DecodeUNPCKLPMask(unsigned NElts, + SmallVectorImpl<unsigned> &ShuffleMask); + +} // llvm namespace + +#endif diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 27e8850..0ca4366 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -15,6 +15,7 @@ #ifndef TARGET_X86_H #define TARGET_X86_H +#include "llvm/Support/DataTypes.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -23,11 +24,13 @@ class FunctionPass; class JITCodeEmitter; class MCCodeEmitter; class MCContext; +class MCObjectWriter; class MachineCodeEmitter; class Target; class TargetAsmBackend; class X86TargetMachine; class formatted_raw_ostream; +class raw_ostream; /// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. @@ -74,6 +77,13 @@ FunctionPass *createEmitX86CodeToMemory(); /// FunctionPass *createX86MaxStackAlignmentHeuristicPass(); + +/// createX86MachObjectWriter - Construct an X86 Mach-O object writer. +MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype); + extern Target TheX86_32Target, TheX86_64Target; } // End llvm namespace diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index a19f1ac..efb6c8c 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -23,6 +23,9 @@ include "llvm/Target/Target.td" def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", "Enable conditional move instructions">; +def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true", + "Support POPCNT instruction">; + def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", "Enable MMX instructions">; @@ -45,7 +48,7 @@ def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41", [FeatureSSSE3]>; def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42", "Enable SSE 4.2 instructions", - [FeatureSSE41]>; + [FeatureSSE41, FeaturePOPCNT]>; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", "Enable 3DNow! instructions">; def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", @@ -63,7 +66,8 @@ def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem", "IsUAMemFast", "true", "Fast unaligned memory access">; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", - "Support SSE 4a instructions">; + "Support SSE 4a instructions", + [FeaturePOPCNT]>; def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true", "Enable AVX instructions">; @@ -112,11 +116,13 @@ def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, FeatureFastUAMem]>; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge -def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, - FeatureFastUAMem, FeatureAES]>; -// Sandy Bridge does not have FMA -// FIXME: Wikipedia says it does... it should have AES as well. -def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>; +def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, + FeatureFastUAMem, FeatureAES, FeatureCLMUL]>; +// SSE is not listed here since llvm treats AVX as a reimplementation of SSE, +// rather than a superset. +// FIXME: Disabling AVX for now since it's not ready. 
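+// (Presumably FeatureAVX would simply be added back to this list once the
+// AVX support is ready.)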
+def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit, + FeatureAES, FeatureCLMUL]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; @@ -176,7 +182,7 @@ include "X86CallingConv.td" //===----------------------------------------------------------------------===// -// Assembly Printers +// Assembly Parser //===----------------------------------------------------------------------===// // Currently the X86 assembly parser only supports ATT syntax. @@ -191,15 +197,21 @@ def ATTAsmParser : AsmParser { string RegisterPrefix = "%"; } +//===----------------------------------------------------------------------===// +// Assembly Printers +//===----------------------------------------------------------------------===// + // The X86 target supports two different syntaxes for emitting machine code. // This is controlled by the -x86-asm-syntax={att|intel} def ATTAsmWriter : AsmWriter { string AsmWriterClassName = "ATTInstPrinter"; int Variant = 0; + bit isMCAsmWriter = 1; } def IntelAsmWriter : AsmWriter { string AsmWriterClassName = "IntelInstPrinter"; int Variant = 1; + bit isMCAsmWriter = 1; } def X86 : Target { diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 69dc967..da5f5b1 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -11,50 +11,83 @@ #include "X86.h" #include "X86FixupKinds.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/ELFObjectWriter.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MachObjectWriter.h" +#include "llvm/Object/MachOFormat.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmBackend.h" using namespace llvm; - static unsigned getFixupKindLog2Size(unsigned Kind) { switch (Kind) { default: assert(0 && "invalid fixup kind!"); - case X86::reloc_pcrel_1byte: + case FK_PCRel_1: case FK_Data_1: return 0; - case X86::reloc_pcrel_2byte: + case FK_PCRel_2: case FK_Data_2: return 1; - case X86::reloc_pcrel_4byte: + case FK_PCRel_4: case X86::reloc_riprel_4byte: case X86::reloc_riprel_4byte_movq_load: + case X86::reloc_signed_4byte: + case X86::reloc_global_offset_table: case FK_Data_4: return 2; + case FK_PCRel_8: case FK_Data_8: return 3; } } namespace { + +class X86ELFObjectWriter : public MCELFObjectTargetWriter { +public: + X86ELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine, + bool HasRelocationAddend) + : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {} +}; + class X86AsmBackend : public TargetAsmBackend { public: X86AsmBackend(const Target &T) - : TargetAsmBackend(T) {} + : TargetAsmBackend() {} + + unsigned getNumFixupKinds() const { + return X86::NumTargetFixupKinds; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = { + { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, + { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel}, + { "reloc_signed_4byte", 0, 4 * 8, 0}, + { "reloc_global_offset_table", 0, 4 * 8, 0} + }; + + if (Kind < FirstTargetFixupKind) + return 
TargetAsmBackend::getFixupKindInfo(Kind); - void ApplyFixup(const MCFixup &Fixup, MCDataFragment &DF, + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); - assert(Fixup.getOffset() + Size <= DF.getContents().size() && + assert(Fixup.getOffset() + Size <= DataSize && "Invalid fixup offset!"); for (unsigned i = 0; i != Size; ++i) - DF.getContents()[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); + Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); } bool MayNeedRelaxation(const MCInst &Inst) const; @@ -63,9 +96,9 @@ public: bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; }; -} // end anonymous namespace +} // end anonymous namespace -static unsigned getRelaxedOpcode(unsigned Op) { +static unsigned getRelaxedOpcodeBranch(unsigned Op) { switch (Op) { default: return Op; @@ -90,16 +123,104 @@ static unsigned getRelaxedOpcode(unsigned Op) { } } +static unsigned getRelaxedOpcodeArith(unsigned Op) { + switch (Op) { + default: + return Op; + + // IMUL + case X86::IMUL16rri8: return X86::IMUL16rri; + case X86::IMUL16rmi8: return X86::IMUL16rmi; + case X86::IMUL32rri8: return X86::IMUL32rri; + case X86::IMUL32rmi8: return X86::IMUL32rmi; + case X86::IMUL64rri8: return X86::IMUL64rri32; + case X86::IMUL64rmi8: return X86::IMUL64rmi32; + + // AND + case X86::AND16ri8: return X86::AND16ri; + case X86::AND16mi8: return X86::AND16mi; + case X86::AND32ri8: return X86::AND32ri; + case X86::AND32mi8: return X86::AND32mi; + case X86::AND64ri8: return X86::AND64ri32; + case X86::AND64mi8: return X86::AND64mi32; + + // OR + case X86::OR16ri8: return X86::OR16ri; + case X86::OR16mi8: return X86::OR16mi; + case X86::OR32ri8: return X86::OR32ri; + case X86::OR32mi8: return X86::OR32mi; + case X86::OR64ri8: return X86::OR64ri32; + case X86::OR64mi8: return X86::OR64mi32; + + // XOR + case X86::XOR16ri8: return X86::XOR16ri; + case X86::XOR16mi8: return X86::XOR16mi; + case X86::XOR32ri8: return X86::XOR32ri; + case X86::XOR32mi8: return X86::XOR32mi; + case X86::XOR64ri8: return X86::XOR64ri32; + case X86::XOR64mi8: return X86::XOR64mi32; + + // ADD + case X86::ADD16ri8: return X86::ADD16ri; + case X86::ADD16mi8: return X86::ADD16mi; + case X86::ADD32ri8: return X86::ADD32ri; + case X86::ADD32mi8: return X86::ADD32mi; + case X86::ADD64ri8: return X86::ADD64ri32; + case X86::ADD64mi8: return X86::ADD64mi32; + + // SUB + case X86::SUB16ri8: return X86::SUB16ri; + case X86::SUB16mi8: return X86::SUB16mi; + case X86::SUB32ri8: return X86::SUB32ri; + case X86::SUB32mi8: return X86::SUB32mi; + case X86::SUB64ri8: return X86::SUB64ri32; + case X86::SUB64mi8: return X86::SUB64mi32; + + // CMP + case X86::CMP16ri8: return X86::CMP16ri; + case X86::CMP16mi8: return X86::CMP16mi; + case X86::CMP32ri8: return X86::CMP32ri; + case X86::CMP32mi8: return X86::CMP32mi; + case X86::CMP64ri8: return X86::CMP64ri32; + case X86::CMP64mi8: return X86::CMP64mi32; + + // PUSH + case X86::PUSHi8: return X86::PUSHi32; + } +} + +static unsigned getRelaxedOpcode(unsigned Op) { + unsigned R = getRelaxedOpcodeArith(Op); + if (R != Op) + return R; + return getRelaxedOpcodeBranch(Op); +} + bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const { + // Branches can always be relaxed. 
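+  // (getRelaxedOpcodeBranch above widens their 1-byte-displacement forms to
+  //  the 4-byte forms, e.g. when a jump target ends up out of rel8 range.)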
+ if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode()) + return true; + // Check if this instruction is ever relaxable. - if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode()) + if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode()) return false; - // If so, just assume it can be relaxed. Once we support relaxing more complex - // instructions we should check that the instruction actually has symbolic - // operands before doing this, but we need to be careful about things like - // PCrel. - return true; + + // Check if it has an expression and is not RIP relative. + bool hasExp = false; + bool hasRIP = false; + for (unsigned i = 0; i < Inst.getNumOperands(); ++i) { + const MCOperand &Op = Inst.getOperand(i); + if (Op.isExpr()) + hasExp = true; + + if (Op.isReg() && Op.getReg() == X86::RIP) + hasRIP = true; + } + + // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on + // how we do relaxations? + return hasExp && !hasRIP; } // FIXME: Can tblgen help at all here to verify there aren't other instructions @@ -123,10 +244,8 @@ void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { /// WriteNopData - Write optimal nops to the output file for the \arg Count /// bytes. This returns the number of bytes written. It may return 0 if /// the \arg Count is more than the maximum optimal nops. -/// -/// FIXME this is X86 32-bit specific and should move to a better place. bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { - static const uint8_t Nops[16][16] = { + static const uint8_t Nops[10][10] = { // nop {0x90}, // xchg %ax,%ax @@ -147,32 +266,16 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, // nopw %cs:0L(%[re]ax,%[re]ax,1) {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, - // nopl 0(%[re]ax,%[re]ax,1) - // nopw 0(%[re]ax,%[re]ax,1) - {0x0f, 0x1f, 0x44, 0x00, 0x00, - 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, - // nopw 0(%[re]ax,%[re]ax,1) - // nopw 0(%[re]ax,%[re]ax,1) - {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, - 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, - // nopw 0(%[re]ax,%[re]ax,1) - // nopl 0L(%[re]ax) */ - {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, - 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, - // nopl 0L(%[re]ax) - // nopl 0L(%[re]ax) - {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, - 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, - // nopl 0L(%[re]ax) - // nopl 0L(%[re]ax,%[re]ax,1) - {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, - 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00} }; // Write an optimal sequence for the first 15 bytes. - uint64_t OptimalCount = (Count < 16) ? Count : 15; - for (uint64_t i = 0, e = OptimalCount; i != e; i++) - OW->Write8(Nops[OptimalCount - 1][i]); + const uint64_t OptimalCount = (Count < 16) ? Count : 15; + const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10; + for (uint64_t i = 0, e = Prefixes; i != e; i++) + OW->Write8(0x66); + const uint64_t Rest = OptimalCount - Prefixes; + for (uint64_t i = 0, e = Rest; i != e; i++) + OW->Write8(Nops[Rest - 1][i]); // Finish with single byte nops. 
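+  // (Counts beyond the 15-byte maximum instruction length cannot be covered
+  //  by one instruction, so the remainder is padded with 0x90 one-byte nops.)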
for (uint64_t i = OptimalCount, e = Count; i != e; ++i) @@ -186,75 +289,60 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { namespace { class ELFX86AsmBackend : public X86AsmBackend { public: - ELFX86AsmBackend(const Target &T) - : X86AsmBackend(T) { - HasAbsolutizedSet = true; - HasScatteredSymbols = true; + Triple::OSType OSType; + ELFX86AsmBackend(const Target &T, Triple::OSType _OSType) + : X86AsmBackend(T), OSType(_OSType) { + HasReliableSymbolDifference = true; } - bool isVirtualSection(const MCSection &Section) const { - const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section); - return SE.getType() == MCSectionELF::SHT_NOBITS;; + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { + const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section); + return ES.getFlags() & ELF::SHF_MERGE; } }; class ELFX86_32AsmBackend : public ELFX86AsmBackend { public: - ELFX86_32AsmBackend(const Target &T) - : ELFX86AsmBackend(T) {} + ELFX86_32AsmBackend(const Target &T, Triple::OSType OSType) + : ELFX86AsmBackend(T, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return new ELFObjectWriter(OS, /*Is64Bit=*/false, - /*IsLittleEndian=*/true, - /*HasRelocationAddend=*/false); + return createELFObjectWriter(new X86ELFObjectWriter(false, OSType, + ELF::EM_386, false), + OS, /*IsLittleEndian*/ true); } }; class ELFX86_64AsmBackend : public ELFX86AsmBackend { public: - ELFX86_64AsmBackend(const Target &T) - : ELFX86AsmBackend(T) {} + ELFX86_64AsmBackend(const Target &T, Triple::OSType OSType) + : ELFX86AsmBackend(T, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return new ELFObjectWriter(OS, /*Is64Bit=*/true, - /*IsLittleEndian=*/true, - /*HasRelocationAddend=*/true); + return createELFObjectWriter(new X86ELFObjectWriter(true, OSType, + ELF::EM_X86_64, true), + OS, /*IsLittleEndian*/ true); } }; class WindowsX86AsmBackend : public X86AsmBackend { bool Is64Bit; + public: WindowsX86AsmBackend(const Target &T, bool is64Bit) : X86AsmBackend(T) , Is64Bit(is64Bit) { - HasScatteredSymbols = true; } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createWinCOFFObjectWriter(OS, Is64Bit); } - - bool isVirtualSection(const MCSection &Section) const { - const MCSectionCOFF &SE = static_cast<const MCSectionCOFF&>(Section); - return SE.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; - } }; class DarwinX86AsmBackend : public X86AsmBackend { public: DarwinX86AsmBackend(const Target &T) - : X86AsmBackend(T) { - HasAbsolutizedSet = true; - HasScatteredSymbols = true; - } - - bool isVirtualSection(const MCSection &Section) const { - const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); - return (SMO.getType() == MCSectionMachO::S_ZEROFILL || - SMO.getType() == MCSectionMachO::S_GB_ZEROFILL || - SMO.getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL); - } + : X86AsmBackend(T) { } }; class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { @@ -263,7 +351,9 @@ public: : DarwinX86AsmBackend(T) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return new MachObjectWriter(OS, /*Is64Bit=*/false); + return createX86MachObjectWriter(OS, /*Is64Bit=*/false, + object::mach::CTM_i386, + object::mach::CSX86_ALL); } }; @@ -275,7 +365,9 @@ public: } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return new MachObjectWriter(OS, /*Is64Bit=*/true); + return createX86MachObjectWriter(OS, /*Is64Bit=*/true, + object::mach::CTM_x86_64, + 
object::mach::CSX86_ALL); } virtual bool doesSectionRequireSymbols(const MCSection &Section) const { @@ -312,7 +404,7 @@ public: } }; -} // end anonymous namespace +} // end anonymous namespace TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, const std::string &TT) { @@ -322,9 +414,12 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, case Triple::MinGW32: case Triple::Cygwin: case Triple::Win32: - return new WindowsX86AsmBackend(T, false); + if (Triple(TT).getEnvironment() == Triple::MachO) + return new DarwinX86_32AsmBackend(T); + else + return new WindowsX86AsmBackend(T, false); default: - return new ELFX86_32AsmBackend(T); + return new ELFX86_32AsmBackend(T, Triple(TT).getOS()); } } @@ -333,11 +428,14 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, switch (Triple(TT).getOS()) { case Triple::Darwin: return new DarwinX86_64AsmBackend(T); - case Triple::MinGW64: + case Triple::MinGW32: case Triple::Cygwin: case Triple::Win32: - return new WindowsX86AsmBackend(T, true); + if (Triple(TT).getEnvironment() == Triple::MachO) + return new DarwinX86_64AsmBackend(T); + else + return new WindowsX86AsmBackend(T, true); default: - return new ELFX86_64AsmBackend(T); + return new ELFX86_64AsmBackend(T, Triple(TT).getOS()); } } diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 20110ad..99b4479 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #include "X86AsmPrinter.h" -#include "AsmPrinter/X86ATTInstPrinter.h" -#include "AsmPrinter/X86IntelInstPrinter.h" +#include "InstPrinter/X86ATTInstPrinter.h" +#include "InstPrinter/X86IntelInstPrinter.h" #include "X86MCInstLower.h" #include "X86.h" #include "X86COFFMachineModuleInfo.h" @@ -48,21 +48,15 @@ using namespace llvm; // Primitive Helper Functions. //===----------------------------------------------------------------------===// -void X86AsmPrinter::PrintPICBaseSymbol(raw_ostream &O) const { - const TargetLowering *TLI = TM.getTargetLowering(); - O << *static_cast<const X86TargetLowering*>(TLI)->getPICBaseSymbol(MF, - OutContext); -} - /// runOnMachineFunction - Emit the function body. /// bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); - if (Subtarget->isTargetCOFF()) { + if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) { bool Intrn = MF.getFunction()->hasInternalLinkage(); OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC + OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL); OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT); @@ -95,7 +89,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, break; case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); - + MCSymbol *GVSym; if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) GVSym = GetSymbolWithGlobalValueBase(GV, "$stub"); @@ -109,11 +103,11 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, // Handle dllimport linkage. 
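+    // (The __imp_ prefix redirects the reference to the import address table
+    //  entry that holds the address of the actual definition.)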
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT) GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName()); - + if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) { MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = + MachineModuleInfoImpl::StubValueTy &StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym); if (StubSym.getPointer() == 0) StubSym = MachineModuleInfoImpl:: @@ -133,7 +127,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, StubSym = MachineModuleInfoImpl:: StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage()); } - + // If the name begins with a dollar-sign, enclose it in parens. We do this // to avoid having it look like an integer immediate to the assembler. if (GVSym->getName()[0] != '$') @@ -149,7 +143,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, SmallString<128> TempNameStr; TempNameStr += StringRef(MO.getSymbolName()); TempNameStr += StringRef("$stub"); - + MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); MachineModuleInfoImpl::StubValueTy &StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); @@ -163,17 +157,17 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, } else { SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName()); } - + // If the name begins with a dollar-sign, enclose it in parens. We do this // to avoid having it look like an integer immediate to the assembler. - if (SymToPrint->getName()[0] != '$') + if (SymToPrint->getName()[0] != '$') O << *SymToPrint; else O << '(' << *SymToPrint << '('; break; } } - + switch (MO.getTargetFlags()) { default: llvm_unreachable("Unknown target flag on GV operand"); @@ -185,15 +179,12 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, // These affect the name of the symbol, not any suffix. break; case X86II::MO_GOT_ABSOLUTE_ADDRESS: - O << " + [.-"; - PrintPICBaseSymbol(O); - O << ']'; - break; + O << " + [.-" << *MF->getPICBaseSymbol() << ']'; + break; case X86II::MO_PIC_BASE_OFFSET: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: - O << '-'; - PrintPICBaseSymbol(O); + O << '-' << *MF->getPICBaseSymbol(); break; case X86II::MO_TLSGD: O << "@TLSGD"; break; case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break; @@ -206,8 +197,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, case X86II::MO_PLT: O << "@PLT"; break; case X86II::MO_TLVP: O << "@TLVP"; break; case X86II::MO_TLVP_PIC_BASE: - O << "@TLVP" << '-'; - PrintPICBaseSymbol(O); + O << "@TLVP" << '-' << *MF->getPICBaseSymbol(); break; } } @@ -262,7 +252,7 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, case MachineOperand::MO_JumpTableIndex: case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: { O << '$'; printSymbolOperand(MO, O); @@ -298,10 +288,10 @@ void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op, if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") && BaseReg.getReg() == X86::RIP) HasBaseReg = false; - + // HasParenPart - True if we will print out the () part of the mem ref. 
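+  // (i.e. the "(base,index,scale)" portion of an AT&T memory operand of the
+  //  form disp(base,index,scale).)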
bool HasParenPart = IndexReg.getReg() || HasBaseReg; - + if (DispSpec.isImm()) { int DispVal = DispSpec.getImm(); if (DispVal || !HasParenPart) @@ -312,6 +302,9 @@ void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op, printSymbolOperand(MI->getOperand(Op+3), O); } + if (Modifier && strcmp(Modifier, "H") == 0) + O << "+8"; + if (HasParenPart) { assert(IndexReg.getReg() != X86::ESP && "X86 doesn't allow scaling by ESP"); @@ -344,10 +337,8 @@ void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O) { - PrintPICBaseSymbol(O); - O << '\n'; - PrintPICBaseSymbol(O); - O << ':'; + O << *MF->getPICBaseSymbol() << '\n'; + O << *MF->getPICBaseSymbol() << ':'; } bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, @@ -386,14 +377,14 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (ExtraCode[1] != 0) return true; // Unknown modifier. const MachineOperand &MO = MI->getOperand(OpNo); - + switch (ExtraCode[0]) { default: return true; // Unknown modifier. case 'a': // This is an address. Currently only 'i' and 'r' are expected. if (MO.isImm()) { O << MO.getImm(); return false; - } + } if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) { printSymbolOperand(MO, O); if (Subtarget->isPICStyleRIPRel()) @@ -470,6 +461,9 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, case 'q': // Print SImode register // These only apply to registers, ignore on mem. break; + case 'H': + printMemReference(MI, OpNo, O, "H"); + return false; case 'P': // Don't print @PLT, but do print as memory. printMemReference(MI, OpNo, O, "no-rip"); return false; @@ -480,23 +474,23 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, } void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { - if (Subtarget->isTargetDarwin()) + if (Subtarget->isTargetEnvMacho()) OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); } void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { - if (Subtarget->isTargetDarwin()) { + if (Subtarget->isTargetEnvMacho()) { // All darwin targets use mach-o. MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo<MachineModuleInfoMachO>(); - + // Output stubs for dynamically-linked functions. MachineModuleInfoMachO::SymbolListTy Stubs; Stubs = MMIMacho.GetFnStubList(); if (!Stubs.empty()) { - const MCSection *TheSection = + const MCSection *TheSection = OutContext.getMachOSection("__IMPORT", "__jump_table", MCSectionMachO::S_SYMBOL_STUBS | MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE | @@ -514,7 +508,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { const char HltInsts[] = { -12, -12, -12, -12, -12 }; OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/); } - + Stubs.clear(); OutStreamer.AddBlankLine(); } @@ -522,7 +516,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // Output stubs for external and common global variables. 
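+    // (These are the $non_lazy_ptr entries recorded by printSymbolOperand
+    //  while printing global-value operands.)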
Stubs = MMIMacho.GetGVStubList(); if (!Stubs.empty()) { - const MCSection *TheSection = + const MCSection *TheSection = OutContext.getMachOSection("__IMPORT", "__pointers", MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, SectionKind::getMetadata()); @@ -580,7 +574,14 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } - if (Subtarget->isTargetCOFF()) { + if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() && + MMI->callsExternalVAFunctionWithFloatingPointArguments()) { + StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused"; + MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName); + OutStreamer.EmitSymbolAttribute(S, MCSA_Global); + } + + if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) { X86COFFMachineModuleInfo &COFFMMI = MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); @@ -661,12 +662,12 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } -MachineLocation +MachineLocation X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { MachineLocation Location; assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!"); // Frame address. Currently handles register +- offset only. - + if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm()) Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); else { @@ -690,9 +691,9 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, O << V.getName(); O << " <- "; // Frame address. Currently handles register +- offset only. - O << '['; + O << '['; if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg()) - printOperand(MI, 0, O); + printOperand(MI, 0, O); else O << "undef"; O << '+'; printOperand(MI, 3, O); @@ -718,10 +719,10 @@ static MCInstPrinter *createX86MCInstPrinter(const Target &T, } // Force static initialization. -extern "C" void LLVMInitializeX86AsmPrinter() { +extern "C" void LLVMInitializeX86AsmPrinter() { RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target); RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target); - + TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter); TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter); } diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index e61be66..3a50435 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -75,8 +75,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O); - void PrintPICBaseSymbol(raw_ostream &O) const; - bool runOnMachineFunction(MachineFunction &F); void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index e3409ef..a44fb69 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -48,7 +48,7 @@ def RetCC_X86Common : CallingConv<[ // MMX vector types are always returned in MM0. If the target doesn't have // MM0, it doesn't support these vector types. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>, + CCIfType<[x86mmx, v1i64], CCAssignToReg<[MM0]>>, // Long double types are always returned in ST0 (even with SSE). CCIfType<[f80], CCAssignToReg<[ST0, ST1]>> @@ -61,7 +61,7 @@ def RetCC_X86_32_C : CallingConv<[ // weirdly; this is really the sse-regparm calling convention) in which // case they use XMM0, otherwise it is the same as the common X86 calling // conv. 
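+  // (hasXMMInt() is true for both SSE2 and AVX targets, so AVX-only
+  //  configurations take the XMM0 path as well.)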
- CCIfInReg<CCIfSubtarget<"hasSSE2()", + CCIfInReg<CCIfSubtarget<"hasXMMInt()", CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>, CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>, CCDelegateTo<RetCC_X86Common> @@ -73,8 +73,8 @@ def RetCC_X86_32_Fast : CallingConv<[ // SSE2. // This can happen when a float, 2 x float, or 3 x float vector is split by // target lowering, and is returned in 1-3 sse regs. - CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, - CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, + CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, + CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, // For integers, ECX can be used as an extra return register CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>, @@ -95,14 +95,14 @@ def RetCC_X86_64_C : CallingConv<[ // returned in RAX. This disagrees with ABI documentation but is bug // compatible with gcc. CCIfType<[v1i64], CCAssignToReg<[RAX]>>, - CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[XMM0, XMM1]>>, + CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>, CCDelegateTo<RetCC_X86Common> ]>; // X86-Win64 C return-value convention. def RetCC_X86_Win64_C : CallingConv<[ // The X86-Win64 calling convention always returns __m64 values in RAX. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>, + CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>, // And FP in XMM0 only. CCIfType<[f32], CCAssignToReg<[XMM0]>>, @@ -161,14 +161,14 @@ def CC_X86_64_C : CallingConv<[ // The first 8 MMX (except for v1i64) vector arguments are passed in XMM // registers on Darwin. - CCIfType<[v8i8, v4i16, v2i32], + CCIfType<[x86mmx], CCIfSubtarget<"isTargetDarwin()", - CCIfSubtarget<"hasSSE2()", + CCIfSubtarget<"hasXMMInt()", CCPromoteToType<v2i64>>>>, // The first 8 FP/Vector arguments are passed in XMM registers. CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfSubtarget<"hasSSE1()", + CCIfSubtarget<"hasXMM()", CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, // The first 8 256-bit vector arguments are passed in YMM registers. @@ -192,7 +192,7 @@ def CC_X86_64_C : CallingConv<[ CCAssignToStack<32, 32>>, // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> + CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>> ]>; // Calling convention used on Win64 @@ -210,8 +210,7 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>, // The first 4 MMX vector arguments are passed in GPRs. - CCIfType<[v8i8, v4i16, v2i32, v1i64], - CCBitConvertToType<i64>>, + CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>, // The first 4 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ], @@ -233,7 +232,7 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[f80], CCAssignToStack<0, 0>>, // __m64 vectors get 8-byte stack slots that are 8-byte aligned. 
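+  // (x86mmx is the dedicated MMX type that replaces the old v8i8/v4i16/v2i32
+  //  vector types throughout these tables.)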
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> + CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>> ]>; def CC_X86_64_GHC : CallingConv<[ @@ -246,7 +245,7 @@ def CC_X86_64_GHC : CallingConv<[ // Pass in STG registers: F1, F2, F3, F4, D1, D2 CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfSubtarget<"hasSSE1()", + CCIfSubtarget<"hasXMM()", CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>> ]>; @@ -264,12 +263,12 @@ def CC_X86_32_Common : CallingConv<[ // The first 3 float or double arguments, if marked 'inreg' and if the call // is not a vararg call and if SSE2 is available, are passed in SSE registers. CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64], - CCIfSubtarget<"hasSSE2()", + CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>, // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx // registers if the call is not a vararg call. - CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32], + CCIfNotVarArg<CCIfType<[x86mmx], CCAssignToReg<[MM0, MM1, MM2]>>>, // Integer/Float values get stored in stack slots that are 4 bytes in @@ -300,7 +299,7 @@ def CC_X86_32_Common : CallingConv<[ // __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are // passed in the parameter area. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>; + CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 4>>]>; def CC_X86_32_C : CallingConv<[ // Promote i8/i16 arguments to i32. @@ -363,7 +362,7 @@ def CC_X86_32_FastCC : CallingConv<[ // The first 3 float or double arguments, if the call is not a vararg // call and if SSE2 is available, are passed in SSE registers. CCIfNotVarArg<CCIfType<[f32,f64], - CCIfSubtarget<"hasSSE2()", + CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>>, // Doubles get 8-byte slots that are 8-byte aligned. @@ -380,3 +379,35 @@ def CC_X86_32_GHC : CallingConv<[ // Pass in STG registers: Base, Sp, Hp, R1 CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>> ]>; + +//===----------------------------------------------------------------------===// +// X86 Root Argument Calling Conventions +//===----------------------------------------------------------------------===// + +// This is the root argument convention for the X86-32 backend. +def CC_X86_32 : CallingConv<[ + CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>, + CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>, + CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>, + CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>, + + // Otherwise, drop to normal X86-32 CC + CCDelegateTo<CC_X86_32_C> +]>; + +// This is the root argument convention for the X86-64 backend. +def CC_X86_64 : CallingConv<[ + CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>, + + // Mingw64 and native Win64 use Win64 CC + CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>, + + // Otherwise, drop to normal X86-64 CC + CCDelegateTo<CC_X86_64_C> +]>; + +// This is the argument convention used for the entire X86 backend. 
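+// It dispatches on the subtarget first (64-bit vs. 32-bit) and then lets the
+// root conventions above select on the specific calling convention.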
+def CC_X86 : CallingConv<[ + CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>, + CCDelegateTo<CC_X86_32> +]>; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 824021c..60d9d4a 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -68,8 +68,7 @@ namespace { return "X86 Machine Code Emitter"; } - void emitInstruction(const MachineInstr &MI, - const TargetInstrDesc *Desc); + void emitInstruction(MachineInstr &MI, const TargetInstrDesc *Desc); void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -131,7 +130,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { MCE.StartMachineBasicBlock(MBB); - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { const TargetInstrDesc &Desc = I->getDesc(); emitInstruction(*I, &Desc); @@ -598,9 +597,23 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, } template<class CodeEmitter> -void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, +void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, const TargetInstrDesc *Desc) { DEBUG(dbgs() << MI); + + // If this is a pseudo instruction, lower it. + switch (Desc->getOpcode()) { + case X86::ADD16rr_DB: Desc = &II->get(X86::OR16rr); MI.setDesc(*Desc);break; + case X86::ADD32rr_DB: Desc = &II->get(X86::OR32rr); MI.setDesc(*Desc);break; + case X86::ADD64rr_DB: Desc = &II->get(X86::OR64rr); MI.setDesc(*Desc);break; + case X86::ADD16ri_DB: Desc = &II->get(X86::OR16ri); MI.setDesc(*Desc);break; + case X86::ADD32ri_DB: Desc = &II->get(X86::OR32ri); MI.setDesc(*Desc);break; + case X86::ADD64ri32_DB:Desc = &II->get(X86::OR64ri32);MI.setDesc(*Desc);break; + case X86::ADD16ri8_DB: Desc = &II->get(X86::OR16ri8);MI.setDesc(*Desc);break; + case X86::ADD32ri8_DB: Desc = &II->get(X86::OR32ri8);MI.setDesc(*Desc);break; + case X86::ADD64ri8_DB: Desc = &II->get(X86::OR64ri8);MI.setDesc(*Desc);break; + } + MCE.processDebugLoc(MI.getDebugLoc(), true); diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index f84995d..f1d7ede 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -14,6 +14,7 @@ #include "X86ELFWriterInfo.h" #include "X86Relocations.h" #include "llvm/Function.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -24,8 +25,8 @@ using namespace llvm; // Implementation of the X86ELFWriterInfo class //===----------------------------------------------------------------------===// -X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM) - : TargetELFWriterInfo(TM) { +X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_) + : TargetELFWriterInfo(is64Bit_, isLittleEndian_) { EMachine = is64Bit ? 
EM_X86_64 : EM_386; } @@ -35,13 +36,13 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { if (is64Bit) { switch(MachineRelTy) { case X86::reloc_pcrel_word: - return R_X86_64_PC32; + return ELF::R_X86_64_PC32; case X86::reloc_absolute_word: - return R_X86_64_32; + return ELF::R_X86_64_32; case X86::reloc_absolute_word_sext: - return R_X86_64_32S; + return ELF::R_X86_64_32S; case X86::reloc_absolute_dword: - return R_X86_64_64; + return ELF::R_X86_64_64; case X86::reloc_picrel_word: default: llvm_unreachable("unknown x86_64 machine relocation type"); @@ -49,9 +50,9 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { } else { switch(MachineRelTy) { case X86::reloc_pcrel_word: - return R_386_PC32; + return ELF::R_386_PC32; case X86::reloc_absolute_word: - return R_386_32; + return ELF::R_386_32; case X86::reloc_absolute_word_sext: case X86::reloc_absolute_dword: case X86::reloc_picrel_word: @@ -66,18 +67,18 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, long int Modifier) const { if (is64Bit) { switch(RelTy) { - case R_X86_64_PC32: return Modifier - 4; - case R_X86_64_32: - case R_X86_64_32S: - case R_X86_64_64: + case ELF::R_X86_64_PC32: return Modifier - 4; + case ELF::R_X86_64_32: + case ELF::R_X86_64_32S: + case ELF::R_X86_64_64: return Modifier; default: llvm_unreachable("unknown x86_64 relocation type"); } } else { switch(RelTy) { - case R_386_PC32: return Modifier - 4; - case R_386_32: return Modifier; + case ELF::R_386_PC32: return Modifier - 4; + case ELF::R_386_32: return Modifier; default: llvm_unreachable("unknown x86 relocation type"); } @@ -88,19 +89,19 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const { if (is64Bit) { switch(RelTy) { - case R_X86_64_PC32: - case R_X86_64_32: - case R_X86_64_32S: + case ELF::R_X86_64_PC32: + case ELF::R_X86_64_32: + case ELF::R_X86_64_32S: return 32; - case R_X86_64_64: + case ELF::R_X86_64_64: return 64; default: llvm_unreachable("unknown x86_64 relocation type"); } } else { switch(RelTy) { - case R_386_PC32: - case R_386_32: + case ELF::R_386_PC32: + case ELF::R_386_32: return 32; default: llvm_unreachable("unknown x86 relocation type"); @@ -112,20 +113,20 @@ unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const { bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { if (is64Bit) { switch(RelTy) { - case R_X86_64_PC32: + case ELF::R_X86_64_PC32: return true; - case R_X86_64_32: - case R_X86_64_32S: - case R_X86_64_64: + case ELF::R_X86_64_32: + case ELF::R_X86_64_32S: + case ELF::R_X86_64_64: return false; default: llvm_unreachable("unknown x86_64 relocation type"); } } else { switch(RelTy) { - case R_386_PC32: + case ELF::R_386_PC32: return true; - case R_386_32: + case ELF::R_386_32: return false; default: llvm_unreachable("unknown x86 relocation type"); @@ -143,7 +144,7 @@ long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset, unsigned RelOffset, unsigned RelTy) const { - if (RelTy == R_X86_64_PC32 || RelTy == R_386_PC32) + if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32) return SymOffset - (RelOffset + 4); else assert("computeRelocation unknown for this relocation type"); diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h index 342e6e6..a45b5bb 100644 --- a/lib/Target/X86/X86ELFWriterInfo.h +++ b/lib/Target/X86/X86ELFWriterInfo.h @@ -20,25 +20,8 @@ namespace llvm { class X86ELFWriterInfo : public 
TargetELFWriterInfo { - // ELF Relocation types for X86 - enum X86RelocationType { - R_386_NONE = 0, - R_386_32 = 1, - R_386_PC32 = 2 - }; - - // ELF Relocation types for X86_64 - enum X86_64RelocationType { - R_X86_64_NONE = 0, - R_X86_64_64 = 1, - R_X86_64_PC32 = 2, - R_X86_64_32 = 10, - R_X86_64_32S = 11, - R_X86_64_PC64 = 24 - }; - public: - X86ELFWriterInfo(TargetMachine &TM); + X86ELFWriterInfo(bool is64Bit_, bool isLittleEndian_); virtual ~X86ELFWriterInfo(); /// getRelocationType - Returns the target specific ELF Relocation type. diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 0c70eec..9d42ac2 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -36,7 +36,7 @@ using namespace llvm; namespace { - + class X86FastISel : public FastISel { /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -46,7 +46,7 @@ class X86FastISel : public FastISel { /// unsigned StackPtr; - /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 + /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 /// floating point ops. /// When SSE is available, use it for f32 operations. /// When SSE2 is available, use it for f64 operations. @@ -63,11 +63,18 @@ public: virtual bool TargetSelectInstruction(const Instruction *I); + /// TryToFoldLoad - The specified machine instr operand is a vreg, and that + /// vreg is being provided by the specified load instruction. If possible, + /// try to fold the load as an operand to the instruction, returning true if + /// possible. + virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI); + #include "X86GenFastISel.inc" private: bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT); - + bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); bool X86FastEmitStore(EVT VT, const Value *Val, @@ -77,12 +84,12 @@ private: bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); - + bool X86SelectAddress(const Value *V, X86AddressMode &AM); bool X86SelectCallAddress(const Value *V, X86AddressMode &AM); bool X86SelectLoad(const Instruction *I); - + bool X86SelectStore(const Instruction *I); bool X86SelectRet(const Instruction *I); @@ -98,7 +105,7 @@ private: bool X86SelectSelect(const Instruction *I); bool X86SelectTrunc(const Instruction *I); - + bool X86SelectFPExt(const Instruction *I); bool X86SelectFPTrunc(const Instruction *I); @@ -107,9 +114,6 @@ private: bool X86VisitIntrinsicCall(const IntrinsicInst &I); bool X86SelectCall(const Instruction *I); - CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false); - CCAssignFn *CCAssignFnForRet(CallingConv::ID CC, bool isTailCall = false); - const X86InstrInfo *getInstrInfo() const { return getTargetMachine()->getInstrInfo(); } @@ -128,17 +132,18 @@ private: (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } - bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false); + bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false); }; - + } // end anonymous namespace. -bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) { - VT = TLI.getValueType(Ty, /*HandleUnknown=*/true); - if (VT == MVT::Other || !VT.isSimple()) +bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) { + EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true); + if (evt == MVT::Other || !evt.isSimple()) // Unhandled type. 
Halt "fast" selection and bail. return false; - + + VT = evt.getSimpleVT(); // For now, require SSE/SSE2 for performing floating-point operations, // since x87 requires additional work. if (VT == MVT::f64 && !X86ScalarSSEf64) @@ -157,45 +162,6 @@ bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) { #include "X86GenCallingConv.inc" -/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling -/// convention. -CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC, - bool isTaillCall) { - if (Subtarget->is64Bit()) { - if (CC == CallingConv::GHC) - return CC_X86_64_GHC; - else if (Subtarget->isTargetWin64()) - return CC_X86_Win64_C; - else - return CC_X86_64_C; - } - - if (CC == CallingConv::X86_FastCall) - return CC_X86_32_FastCall; - else if (CC == CallingConv::X86_ThisCall) - return CC_X86_32_ThisCall; - else if (CC == CallingConv::Fast) - return CC_X86_32_FastCC; - else if (CC == CallingConv::GHC) - return CC_X86_32_GHC; - else - return CC_X86_32_C; -} - -/// CCAssignFnForRet - Selects the correct CCAssignFn for a given calling -/// convention. -CCAssignFn *X86FastISel::CCAssignFnForRet(CallingConv::ID CC, - bool isTaillCall) { - if (Subtarget->is64Bit()) { - if (Subtarget->isTargetWin64()) - return RetCC_X86_Win64_C; - else - return RetCC_X86_64_C; - } - - return RetCC_X86_32_C; -} - /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT. /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. /// Return true and the result register by reference if it is possible. @@ -284,7 +250,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m; break; } - + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)), AM).addReg(Val); return true; @@ -295,7 +261,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, // Handle 'null' like i32/i64 0. if (isa<ConstantPointerNull>(Val)) Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext())); - + // If this is a store of a simple constant, fold the constant into the store. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { unsigned Opc = 0; @@ -312,7 +278,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, Opc = X86::MOV64mi32; break; } - + if (Opc) { addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)), AM) @@ -321,11 +287,11 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, return true; } } - + unsigned ValReg = getRegForValue(Val); if (ValReg == 0) - return false; - + return false; + return X86FastEmitStore(VT, ValReg, AM); } @@ -337,7 +303,7 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned &ResultReg) { unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src, /*TODO: Kill=*/false); - + if (RR != 0) { ResultReg = RR; return true; @@ -354,11 +320,11 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // Don't walk into other basic blocks; it's possible we haven't // visited them yet, so the instructions may not yet be assigned // virtual registers. 
- if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB) - return false; - - Opcode = I->getOpcode(); - U = I; + if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) || + FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { + Opcode = I->getOpcode(); + U = I; + } } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { Opcode = C->getOpcode(); U = C; @@ -472,7 +438,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { AM.Disp = (uint32_t)Disp; if (X86SelectAddress(U->getOperand(0), AM)) return true; - + // If we couldn't merge the sub value into this addr mode, revert back to // our address and just match the value instead of completely failing. AM = SavedAM; @@ -501,7 +467,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // Okay, we've committed to selecting this global. Set up the basic address. AM.GV = GV; - + // Allow the subtarget to classify the global. unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM); @@ -510,7 +476,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // FIXME: How do we know Base.Reg is free?? AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); } - + // Unless the ABI requires an extra load, return a direct reference to // the global. if (!isGlobalStubReference(GVFlags)) { @@ -523,7 +489,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { AM.GVOpFlags = GVFlags; return true; } - + // Ok, we need to do a load from a stub. If we've already loaded from this // stub, reuse the loaded pointer, otherwise emit the load now. DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V); @@ -545,14 +511,14 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { if (TLI.getPointerTy() == MVT::i64) { Opc = X86::MOV64rm; RC = X86::GR64RegisterClass; - + if (Subtarget->isPICStyleRIPRel()) StubAM.Base.Reg = X86::RIP; } else { Opc = X86::MOV32rm; RC = X86::GR32RegisterClass; } - + LoadReg = createResultReg(RC); MachineInstrBuilder LoadMI = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg); @@ -564,7 +530,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // Prevent loading GV stub multiple times in same MBB. LocalValueMap[V] = LoadReg; } - + // Now construct the final address. Note that the Disp, Scale, // and Index values may already be set here. AM.Base.Reg = LoadReg; @@ -638,7 +604,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { // Okay, we've committed to selecting this global. Set up the basic address. AM.GV = GV; - + // No ABI requires an extra load for anything other than DLLImport, which // we rejected above. Return a direct reference to the global. if (Subtarget->isPICStyleRIPRel()) { @@ -651,7 +617,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { } else if (Subtarget->isPICStyleGOT()) { AM.GVOpFlags = X86II::MO_GOTOFF; } - + return true; } @@ -674,7 +640,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { /// X86SelectStore - Select and emit code to implement store instructions. bool X86FastISel::X86SelectStore(const Instruction *I) { - EVT VT; + MVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; @@ -724,7 +690,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. 
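+  // (AnalyzeReturn below walks the RetCC_X86 table to give each returned
+  //  value a register location.)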
SmallVector<CCValAssign, 16> ValLocs; CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext()); - CCInfo.AnalyzeReturn(Outs, CCAssignFnForRet(CC)); + CCInfo.AnalyzeReturn(Outs, RetCC_X86); const Value *RV = Ret->getOperand(0); unsigned Reg = getRegForValue(RV); @@ -736,7 +702,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { return false; CCValAssign &VA = ValLocs[0]; - + // Don't bother handling odd stuff for now. if (VA.getLocInfo() != CCValAssign::Full) return false; @@ -745,7 +711,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { return false; // TODO: For now, don't try to handle cases where getLocInfo() // says Full but the types don't match. - if (VA.getValVT() != TLI.getValueType(RV->getType())) + if (TLI.getValueType(RV->getType()) != VA.getValVT()) return false; // The calling-convention tables for x87 returns don't tell @@ -775,7 +741,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { /// X86SelectLoad - Select and emit code to implement load instructions. /// bool X86FastISel::X86SelectLoad(const Instruction *I) { - EVT VT; + MVT VT; if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true)) return false; @@ -826,11 +792,11 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT) { unsigned Op0Reg = getRegForValue(Op0); if (Op0Reg == 0) return false; - + // Handle 'null' like i32/i64 0. if (isa<ConstantPointerNull>(Op1)) Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext())); - + // We have two options: compare with register or immediate. If the RHS of // the compare is an immediate that we can fold into this compare, use // CMPri, otherwise use CMPrr. @@ -842,23 +808,23 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, return true; } } - + unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget); if (CompareOpc == 0) return false; - + unsigned Op1Reg = getRegForValue(Op1); if (Op1Reg == 0) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc)) .addReg(Op0Reg) .addReg(Op1Reg); - + return true; } bool X86FastISel::X86SelectCmp(const Instruction *I) { const CmpInst *CI = cast<CmpInst>(I); - EVT VT; + MVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT)) return false; @@ -869,13 +835,13 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { case CmpInst::FCMP_OEQ: { if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT)) return false; - + unsigned EReg = createResultReg(&X86::GR8RegClass); unsigned NPReg = createResultReg(&X86::GR8RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNPr), NPReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg); UpdateValueMap(I, ResultReg); return true; @@ -908,7 +874,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break; case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break; case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break; - + case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break; case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break; case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break; @@ -930,7 +896,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { // Emit a compare of Op0/Op1. 
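+  // (X86FastEmitCompare folds a constant RHS into a CMPri form when it can,
+  //  and otherwise falls back to CMPrr.)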
if (!X86FastEmitCompare(Op0, Op1, VT)) return false; - + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg); UpdateValueMap(I, ResultReg); return true; @@ -995,7 +961,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break; case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; - + case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break; case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break; case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break; @@ -1009,7 +975,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { default: return false; } - + const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); if (SwapArgs) std::swap(Op0, Op1); @@ -1017,7 +983,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { // Emit a compare of the LHS and RHS, setting the flags. if (!X86FastEmitCompare(Op0, Op1, VT)) return false; - + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc)) .addMBB(TrueMBB); @@ -1070,8 +1036,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { } const TargetInstrDesc &TID = MI.getDesc(); - if (TID.hasUnmodeledSideEffects() || - TID.hasImplicitDefOfPhysReg(X86::EFLAGS)) + if (TID.hasImplicitDefOfPhysReg(X86::EFLAGS) || + MI.hasUnmodeledSideEffects()) break; } @@ -1147,22 +1113,22 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { return false; } - EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); - if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) + MVT VT; + if (!isTypeLegal(I->getType(), VT)) return false; unsigned Op0Reg = getRegForValue(I->getOperand(0)); if (Op0Reg == 0) return false; - + // Fold immediate in shl(x,3). if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm), ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff); UpdateValueMap(I, ResultReg); return true; } - + unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), @@ -1183,23 +1149,26 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { } bool X86FastISel::X86SelectSelect(const Instruction *I) { - EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); - if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) + MVT VT; + if (!isTypeLegal(I->getType(), VT)) return false; - + + // We only use cmov here, if we don't have a cmov instruction bail. 
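+  // (Bailing out here hands the select over to SelectionDAG, which can lower
+  //  it with a branch sequence instead.)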
+ if (!Subtarget->hasCMov()) return false; + unsigned Opc = 0; const TargetRegisterClass *RC = NULL; - if (VT.getSimpleVT() == MVT::i16) { + if (VT == MVT::i16) { Opc = X86::CMOVE16rr; RC = &X86::GR16RegClass; - } else if (VT.getSimpleVT() == MVT::i32) { + } else if (VT == MVT::i32) { Opc = X86::CMOVE32rr; RC = &X86::GR32RegClass; - } else if (VT.getSimpleVT() == MVT::i64) { + } else if (VT == MVT::i64) { Opc = X86::CMOVE64rr; RC = &X86::GR64RegClass; } else { - return false; + return false; } unsigned Op0Reg = getRegForValue(I->getOperand(0)); @@ -1264,7 +1233,7 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { return false; EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); - + // This code only handles truncation to byte right now. if (DstVT != MVT::i8 && DstVT != MVT::i1) // All other cases should be handled by the tblgen generated code. @@ -1335,21 +1304,21 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // Grab the frame index. X86AddressMode AM; if (!X86SelectAddress(Slot, AM)) return false; - + if (!X86FastEmitStore(PtrTy, Op1, AM)) return false; - + return true; } case Intrinsic::objectsize: { ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); const Type *Ty = I.getCalledFunction()->getReturnType(); - + assert(CI && "Non-constant type in Intrinsic::objectsize?"); - - EVT VT; + + MVT VT; if (!isTypeLegal(Ty, VT)) return false; - + unsigned OpC = 0; if (VT == MVT::i32) OpC = X86::MOV32ri; @@ -1357,7 +1326,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { OpC = X86::MOV64ri; else return false; - + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg). addImm(CI->isZero() ? -1ULL : 0); @@ -1392,7 +1361,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { const Type *RetTy = cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0)); - EVT VT; + MVT VT; if (!isTypeLegal(RetTy, VT)) return false; @@ -1429,7 +1398,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { ResultReg = DestReg1+1; else ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8)); - + unsigned Opc = X86::SETBr; if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow) Opc = X86::SETOr; @@ -1476,7 +1445,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Handle *simple* calls for now. const Type *RetTy = CS.getType(); - EVT RetVT; + MVT RetVT; if (RetTy->isVoidTy()) RetVT = MVT::isVoid; else if (!isTypeLegal(RetTy, RetVT, true)) @@ -1506,7 +1475,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Deal with call operands first. SmallVector<const Value *, 8> ArgVals; SmallVector<unsigned, 8> Args; - SmallVector<EVT, 8> ArgVTs; + SmallVector<MVT, 8> ArgVTs; SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; Args.reserve(CS.arg_size()); ArgVals.reserve(CS.arg_size()); @@ -1532,7 +1501,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { return false; const Type *ArgTy = (*i)->getType(); - EVT ArgVT; + MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT)) return false; unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); @@ -1547,13 +1516,13 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. 
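// (CC_X86 gives each argument either a register location or a stack
//  offset; the ArgLocs loop that follows copies register arguments into
//  VA.getLocReg() and stores stack arguments at StackPtr + LocMemOffset.)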
SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext()); - + // Allocate shadow area for Win64 - if (Subtarget->isTargetWin64()) { - CCInfo.AllocateStack(32, 8); + if (Subtarget->isTargetWin64()) { + CCInfo.AllocateStack(32, 8); } - CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -1570,7 +1539,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { CCValAssign &VA = ArgLocs[i]; unsigned Arg = Args[VA.getValNo()]; EVT ArgVT = ArgVTs[VA.getValNo()]; - + // Promote the value if needed. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); @@ -1578,20 +1547,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { case CCValAssign::SExt: { bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), Arg, ArgVT, Arg); - assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted; - Emitted = true; + assert(Emitted && "Failed to emit a sext!"); (void)Emitted; ArgVT = VA.getLocVT(); break; } case CCValAssign::ZExt: { bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), Arg, ArgVT, Arg); - assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted; - Emitted = true; + assert(Emitted && "Failed to emit a zext!"); (void)Emitted; ArgVT = VA.getLocVT(); break; } case CCValAssign::AExt: { + // We don't handle MMX parameters yet. + if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() == 128) + return false; bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), Arg, ArgVT, Arg); if (!Emitted) @@ -1600,21 +1570,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (!Emitted) Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), Arg, ArgVT, Arg); - - assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted; + + assert(Emitted && "Failed to emit a aext!"); (void)Emitted; ArgVT = VA.getLocVT(); break; } case CCValAssign::BCvt: { - unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(), - ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false); + unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(), + ISD::BITCAST, Arg, /*TODO: Kill=*/false); assert(BC != 0 && "Failed to emit a bitcast!"); Arg = BC; ArgVT = VA.getLocVT(); break; } } - + if (VA.isRegLoc()) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg); @@ -1625,7 +1595,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { AM.Base.Reg = StackPtr; AM.Disp = LocMemOffset; const Value *ArgVal = ArgVals[VA.getValNo()]; - + // If this is a really simple value, emit this with the Value* version of // X86FastEmitStore. If it isn't simple, we don't want to do this, as it // can cause us to reevaluate the argument. @@ -1637,13 +1607,13 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { } // ELF / PIC requires GOT in the EBX register before function calls via PLT - // GOT pointer. + // GOT pointer. if (Subtarget->isPICStyleGOT()) { unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base); } - + // Issue the call. 
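// (Two shapes are emitted below: indirect calls use a register-call opcode
//  such as CALL32r on the register computed for the callee, while direct
//  calls use CALLpcrel32/CALL64pcrel32 plus a target flag, e.g. a PLT or
//  Darwin-stub flag, when the PIC style requires one.)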
MachineInstrBuilder MIB;
 if (CalleeOp) {
@@ -1657,7 +1627,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
 CallOpc = X86::CALL32r;
 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
 .addReg(CalleeOp);
-
+
 } else {
 // Direct call.
 assert(GV && "Not a direct call");
@@ -1668,10 +1638,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
 CallOpc = X86::CALL64pcrel32;
 else
 CallOpc = X86::CALLpcrel32;
-
+
 // See if we need any target-specific flags on the GV operand.
 unsigned char OpFlags = 0;
-
+
 // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
 // external symbols must go through the PLT in PIC mode. If the symbol
 // has hidden or protected visibility, or if it is static or local, then
@@ -1688,8 +1658,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
 // automatically synthesizes these stubs.
 OpFlags = X86II::MO_DARWIN_STUB;
 }
-
-
+
+
 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
 .addGlobalAddress(GV, 0, OpFlags);
 }
@@ -1709,7 +1679,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
 // Now handle call return value (if any).
 SmallVector<unsigned, 4> UsedRegs;
- if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
+ if (RetVT != MVT::isVoid) {
 SmallVector<CCValAssign, 16> RVLocs;
 CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
 CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
@@ -1718,7 +1688,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
 assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
 EVT CopyVT = RVLocs[0].getValVT();
 TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
-
+
 // If this is a call to a function that returns an fp value on the x87 fp
 // stack, but where we prefer to use the value in xmm registers, copy it
 // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
@@ -1756,7 +1726,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
 if (AndToI1) {
 // Mask out all but lowest bit for some call which produces an i1.
 unsigned AndResult = createResultReg(X86::GR8RegisterClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
 TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
 ResultReg = AndResult;
 }
@@ -1823,14 +1793,14 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
 }
 unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
- EVT VT;
+ MVT VT;
 if (!isTypeLegal(C->getType(), VT))
 return false;
-
+
 // Get opcode and regclass of the output for the given load instruction.
 unsigned Opc = 0;
 const TargetRegisterClass *RC = NULL;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
 default: return false;
 case MVT::i8:
 Opc = X86::MOV8rm;
@@ -1871,7 +1841,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
 // No f80 support yet.
 return false;
 }
-
+
 // Materialize addresses with LEA instructions.
 if (isa<GlobalValue>(C)) {
 X86AddressMode AM;
@@ -1887,14 +1857,14 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
 }
 return 0;
 }
-
+
 // MachineConstantPool wants an explicit alignment.
 unsigned Align = TD.getPrefTypeAlignment(C->getType());
 if (Align == 0) {
 // Alignment of vector types. FIXME!
 Align = TD.getTypeAllocSize(C->getType());
 }
-
+
 // x86-32 PIC requires a PIC base register for constant pools.
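// (Concretely, and assuming the usual X86 PIC styles: Darwin stub PIC
//  addresses the pool entry off a PIC base register with a pic-base-offset
//  flag, ELF/GOT PIC uses a GOT-offset flag against the global base
//  register, and small-code-model x86-64 simply addresses the constant
//  pool RIP-relatively with no extra register at all.)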
unsigned PICBase = 0; unsigned char OpFlag = 0; @@ -1941,6 +1911,34 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { return ResultReg; } +/// TryToFoldLoad - The specified machine instr operand is a vreg, and that +/// vreg is being provided by the specified load instruction. If possible, +/// try to fold the load as an operand to the instruction, returning true if +/// possible. +bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { + X86AddressMode AM; + if (!X86SelectAddress(LI->getOperand(0), AM)) + return false; + + X86InstrInfo &XII = (X86InstrInfo&)TII; + + unsigned Size = TD.getTypeAllocSize(LI->getType()); + unsigned Alignment = LI->getAlignment(); + + SmallVector<MachineOperand, 8> AddrOps; + AM.getFullAddress(AddrOps); + + MachineInstr *Result = + XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment); + if (Result == 0) return false; + + FuncInfo.MBB->insert(FuncInfo.InsertPt, Result); + MI->eraseFromParent(); + return true; +} + + namespace llvm { llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { return new X86FastISel(funcInfo); diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h index 96e0aae..17d242a 100644 --- a/lib/Target/X86/X86FixupKinds.h +++ b/lib/Target/X86/X86FixupKinds.h @@ -15,11 +15,17 @@ namespace llvm { namespace X86 { enum Fixups { - reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. - reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 - reloc_pcrel_2byte, // 16-bit pcrel, e.g. callw - reloc_riprel_4byte, // 32-bit rip-relative - reloc_riprel_4byte_movq_load // 32-bit rip-relative in movq + reloc_riprel_4byte = FirstTargetFixupKind, // 32-bit rip-relative + reloc_riprel_4byte_movq_load, // 32-bit rip-relative in movq + reloc_signed_4byte, // 32-bit signed. Unlike FK_Data_4 + // this will be sign extended at + // runtime. + reloc_global_offset_table, // 32-bit, relative to the start + // of the instruction. Used only + // for _GLOBAL_OFFSET_TABLE_. + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind }; } } diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index e6ebf66..3aaa693 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -32,6 +32,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -51,6 +52,7 @@ namespace { struct FPS : public MachineFunctionPass { static char ID; FPS() : MachineFunctionPass(ID) { + initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); // This is really only to keep valgrind quiet. // The logic in isLive() is too much for it. memset(Stack, 0, sizeof(Stack)); @@ -59,6 +61,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<EdgeBundles>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -94,7 +97,7 @@ namespace { // FixStack[i] == getStackEntry(i) for all i < FixCount. unsigned char FixStack[8]; - LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {} + LiveBundle() : Mask(0), FixCount(0) {} // Have the live registers been assigned a stack order yet? 
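    // (Example: Mask == 0b0101 describes a bundle in which %FP0 and %FP2
    //  are live across the bundle's edges; FixCount says how many of those
    //  registers have already been given a slot in FixStack.)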
bool isFixed() const { return !Mask || FixCount; } @@ -104,10 +107,8 @@ namespace { // with no live FP registers. SmallVector<LiveBundle, 8> LiveBundles; - // Map each MBB in the current function to an (ingoing, outgoing) index into - // LiveBundles. Blocks with no FP registers live in or out map to (0, 0) - // and are not actually stored in the map. - DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle; + // The edge bundle analysis provides indices into the LiveBundles vector. + EdgeBundles *Bundles; // Return a bitmask of FP registers in block's live-in list. unsigned calcLiveInMask(MachineBasicBlock *MBB) { @@ -167,7 +168,8 @@ namespace { /// getStackEntry - Return the X86::FP<n> register in register ST(i). unsigned getStackEntry(unsigned STi) const { - assert(STi < StackTop && "Access past stack top!"); + if (STi >= StackTop) + report_fatal_error("Access past stack top!"); return Stack[StackTop-1-STi]; } @@ -180,7 +182,8 @@ namespace { // pushReg - Push the specified FP<n> register onto the stack. void pushReg(unsigned Reg) { assert(Reg < 8 && "Register number out of range!"); - assert(StackTop < 8 && "Stack overflow!"); + if (StackTop >= 8) + report_fatal_error("Stack overflow!"); Stack[StackTop] = Reg; RegMap[Reg] = StackTop++; } @@ -197,7 +200,8 @@ namespace { std::swap(RegMap[RegNo], RegMap[RegOnTop]); // Swap stack slot contents. - assert(RegMap[RegOnTop] < StackTop); + if (RegMap[RegOnTop] >= StackTop) + report_fatal_error("Access past stack top!"); std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); // Emit an fxch to update the runtime processors version of the state. @@ -281,6 +285,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { // Early exit. if (!FPIsUsed) return false; + Bundles = &getAnalysis<EdgeBundles>(); TII = MF.getTarget().getInstrInfo(); // Prepare cross-MBB liveness. @@ -305,7 +310,6 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { if (Processed.insert(BB)) Changed |= processBasicBlock(MF, *BB); - BlockBundle.clear(); LiveBundles.clear(); return Changed; @@ -318,90 +322,16 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { /// registers may be implicitly defined, or not used by all successors. void FPS::bundleCFG(MachineFunction &MF) { assert(LiveBundles.empty() && "Stale data in LiveBundles"); - assert(BlockBundle.empty() && "Stale data in BlockBundle"); - SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp; + LiveBundles.resize(Bundles->getNumBundles()); - // LiveBundle[0] is the empty live-in set. - LiveBundles.resize(1); - - // First gather the actual live-in masks for all MBBs. + // Gather the actual live-in masks for all MBBs. for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = I; const unsigned Mask = calcLiveInMask(MBB); if (!Mask) continue; - // Ingoing bundle index. - unsigned &Idx = BlockBundle[MBB].first; - // Already assigned an ingoing bundle? - if (Idx) - continue; - // Allocate a new LiveBundle struct for this block's live-ins. - const unsigned BundleIdx = Idx = LiveBundles.size(); - DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#" - << MBB->getNumber()); - LiveBundles.push_back(Mask); - LiveBundle &Bundle = LiveBundles.back(); - - // Make sure all predecessors have the same live-out set. - PropUp.insert(MBB); - - // Keep pushing liveness up and down the CFG until convergence. - // Only critical edges cause iteration here, but when they do, multiple - // blocks can be assigned to the same LiveBundle index. 
- do { - // Assign BundleIdx as liveout from predecessors in PropUp. - for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(), - E = PropUp.end(); I != E; ++I) { - MachineBasicBlock *MBB = *I; - for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(), - LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) { - MachineBasicBlock *PredMBB = *LinkI; - // PredMBB's liveout bundle should be set to LIIdx. - unsigned &Idx = BlockBundle[PredMBB].second; - if (Idx) { - assert(Idx == BundleIdx && "Inconsistent CFG"); - continue; - } - Idx = BundleIdx; - DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber()); - // Propagate to siblings. - if (PredMBB->succ_size() > 1) - PropDown.insert(PredMBB); - } - } - PropUp.clear(); - - // Assign BundleIdx as livein to successors in PropDown. - for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(), - E = PropDown.end(); I != E; ++I) { - MachineBasicBlock *MBB = *I; - for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(), - LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) { - MachineBasicBlock *SuccMBB = *LinkI; - // LinkMBB's livein bundle should be set to BundleIdx. - unsigned &Idx = BlockBundle[SuccMBB].first; - if (Idx) { - assert(Idx == BundleIdx && "Inconsistent CFG"); - continue; - } - Idx = BundleIdx; - DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber()); - // Propagate to siblings. - if (SuccMBB->pred_size() > 1) - PropUp.insert(SuccMBB); - // Also accumulate the bundle liveness mask from the liveins here. - Bundle.Mask |= calcLiveInMask(SuccMBB); - } - } - PropDown.clear(); - } while (!PropUp.empty()); - DEBUG({ - dbgs() << " live:"; - for (unsigned i = 0; i < 8; ++i) - if (Bundle.Mask & (1<<i)) - dbgs() << " %FP" << i; - dbgs() << '\n'; - }); + // Update MBB ingoing bundle mask. + LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask; } } @@ -489,13 +419,15 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { return Changed; } -/// setupBlockStack - Use the BlockBundle map to set up our model of the stack +/// setupBlockStack - Use the live bundles to set up our model of the stack /// to match predecessors' live out stack. void FPS::setupBlockStack() { DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber() << " derived from " << MBB->getName() << ".\n"); StackTop = 0; - const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first]; + // Get the live-in bundle for MBB. + const LiveBundle &Bundle = + LiveBundles[Bundles->getBundle(MBB->getNumber(), false)]; if (!Bundle.Mask) { DEBUG(dbgs() << "Block has no FP live-ins.\n"); @@ -532,7 +464,8 @@ void FPS::finishBlockStack() { DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber() << " derived from " << MBB->getName() << ".\n"); - unsigned BundleIdx = BlockBundle.lookup(MBB).second; + // Get MBB's live-out bundle. + unsigned BundleIdx = Bundles->getBundle(MBB->getNumber(), true); LiveBundle &Bundle = LiveBundles[BundleIdx]; // We may need to kill and define some registers to match successors. 
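// (With EdgeBundles the per-block pair that BlockBundle used to cache is
// recomputed on demand: Bundles->getBundle(N, false) names the bundle of
// BB#N's ingoing CFG edges and Bundles->getBundle(N, true) the bundle of
// its outgoing edges; blocks joined through critical edges share bundle
// numbers, which is exactly the convergence the deleted loop computed.)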
@@ -572,7 +505,8 @@ namespace { friend bool operator<(const TableEntry &TE, unsigned V) { return TE.from < V; } - friend bool ATTRIBUTE_USED operator<(unsigned V, const TableEntry &TE) { + friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V, + const TableEntry &TE) { return V < TE.from; } }; @@ -824,7 +758,8 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) { MachineInstr* MI = I; DebugLoc dl = MI->getDebugLoc(); ASSERT_SORTED(PopTable); - assert(StackTop > 0 && "Cannot pop empty stack!"); + if (StackTop == 0) + report_fatal_error("Cannot pop empty stack!"); RegMap[Stack[--StackTop]] = ~0; // Update state // Check to see if there is a popping version of this instruction... @@ -1016,7 +951,8 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { MI->getOpcode() == X86::ISTT_FP32m || MI->getOpcode() == X86::ISTT_FP64m || MI->getOpcode() == X86::ST_FP80m) { - assert(StackTop > 0 && "Stack empty??"); + if (StackTop == 0) + report_fatal_error("Stack empty??"); --StackTop; } else if (KillsSrc) { // Last use of operand? popStackAfter(I); @@ -1047,7 +983,8 @@ void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { // If this is the last use of the source register, just make sure it's on // the top of the stack. moveToTop(Reg, I); - assert(StackTop > 0 && "Stack cannot be empty!"); + if (StackTop == 0) + report_fatal_error("Stack cannot be empty!"); --StackTop; pushReg(getFPReg(MI->getOperand(0))); } else { @@ -1300,7 +1237,6 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { /// void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { MachineInstr *MI = I; - DebugLoc dl = MI->getDebugLoc(); switch (MI->getOpcode()) { default: llvm_unreachable("Unknown SpecialFP instruction!"); case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! @@ -1341,7 +1277,8 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { std::swap(RegMap[RegNo], RegMap[RegOnTop]); // Swap stack slot contents. - assert(RegMap[RegOnTop] < StackTop); + if (RegMap[RegOnTop] >= StackTop) + report_fatal_error("Access past stack top!"); std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); break; } diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp new file mode 100644 index 0000000..0a3f931 --- /dev/null +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -0,0 +1,994 @@ +//=======- X86FrameLowering.cpp - X86 Frame Information ------------*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the X86 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "X86FrameLowering.h" +#include "X86InstrBuilder.h" +#include "X86InstrInfo.h" +#include "X86MachineFunctionInfo.h" +#include "X86TargetMachine.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/SmallSet.h" + +using namespace llvm; + +// FIXME: completely move here. 
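+// (The option itself is assumed to still be defined in X86RegisterInfo.cpp,
+// along the lines of this sketch -- exact flag name and description are
+// assumptions, hence only the extern declaration below:
+//   cl::opt<bool> ForceStackAlign("force-align-stack",
+//       cl::desc("Force align the stack to the minimum required"),
+//       cl::init(false), cl::Hidden);
+// )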
+extern cl::opt<bool> ForceStackAlign;
+
+bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineModuleInfo &MMI = MF.getMMI();
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ return (DisableFramePointerElim(MF) ||
+ RI->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken() ||
+ MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+ MMI.callsUnwindInit());
+}
+
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::ADD64ri8;
+ return X86::ADD64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::ADD32ri8;
+ return X86::ADD32ri;
+ }
+}
+
+/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
+/// when it reaches the "return" instruction. We can then pop a stack object
+/// to this register without worrying about clobbering it.
+static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetRegisterInfo &TRI,
+ bool Is64Bit) {
+ const MachineFunction *MF = MBB.getParent();
+ const Function *F = MF->getFunction();
+ if (!F || MF->getMMI().callsEHReturn())
+ return 0;
+
+ static const unsigned CallerSavedRegs32Bit[] = {
+ X86::EAX, X86::EDX, X86::ECX
+ };
+
+ static const unsigned CallerSavedRegs64Bit[] = {
+ X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
+ X86::R8, X86::R9, X86::R10, X86::R11
+ };
+
+ unsigned Opc = MBBI->getOpcode();
+ switch (Opc) {
+ default: return 0;
+ case X86::RET:
+ case X86::RETI:
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ case X86::EH_RETURN:
+ case X86::EH_RETURN64: {
+ SmallSet<unsigned, 8> Uses;
+ for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MBBI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
+ Uses.insert(*AsI);
+ }
+
+ const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
+ for (; *CS; ++CS)
+ if (!Uses.count(*CS))
+ return *CS;
+ }
+ }
+
+ return 0;
+}
+
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, int64_t NumBytes,
+ bool Is64Bit, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI) {
+ bool isSub = NumBytes < 0;
+ uint64_t Offset = isSub ? -NumBytes : NumBytes;
+ unsigned Opc = isSub ?
+ getSUBriOpcode(Is64Bit, Offset) :
+ getADDriOpcode(Is64Bit, Offset);
+ uint64_t Chunk = (1LL << 31) - 1;
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ while (Offset) {
+ uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+ if (ThisVal == (Is64Bit ?
8 : 4)) {
+ // Use push / pop instead.
+ unsigned Reg = isSub
+ ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
+ : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+ if (Reg) {
+ Opc = isSub
+ ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
+ : (Is64Bit ? X86::POP64r : X86::POP32r);
+ BuildMI(MBB, MBBI, DL, TII.get(Opc))
+ .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
+ Offset -= ThisVal;
+ continue;
+ }
+ }
+
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(ThisVal);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ Offset -= ThisVal;
+ }
+}
+
+/// mergeSPUpdatesUp - Merge two stack-manipulating instructions using the
+/// upper iterator.
+static
+void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ if (MBBI == MBB.begin()) return;
+
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ }
+}
+
+/// mergeSPUpdatesDown - Merge two stack-manipulating instructions using the
+/// lower iterator.
+static
+void mergeSPUpdatesDown(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ // FIXME: THIS ISN'T RUN!!!
+ return;
+
+ if (MBBI == MBB.end()) return;
+
+ MachineBasicBlock::iterator NI = llvm::next(MBBI);
+ if (NI == MBB.end()) return;
+
+ unsigned Opc = NI->getOpcode();
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes -= NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ NI->getOperand(0).getReg() == StackPtr) {
+ if (NumBytes)
+ *NumBytes += NI->getOperand(2).getImm();
+ MBB.erase(NI);
+ MBBI = NI;
+ }
+}
+
+/// mergeSPUpdates - Checks the instruction before/after the passed
+/// instruction. If it is an ADD/SUB instruction it is deleted and the stack
+/// adjustment is returned as a positive value for ADD and a negative one
+/// for SUB.
+static int mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ unsigned StackPtr,
+ bool doMergeWithPrevious) {
+ if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
+ (!doMergeWithPrevious && MBBI == MBB.end()))
+ return 0;
+
+ MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
+ MachineBasicBlock::iterator NI = doMergeWithPrevious ?
0 : llvm::next(MBBI);
+ unsigned Opc = PI->getOpcode();
+ int Offset = 0;
+
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr){
+ Offset += PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ Offset -= PI->getOperand(2).getImm();
+ MBB.erase(PI);
+ if (!doMergeWithPrevious) MBBI = NI;
+ }
+
+ return Offset;
+}
+
+static bool isEAXLiveIn(MachineFunction &MF) {
+ for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
+ EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
+ unsigned Reg = II->first;
+
+ if (Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL)
+ return true;
+ }
+
+ return false;
+}
+
+void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
+ MCSymbol *Label,
+ unsigned FramePtr) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.empty()) return;
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const TargetData *TD = TM.getTargetData();
+ bool HasFP = hasFP(MF);
+
+ // Calculate the number of bytes used for storing the return address.
+ int stackGrowth = -TD->getPointerSize();
+
+ // FIXME: This is a dirty hack. The code itself is pretty messy right now.
+ // It should be rewritten from scratch and generalized sometime.
+
+ // Determine maximum offset (minimum due to stack growth).
+ int64_t MaxOffset = 0;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I)
+ MaxOffset = std::min(MaxOffset,
+ MFI->getObjectOffset(I->getFrameIdx()));
+
+ // Calculate offsets.
+ int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ unsigned Reg = I->getReg();
+ Offset = MaxOffset - Offset + saveAreaOffset;
+
+ // Don't output a new machine move if we're re-saving the frame
+ // pointer. This happens when the PrologEpilogInserter has inserted an extra
+ // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
+ // generates one when frame pointers are used. If we generate a "machine
+ // move" for this extra "PUSH", the linker will lose track of the fact that
+ // the frame pointer should have the value of the first "PUSH" when it's
+ // trying to unwind.
+ //
+ // FIXME: This looks inelegant. It's possibly correct, but it's covering up
+ // another bug. I.e., one where we generate a prolog like this:
+ //
+ // pushl %ebp
+ // movl %esp, %ebp
+ // pushl %ebp
+ // pushl %esi
+ // ...
+ //
+ // The immediate re-push of EBP is unnecessary. At the least, it's an
+ // optimization bug. EBP can be used as a scratch register in certain
+ // cases, but probably not when we have a frame pointer.
+ if (HasFP && FramePtr == Reg)
+ continue;
+
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ }
+}
+
+/// emitPrologue - Push callee-saved registers onto the stack, which
+/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
+/// space for local variables.
Also emit labels used by the exception handler to +/// generate the exception handling frames. +void X86FrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *Fn = MF.getFunction(); + const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); + const X86InstrInfo &TII = *TM.getInstrInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + bool needsFrameMoves = MMI.hasDebugInfo() || + !Fn->doesNotThrow() || UnwindTablesMandatory; + uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. + uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. + bool HasFP = hasFP(MF); + bool Is64Bit = STI.is64Bit(); + bool IsWin64 = STI.isTargetWin64(); + unsigned StackAlign = getStackAlignment(); + unsigned SlotSize = RegInfo->getSlotSize(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + unsigned StackPtr = RegInfo->getStackRegister(); + + DebugLoc DL; + + // If we're forcing a stack realignment we can't rely on just the frame + // info, we need to know the ABI stack alignment as well in case we + // have a call out. Otherwise just make sure we have some alignment - we'll + // go with the minimum SlotSize. + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; + else if (MaxAlign < SlotSize) + MaxAlign = SlotSize; + } + + // Add RETADDR move area to callee saved frame size. + int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + if (TailCallReturnAddrDelta < 0) + X86FI->setCalleeSavedFrameSize( + X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); + + // If this is x86-64 and the Red Zone is not disabled, if we are a leaf + // function, and use up to 128 bytes of stack space, don't have a frame + // pointer, calls, or dynamic alloca then we do not need to adjust the + // stack pointer (we fit in the Red Zone). + if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && + !RegInfo->needsStackRealignment(MF) && + !MFI->hasVarSizedObjects() && // No dynamic alloca. + !MFI->adjustsStack() && // No calls. + !IsWin64) { // Win64 has no Red Zone + uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); + if (HasFP) MinSize += SlotSize; + StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); + MFI->setStackSize(StackSize); + } + + // Insert stack pointer adjustment for later moving of return addr. Only + // applies to tail call optimized functions where the callee argument stack + // size is bigger than the callers. + if (TailCallReturnAddrDelta < 0) { + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, + TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), + StackPtr) + .addReg(StackPtr) + .addImm(-TailCallReturnAddrDelta); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 
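+    // (Worked red-zone example for the block above, with assumed numbers: a
+    //  64-bit leaf function with StackSize = 136 keeps its first 128 bytes
+    //  in the red zone, so the prologue only allocates
+    //  max(MinSize, 136 - 128) = 8 bytes when MinSize <= 8.)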
+ } + + // Mapping for machine moves: + // + // DST: VirtualFP AND + // SRC: VirtualFP => DW_CFA_def_cfa_offset + // ELSE => DW_CFA_def_cfa + // + // SRC: VirtualFP AND + // DST: Register => DW_CFA_def_cfa_register + // + // ELSE + // OFFSET < 0 => DW_CFA_offset_extended_sf + // REG < 64 => DW_CFA_offset + Reg + // ELSE => DW_CFA_offset_extended + + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + const TargetData *TD = MF.getTarget().getTargetData(); + uint64_t NumBytes = 0; + int stackGrowth = -TD->getPointerSize(); + + if (HasFP) { + // Calculate required stack adjustment. + uint64_t FrameSize = StackSize - SlotSize; + if (RegInfo->needsStackRealignment(MF)) + FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; + + NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); + + // Get the offset of the stack slot for the EBP register, which is + // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. + // Update the frame offset adjustment. + MFI->setOffsetAdjustment(-NumBytes); + + // Save EBP/RBP into the appropriate stack slot. + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) + .addReg(FramePtr, RegState::Kill); + + if (needsFrameMoves) { + // Mark the place where EBP/RBP was saved. + MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); + + // Define the current CFA rule to use the provided offset. + if (StackSize) { + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth); + Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); + } else { + MachineLocation SPDst(StackPtr); + MachineLocation SPSrc(StackPtr, stackGrowth); + Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); + } + + // Change the rule for the FramePtr to be an "offset" rule. + MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth); + MachineLocation FPSrc(FramePtr); + Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); + } + + // Update EBP with the new base value... + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) + .addReg(StackPtr); + + if (needsFrameMoves) { + // Mark effective beginning of when frame pointer becomes valid. + MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); + + // Define the current CFA to use the EBP/RBP register. + MachineLocation FPDst(FramePtr); + MachineLocation FPSrc(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); + } + + // Mark the FramePtr as live-in in every block except the entry. + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); + I != E; ++I) + I->addLiveIn(FramePtr); + + // Realign stack + if (RegInfo->needsStackRealignment(MF)) { + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), + StackPtr).addReg(StackPtr).addImm(-MaxAlign); + + // The EFLAGS implicit def is dead. + MI->getOperand(3).setIsDead(); + } + } else { + NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); + } + + // Skip the callee-saved push instructions. + bool PushedRegs = false; + int StackOffset = 2 * stackGrowth; + + while (MBBI != MBB.end() && + (MBBI->getOpcode() == X86::PUSH32r || + MBBI->getOpcode() == X86::PUSH64r)) { + PushedRegs = true; + ++MBBI; + + if (!HasFP && needsFrameMoves) { + // Mark callee-saved push instruction. 
+ MCSymbol *Label = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
+
+ // Define the current CFA rule to use the provided offset.
+ unsigned Ptr = StackSize ?
+ MachineLocation::VirtualFP : StackPtr;
+ MachineLocation SPDst(Ptr);
+ MachineLocation SPSrc(Ptr, StackOffset);
+ Moves.push_back(MachineMove(Label, SPDst, SPSrc));
+ StackOffset += stackGrowth;
+ }
+ }
+
+ DL = MBB.findDebugLoc(MBBI);
+
+ // If there is an SUB32ri of ESP immediately before this instruction, merge
+ // the two. This can be the case when tail call elimination is enabled and
+ // the callee has more arguments than the caller.
+ NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately after this
+ // instruction, merge the two instructions.
+ mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+
+ // Adjust stack pointer: ESP -= numbytes.
+
+ // Windows and cygwin/mingw require a prologue helper routine when allocating
+ // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
+ // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
+ // stack and adjust the stack pointer in one go. The 64-bit version of
+ // __chkstk is only responsible for probing the stack. The 64-bit prologue is
+ // responsible for adjusting the stack pointer. Touching the stack at 4K
+ // increments is necessary to ensure that the guard pages used by the OS
+ // virtual memory manager are allocated in the correct sequence.
+ if (NumBytes >= 4096 &&
+ (STI.isTargetCygMing() || STI.isTargetWin32()) &&
+ !STI.isTargetEnvMacho()) {
+ // Check whether EAX is livein for this function.
+ bool isEAXAlive = isEAXLiveIn(MF);
+
+ const char *StackProbeSymbol =
+ STI.isTargetWindows() ? "_chkstk" : "_alloca";
+ if (Is64Bit && STI.isTargetCygMing())
+ StackProbeSymbol = "__chkstk";
+ unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+ if (!isEAXAlive) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(CallOp))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ } else {
+ // Save EAX
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+ .addReg(X86::EAX, RegState::Kill);
+
+ // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
+ // allocated bytes for EAX.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes - 4);
+ BuildMI(MBB, MBBI, DL, TII.get(CallOp))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes - 4);
+ MBB.insert(MBBI, MI);
+ }
+ } else if (NumBytes >= 4096 &&
+ STI.isTargetWin64() &&
+ !STI.isTargetEnvMacho()) {
+ // Sanity check that EAX is not livein for this function. It should
+ // not be, so throw an assert.
+ assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
+
+ // Handle the 64-bit Windows ABI case where we need to call __chkstk.
+ // Function prologue is responsible for adjusting the stack pointer.
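+    // (Probe-helper summary for the branches here: Win32 calls _chkstk,
+    //  32-bit cygwin/mingw calls _alloca, and 64-bit mingw as well as Win64
+    //  call __chkstk; only the 32-bit helpers also move the stack pointer,
+    //  which is why this Win64 path is followed by an explicit emitSPUpdate
+    //  below.)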
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(NumBytes); + BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32)) + .addExternalSymbol("__chkstk") + .addReg(StackPtr, RegState::Define | RegState::Implicit); + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, + TII, *RegInfo); + } else if (NumBytes) + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, + TII, *RegInfo); + + if ((NumBytes || PushedRegs) && needsFrameMoves) { + // Mark end of stack pointer adjustment. + MCSymbol *Label = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); + + if (!HasFP && NumBytes) { + // Define the current CFA rule to use the provided offset. + if (StackSize) { + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, + -StackSize + stackGrowth); + Moves.push_back(MachineMove(Label, SPDst, SPSrc)); + } else { + MachineLocation SPDst(StackPtr); + MachineLocation SPSrc(StackPtr, stackGrowth); + Moves.push_back(MachineMove(Label, SPDst, SPSrc)); + } + } + + // Emit DWARF info specifying the offsets of the callee-saved registers. + if (PushedRegs) + emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr); + } +} + +void X86FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); + const X86InstrInfo &TII = *TM.getInstrInfo(); + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + assert(MBBI != MBB.end() && "Returning block has no instructions"); + unsigned RetOpcode = MBBI->getOpcode(); + DebugLoc DL = MBBI->getDebugLoc(); + bool Is64Bit = STI.is64Bit(); + unsigned StackAlign = getStackAlignment(); + unsigned SlotSize = RegInfo->getSlotSize(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + unsigned StackPtr = RegInfo->getStackRegister(); + + switch (RetOpcode) { + default: + llvm_unreachable("Can only insert epilog into returning blocks"); + case X86::RET: + case X86::RETI: + case X86::TCRETURNdi: + case X86::TCRETURNri: + case X86::TCRETURNmi: + case X86::TCRETURNdi64: + case X86::TCRETURNri64: + case X86::TCRETURNmi64: + case X86::EH_RETURN: + case X86::EH_RETURN64: + break; // These are ok + } + + // Get the number of bytes to allocate from the FrameInfo. + uint64_t StackSize = MFI->getStackSize(); + uint64_t MaxAlign = MFI->getMaxAlignment(); + unsigned CSSize = X86FI->getCalleeSavedFrameSize(); + uint64_t NumBytes = 0; + + // If we're forcing a stack realignment we can't rely on just the frame + // info, we need to know the ABI stack alignment as well in case we + // have a call out. Otherwise just make sure we have some alignment - we'll + // go with the minimum. + if (ForceStackAlign) { + if (MFI->hasCalls()) + MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; + else + MaxAlign = MaxAlign ? MaxAlign : 4; + } + + if (hasFP(MF)) { + // Calculate required stack adjustment. + uint64_t FrameSize = StackSize - SlotSize; + if (RegInfo->needsStackRealignment(MF)) + FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; + + NumBytes = FrameSize - CSSize; + + // Pop EBP. + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr); + } else { + NumBytes = StackSize - CSSize; + } + + // Skip the callee-saved pop instructions. 
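+  // (Worked example of the realignment round-up above, numbers assumed:
+  //  with StackSize = 144, SlotSize = 8 and MaxAlign = 32, FrameSize = 136
+  //  rounds to (136 + 31) / 32 * 32 = 160, so NumBytes = 160 - CSSize.)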
+ MachineBasicBlock::iterator LastCSPop = MBBI;
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = prior(MBBI);
+ unsigned Opc = PI->getOpcode();
+
+ if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
+ !PI->getDesc().isTerminator())
+ break;
+
+ --MBBI;
+ }
+
+ DL = MBBI->getDebugLoc();
+
+ // If there is an ADD32ri or SUB32ri of ESP immediately before this
+ // instruction, merge the two instructions.
+ if (NumBytes || MFI->hasVarSizedObjects())
+ mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+
+ // If dynamic alloca is used, then reset esp to point to the last callee-saved
+ // slot before popping them off! The same applies when the stack was
+ // realigned.
+ if (RegInfo->needsStackRealignment(MF)) {
+ // We cannot use LEA here, because the stack pointer was realigned. We
+ // need to deallocate the local frame explicitly.
+ if (CSSize) {
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ MBBI = prior(LastCSPop);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(FramePtr);
+ } else if (MFI->hasVarSizedObjects()) {
+ if (CSSize) {
+ unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
+ MachineInstr *MI =
+ addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
+ MBB.insert(MBBI, MI);
+ } else {
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
+ .addReg(FramePtr);
+ }
+ } else if (NumBytes) {
+ // Adjust stack pointer back: ESP += numbytes.
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ }
+
+ // We're returning from function via eh_return.
+ if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &DestAddr = MBBI->getOperand(0);
+ assert(DestAddr.isReg() && "Offset should be in register!");
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
+ StackPtr).addReg(DestAddr.getReg());
+ } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
+ RetOpcode == X86::TCRETURNmi ||
+ RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
+ RetOpcode == X86::TCRETURNmi64) {
+ bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int MaxTCDelta = X86FI->getTCReturnAddrDelta();
+ int Offset = 0;
+ assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
+
+ // Incorporate the retaddr area.
+ Offset = StackAdj-MaxTCDelta;
+ assert(Offset >= 0 && "Offset should never be negative");
+
+ if (Offset) {
+ // Check for possible merge with preceding ADD instruction.
+ Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
+ }
+
+ // Jump to label or value in register.
+ if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+ ?
X86::TAILJMPd : X86::TAILJMPd64));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+ } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
+ ? X86::TAILJMPm : X86::TAILJMPm64));
+ for (unsigned i = 0; i != 5; ++i)
+ MIB.addOperand(MBBI->getOperand(i));
+ } else if (RetOpcode == X86::TCRETURNri64) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
+ (X86FI->getTCReturnAddrDelta() < 0)) {
+ // Add the return addr area delta back since we are not tail calling.
+ int delta = -1*X86FI->getTCReturnAddrDelta();
+ MBBI = MBB.getLastNonDebugInstr();
+
+ // Check for possible merge with preceding ADD instruction.
+ delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
+ }
+}
+
+void
+X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
+ // Calculate the number of bytes used for storing the return address.
+ int stackGrowth = (STI.is64Bit() ? -8 : -4);
+ const X86RegisterInfo *RI = TM.getRegisterInfo();
+
+ // Initial state of the frame pointer is esp+stackGrowth.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(RI->getStackRegister(), stackGrowth);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // Add the return address to the move list.
+ MachineLocation CSDst(RI->getStackRegister(), stackGrowth);
+ MachineLocation CSSrc(RI->getRARegister());
+ Moves.push_back(MachineMove(0, CSDst, CSSrc));
+}
+
+int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
+ const X86RegisterInfo *RI =
+ static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+ uint64_t StackSize = MFI->getStackSize();
+
+ if (RI->needsStackRealignment(MF)) {
+ if (FI < 0) {
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize();
+ } else {
+ unsigned Align = MFI->getObjectAlignment(FI);
+ assert((-(Offset + StackSize)) % Align == 0);
+ Align = 0;
+ return Offset + StackSize;
+ }
+ // FIXME: Support tail calls
+ } else {
+ if (!hasFP(MF))
+ return Offset + StackSize;
+
+ // Skip the saved EBP.
+ Offset += RI->getSlotSize(); + + // Skip the RETADDR move area + const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + if (TailCallReturnAddrDelta < 0) + Offset -= TailCallReturnAddrDelta; + } + + return Offset; +} + +bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + DebugLoc DL = MBB.findDebugLoc(MI); + + MachineFunction &MF = *MBB.getParent(); + + bool isWin64 = STI.isTargetWin64(); + unsigned SlotSize = STI.is64Bit() ? 8 : 4; + unsigned FPReg = TRI->getFrameRegister(MF); + unsigned CalleeFrameSize = 0; + + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + + unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + // Add the callee-saved register as live-in. It's killed at the spill. + MBB.addLiveIn(Reg); + if (Reg == FPReg) + // X86RegisterInfo::emitPrologue will handle spilling of frame register. + continue; + if (!X86::VR128RegClass.contains(Reg) && !isWin64) { + CalleeFrameSize += SlotSize; + BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), + RC, TRI); + } + } + + X86FI->setCalleeSavedFrameSize(CalleeFrameSize); + return true; +} + +bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + DebugLoc DL = MBB.findDebugLoc(MI); + + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + unsigned FPReg = TRI->getFrameRegister(MF); + bool isWin64 = STI.isTargetWin64(); + unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + if (Reg == FPReg) + // X86RegisterInfo::emitEpilogue will handle restoring of frame register. + continue; + if (!X86::VR128RegClass.contains(Reg) && !isWin64) { + BuildMI(MBB, MI, DL, TII.get(Opc), Reg); + } else { + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), + RC, TRI); + } + } + return true; +} + +void +X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); + unsigned SlotSize = RegInfo->getSlotSize(); + + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + + if (TailCallReturnAddrDelta < 0) { + // create RETURNADDR area + // arg + // arg + // RETADDR + // { ... + // RETADDR area + // ... 
+ // } + // [EBP] + MFI->CreateFixedObject(-TailCallReturnAddrDelta, + (-1U*SlotSize)+TailCallReturnAddrDelta, true); + } + + if (hasFP(MF)) { + assert((TailCallReturnAddrDelta <= 0) && + "The Delta should always be zero or negative"); + const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering(); + + // Create a frame entry for the EBP register that must be saved. + int FrameIdx = MFI->CreateFixedObject(SlotSize, + -(int)SlotSize + + TFI.getOffsetOfLocalArea() + + TailCallReturnAddrDelta, + true); + assert(FrameIdx == MFI->getObjectIndexBegin() && + "Slot for EBP register must be last in order to be found!"); + FrameIdx = 0; + } +} diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h new file mode 100644 index 0000000..d71108c --- /dev/null +++ b/lib/Target/X86/X86FrameLowering.h @@ -0,0 +1,65 @@ +//=-- X86TargetFrameLowering.h - Define frame lowering for X86 ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements X86-specific bits of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_FRAMELOWERING_H +#define X86_FRAMELOWERING_H + +#include "X86Subtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + class MCSymbol; + class X86TargetMachine; + +class X86FrameLowering : public TargetFrameLowering { + const X86TargetMachine &TM; + const X86Subtarget &STI; +public: + explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti) + : TargetFrameLowering(StackGrowsDown, + sti.getStackAlignment(), + (sti.is64Bit() ? -8 : -4)), + TM(tm), STI(sti) { + } + + void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label, + unsigned FramePtr) const; + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. 
+ void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS = NULL) const; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool hasFP(const MachineFunction &MF) const; + bool hasReservedCallFrame(const MachineFunction &MF) const; + + void getInitialFrameState(std::vector<MachineMove> &Moves) const; + int getFrameIndexOffset(const MachineFunction &MF, int FI) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index c523441..9b0ec6e 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -190,20 +190,19 @@ namespace { SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); - bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM); - bool MatchLoad(SDValue N, X86ISelAddressMode &AM); + bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM); bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); bool MatchAddress(SDValue N, X86ISelAddressMode &AM); bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); - bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, + bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base, + bool SelectLEAAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, + bool SelectTLSADDRAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); bool SelectScalarSSELoad(SDNode *Root, SDValue N, @@ -264,12 +263,6 @@ namespace { return CurDAG->getTargetConstant(Imm, MVT::i8); } - /// getI16Imm - Return a target constant with the specified value, of type - /// i16. - inline SDValue getI16Imm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i16); - } - /// getI32Imm - Return a target constant with the specified value, of type /// i32. inline SDValue getI32Imm(unsigned Imm) { @@ -511,10 +504,11 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), - MemTmp, NULL, 0, MemVT, + MemTmp, MachinePointerInfo(), MemVT, false, false, 0); - SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp, - NULL, 0, MemVT, false, false, 0); + SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, + MachinePointerInfo(), + MemVT, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the // extload we created. 
This will cause general havok on the dag because @@ -536,9 +530,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() { void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI) { const TargetInstrInfo *TII = TM.getInstrInfo(); - if (Subtarget->isTargetCygMing()) + if (Subtarget->isTargetCygMing()) { + unsigned CallOp = + Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32; BuildMI(BB, DebugLoc(), - TII->get(X86::CALLpcrel32)).addExternalSymbol("__main"); + TII->get(CallOp)).addExternalSymbol("__main"); + } } void X86DAGToDAGISel::EmitFunctionEntryCode() { @@ -549,29 +546,27 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() { } -bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N, - X86ISelAddressMode &AM) { - assert(N.getOpcode() == X86ISD::SegmentBaseAddress); - SDValue Segment = N.getOperand(0); - - if (AM.Segment.getNode() == 0) { - AM.Segment = Segment; - return false; - } - - return true; -} - -bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) { +bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ + SDValue Address = N->getOperand(1); + + // load gs:0 -> GS segment register. + // load fs:0 -> FS segment register. + // // This optimization is valid because the GNU TLS model defines that // gs:0 (or fs:0 on X86-64) contains its own address. // For more information see http://people.redhat.com/drepper/tls.pdf - - SDValue Address = N.getOperand(1); - if (Address.getOpcode() == X86ISD::SegmentBaseAddress && - !MatchSegmentBaseAddress (Address, AM)) - return false; - + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) + if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 && + Subtarget->isTargetELF()) + switch (N->getPointerInfo().getAddrSpace()) { + case 256: + AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); + return false; + case 257: + AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); + return false; + } + return true; } @@ -690,25 +685,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { return false; } -/// isLogicallyAddWithConstant - Return true if this node is semantically an -/// add of a value with a constantint. -static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) { - // Check for (add x, Cst) - if (V->getOpcode() == ISD::ADD) - return isa<ConstantSDNode>(V->getOperand(1)); - - // Check for (or x, Cst), where Cst & x == 0. - if (V->getOpcode() != ISD::OR || - !isa<ConstantSDNode>(V->getOperand(1))) - return false; - - // Handle "X | C" as "X + C" iff X is known to have C bits clear. - ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1)); - - // Check to see if the LHS & C is zero. 
- return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue()); -} - bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth) { bool is64Bit = Subtarget->is64Bit(); @@ -756,11 +732,6 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, break; } - case X86ISD::SegmentBaseAddress: - if (!MatchSegmentBaseAddress(N, AM)) - return false; - break; - case X86ISD::Wrapper: case X86ISD::WrapperRIP: if (!MatchWrapper(N, AM)) @@ -768,7 +739,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, break; case ISD::LOAD: - if (!MatchLoad(N, AM)) + if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM)) return false; break; @@ -799,7 +770,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Okay, we know that we have a scale by now. However, if the scaled // value is an add of something and a constant, we can fold the // constant into the disp field here. - if (isLogicallyAddWithConstant(ShVal, CurDAG)) { + if (CurDAG->isBaseWithConstantOffset(ShVal)) { AM.IndexReg = ShVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); @@ -943,24 +914,18 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Add an artificial use to this node so that we can keep track of // it if it gets CSE'd with a different node. HandleSDNode Handle(N); - SDValue LHS = Handle.getValue().getNode()->getOperand(0); - SDValue RHS = Handle.getValue().getNode()->getOperand(1); X86ISelAddressMode Backup = AM; - if (!MatchAddressRecursively(LHS, AM, Depth+1) && - !MatchAddressRecursively(RHS, AM, Depth+1)) + if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && + !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)) return false; AM = Backup; - LHS = Handle.getValue().getNode()->getOperand(0); - RHS = Handle.getValue().getNode()->getOperand(1); - + // Try again after commuting the operands. - if (!MatchAddressRecursively(RHS, AM, Depth+1) && - !MatchAddressRecursively(LHS, AM, Depth+1)) + if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&& + !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1)) return false; AM = Backup; - LHS = Handle.getValue().getNode()->getOperand(0); - RHS = Handle.getValue().getNode()->getOperand(1); // If we couldn't fold both operands into the address at the same time, // see if we can just put each operand into a register and fold at least @@ -968,17 +933,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (AM.BaseType == X86ISelAddressMode::RegBase && !AM.Base_Reg.getNode() && !AM.IndexReg.getNode()) { - AM.Base_Reg = LHS; - AM.IndexReg = RHS; + N = Handle.getValue(); + AM.Base_Reg = N.getOperand(0); + AM.IndexReg = N.getOperand(1); AM.Scale = 1; return false; } + N = Handle.getValue(); break; } case ISD::OR: // Handle "X | C" as "X + C" iff X is known to have C bits clear. - if (isLogicallyAddWithConstant(N, CurDAG)) { + if (CurDAG->isBaseWithConstantOffset(N)) { X86ISelAddressMode Backup = AM; ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1)); uint64_t Offset = CN->getSExtValue(); @@ -1148,10 +1115,30 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { /// SelectAddr - returns true if it is able pattern match an addressing mode. /// It returns the operands which make up the maximal addressing mode it can /// match by reference. 
-bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
+///
+/// Parent is the parent node of the addr operand that is being matched.  It
+/// is always a load, store, atomic node, or null.  It is only null when
+/// checking memory operands for inline asm nodes.
+bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                  SDValue &Scale, SDValue &Index,
                                  SDValue &Disp, SDValue &Segment) {
   X86ISelAddressMode AM;
+
+  if (Parent &&
+      // These opcodes are all the nodes that have an "addr:$ptr" operand but
+      // are not MemSDNodes, and thus don't have proper addrspace info.
+      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
+      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
+      Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
+    unsigned AddrSpace =
+      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
+    // AddrSpace 256 -> GS, 257 -> FS.
+    if (AddrSpace == 256)
+      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+    if (AddrSpace == 257)
+      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+  }
+
   if (MatchAddress(N, AM))
     return false;
 
@@ -1187,7 +1174,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
       IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
       IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
     LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
-    if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment))
+    if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
       return false;
     return true;
   }
@@ -1205,7 +1192,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
       IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
     // Okay, this is a zero extending load.  Fold it.
     LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
-    if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+    if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
       return false;
     PatternNodeWithChain = SDValue(LD, 0);
     return true;
@@ -1216,7 +1203,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
 
 /// SelectLEAAddr - It calls SelectAddr and determines if the maximal addressing
 /// mode it matches can be cost-effectively emitted as an LEA instruction.
-bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
+bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
                                     SDValue &Base, SDValue &Scale,
                                     SDValue &Index, SDValue &Disp,
                                     SDValue &Segment) {
@@ -1278,7 +1265,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
 }
 
 /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, +bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); @@ -1311,7 +1298,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, !IsLegalToFold(N, P, P, OptLevel)) return false; - return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); + return SelectAddr(N.getNode(), + N.getOperand(1), Base, Scale, Index, Disp, Segment); } /// getGlobalBaseReg - Return an SDNode that returns the value of @@ -1329,7 +1317,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue In2L = Node->getOperand(2); SDValue In2H = Node->getOperand(3); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) + if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return NULL; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); @@ -1355,7 +1343,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { SDValue Ptr = Node->getOperand(1); SDValue Val = Node->getOperand(2); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) + if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return 0; bool isInc = false, isDec = false, isSub = false, isCN = false; @@ -1592,7 +1580,32 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return RetVal; break; } - + case X86ISD::UMUL: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + unsigned LoReg; + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break; + case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break; + case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break; + case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break; + } + + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, + N0, SDValue()).getValue(1); + + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); + SDValue Ops[] = {N1, InFlag}; + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); + + ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); + ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); + ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); + return NULL; + } + case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { SDValue N0 = Node->getOperand(0); @@ -1642,14 +1655,15 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, array_lengthof(Ops)); InFlag = SDValue(CNode, 1); + // Update the chain. ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); } else { - InFlag = - SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag); + InFlag = SDValue(CNode, 0); } // Prevent use of AH in a REX instruction by referencing AX instead. 
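For context, the new X86ISD::UMUL selection in the hunk above leans on the
implicit register contract of the x86 MUL instruction: one factor and the low
half of the product live in AL/AX/EAX/RAX, the high half lands in the paired
register (AH/DX/EDX/RDX), and CF/OF are set exactly when that high half is
non-zero, which is what the third (flags) result of the node models. A minimal
standalone sketch of those semantics for the 32-bit case (the UMul32/umul32
names are illustrative only, not part of the patch):

  #include <cstdint>

  struct UMul32 { uint32_t Lo, Hi; bool Overflow; };

  // Mirrors what MUL32r leaves behind: EAX = Lo, EDX = Hi, and CF/OF
  // signal a non-zero high half (the EFLAGS result used for UMULO).
  UMul32 umul32(uint32_t EAX, uint32_t Src) {
    uint64_t Product = uint64_t(EAX) * uint64_t(Src);
    return { uint32_t(Product), uint32_t(Product >> 32),
             (Product >> 32) != 0 };
  }
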
@@ -1688,7 +1702,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       ReplaceUses(SDValue(Node, 1), Result);
       DEBUG(dbgs() << "=> ";  Result.getNode()->dump(CurDAG); dbgs() << '\n');
     }
-    
+
     return NULL;
   }
 
@@ -1773,7 +1787,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
     if (isSigned && !signBitIsZero) {
       // Sign extend the low part into the high part.
       InFlag =
-        SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
+        SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
     } else {
       // Zero out the high part, effectively zero extending the input.
       SDValue ClrNode =
@@ -1787,14 +1801,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
         SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                           InFlag };
         SDNode *CNode =
-          CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
+          CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
                                  array_lengthof(Ops));
         InFlag = SDValue(CNode, 1);
         // Update the chain.
         ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
       } else {
         InFlag =
-          SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
+          SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
       }
 
       // Prevent use of AH in a REX instruction by referencing AX instead.
@@ -1971,7 +1985,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
   case 'v':   // not offsetable    ??
   default: return true;
   case 'm':   // memory
-    if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4))
+    if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
       return true;
     break;
   }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index a6db979..27024b4 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16,9 +16,9 @@
 #include "X86.h"
 #include "X86InstrBuilder.h"
 #include "X86ISelLowering.h"
-#include "X86ShuffleDecode.h"
 #include "X86TargetMachine.h"
 #include "X86TargetObjectFile.h"
+#include "Utils/X86ShuffleDecode.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
@@ -28,6 +28,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -56,39 +57,172 @@ using namespace dwarf;
 
 STATISTIC(NumTailCalls, "Number of tail calls");
 
 static cl::opt<bool>
-DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));
+Disable256Bit("disable-256bit", cl::Hidden,
+              cl::desc("Disable use of 256-bit vectors"));
 
 // Forward declarations.
 static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
                        SDValue V2);
 
+static SDValue Insert128BitVector(SDValue Result,
+                                  SDValue Vec,
+                                  SDValue Idx,
+                                  SelectionDAG &DAG,
+                                  DebugLoc dl);
+
+static SDValue Extract128BitVector(SDValue Vec,
+                                   SDValue Idx,
+                                   SelectionDAG &DAG,
+                                   DebugLoc dl);
+
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG);
+
+
+/// Generate a DAG to grab 128 bits from a vector > 128 bits.  This
+/// sets things up to match to an AVX VEXTRACTF128 instruction or a
+/// simple subregister reference.  Idx is an index in the 128 bits we
+/// want.  It need not be aligned to a 128-bit boundary.  That makes
+/// lowering EXTRACT_VECTOR_ELT operations easier.
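+///
+/// For example, extracting element 5 of a v8f32 gives ElemsPerChunk == 4
+/// and a normalized index of 4, so the node built is
+/// (extract_subvector v8f32, 4), i.e. the upper half <4,5,6,7>.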
+static SDValue Extract128BitVector(SDValue Vec,
+                                   SDValue Idx,
+                                   SelectionDAG &DAG,
+                                   DebugLoc dl) {
+  EVT VT = Vec.getValueType();
+  assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");
+
+  EVT ElVT = VT.getVectorElementType();
+
+  int Factor = VT.getSizeInBits() / 128;
+
+  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(),
+                                  ElVT,
+                                  VT.getVectorNumElements() / Factor);
+
+  // Extract from UNDEF is UNDEF.
+  if (Vec.getOpcode() == ISD::UNDEF)
+    return DAG.getNode(ISD::UNDEF, dl, ResultVT);
+
+  if (isa<ConstantSDNode>(Idx)) {
+    unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+    // Extract the relevant 128 bits.  Generate an EXTRACT_SUBVECTOR
+    // we can match to VEXTRACTF128.
+    unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+    // This is the index of the first element of the 128-bit chunk
+    // we want.
+    unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+                                 * ElemsPerChunk);
+
+    SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+    SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
+                                 VecIdx);
+
+    return Result;
+  }
+
+  return SDValue();
+}
+
+/// Generate a DAG to put 128 bits into a vector > 128 bits.  This
+/// sets things up to match to an AVX VINSERTF128 instruction or a
+/// simple superregister reference.  Idx is an index in the 128 bits
+/// we want.  It need not be aligned to a 128-bit boundary.  That makes
+/// lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result,
+                                  SDValue Vec,
+                                  SDValue Idx,
+                                  SelectionDAG &DAG,
+                                  DebugLoc dl) {
+  if (isa<ConstantSDNode>(Idx)) {
+    EVT VT = Vec.getValueType();
+    assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");
+
+    EVT ElVT = VT.getVectorElementType();
+
+    unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+    EVT ResultVT = Result.getValueType();
+
+    // Insert the relevant 128 bits.
+    unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+    // This is the index of the first element of the 128-bit chunk
+    // we want.
+    unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
+                                 * ElemsPerChunk);
+
+    SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);
+
+    Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
+                         VecIdx);
+    return Result;
+  }
+
+  return SDValue();
+}
+
+/// Given two vectors, concatenate them.
+static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) {
+  DebugLoc dl = Lower.getDebugLoc();
+
+  assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!");
+
+  EVT VT = EVT::getVectorVT(*DAG.getContext(),
+                            Lower.getValueType().getVectorElementType(),
+                            Lower.getValueType().getVectorNumElements() * 2);
+
+  // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors).
+  assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!");
+
+  // Insert the upper subvector.
+  SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper,
+                                   DAG.getConstant(
+                                     // This is half the length of the result
+                                     // vector.  Start inserting the upper 128
+                                     // bits here.
+                                     Lower.getValueType().getVectorNumElements(),
+                                     MVT::i32),
+                                   DAG, dl);
+
+  // Insert the lower subvector.
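+  // (Lower goes at element index 0; e.g. concatenating two v4f32 values
+  // produces a v8f32 with Upper in elements <4..7> and Lower in <0..3>.)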
+ Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl); + return Vec; +} + static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { - - bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); - - if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) { - if (is64Bit) return new X8664_MachoTargetObjectFile(); + const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); + bool is64Bit = Subtarget->is64Bit(); + + if (Subtarget->isTargetEnvMacho()) { + if (is64Bit) + return new X8664_MachoTargetObjectFile(); return new TargetLoweringObjectFileMachO(); - } else if (TM.getSubtarget<X86Subtarget>().isTargetELF() ){ - if (is64Bit) return new X8664_ELFTargetObjectFile(TM); + } + + if (Subtarget->isTargetELF()) { + if (is64Bit) + return new X8664_ELFTargetObjectFile(TM); return new X8632_ELFTargetObjectFile(TM); - } else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) { + } + if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) return new TargetLoweringObjectFileCOFF(); - } llvm_unreachable("unknown subtarget type"); } X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget<X86Subtarget>(); - X86ScalarSSEf64 = Subtarget->hasSSE2(); - X86ScalarSSEf32 = Subtarget->hasSSE1(); + X86ScalarSSEf64 = Subtarget->hasXMMInt(); + X86ScalarSSEf32 = Subtarget->hasXMM(); X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; RegInfo = TM.getRegisterInfo(); TD = getTargetData(); // Set up the TargetLowering object. + static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; // X86 is weird, it always uses i8 for shift amounts and setcc results. setShiftAmountType(MVT::i8); @@ -96,6 +230,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); + if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) { + // Setup Windows compiler runtime calls. + setLibcallName(RTLIB::SDIV_I64, "_alldiv"); + setLibcallName(RTLIB::UDIV_I64, "_aulldiv"); + setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2"); + setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2"); + setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall); + setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C); + setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C); + } + if (Subtarget->isTargetDarwin()) { // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp. setUseUnderscoreSetJmp(false); @@ -213,16 +359,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } // TODO: when we have SSE, these could be more efficient, by using movd/movq. - if (!X86ScalarSSEf64) { - setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); - setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); + if (!X86ScalarSSEf64) { + setOperationAction(ISD::BITCAST , MVT::f32 , Expand); + setOperationAction(ISD::BITCAST , MVT::i32 , Expand); if (Subtarget->is64Bit()) { - setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand); - // Without SSE, i64->f64 goes through memory; i64->MMX is Legal. - if (Subtarget->hasMMX() && !DisableMMX) - setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Custom); - else - setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand); + setOperationAction(ISD::BITCAST , MVT::f64 , Expand); + // Without SSE, i64->f64 goes through memory. 
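+      // (The Expand action turns this BITCAST into a store of the i64 to a
+      // stack temporary followed by an f64 reload of the same slot.)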
+ setOperationAction(ISD::BITCAST , MVT::i64 , Expand); } } @@ -236,30 +379,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // (low) operations are left as Legal, as there are single-result // instructions for this in x86. Using the two-result multiply instructions // when both high and low results are needed must be arranged by dagcombine. - setOperationAction(ISD::MULHS , MVT::i8 , Expand); - setOperationAction(ISD::MULHU , MVT::i8 , Expand); - setOperationAction(ISD::SDIV , MVT::i8 , Expand); - setOperationAction(ISD::UDIV , MVT::i8 , Expand); - setOperationAction(ISD::SREM , MVT::i8 , Expand); - setOperationAction(ISD::UREM , MVT::i8 , Expand); - setOperationAction(ISD::MULHS , MVT::i16 , Expand); - setOperationAction(ISD::MULHU , MVT::i16 , Expand); - setOperationAction(ISD::SDIV , MVT::i16 , Expand); - setOperationAction(ISD::UDIV , MVT::i16 , Expand); - setOperationAction(ISD::SREM , MVT::i16 , Expand); - setOperationAction(ISD::UREM , MVT::i16 , Expand); - setOperationAction(ISD::MULHS , MVT::i32 , Expand); - setOperationAction(ISD::MULHU , MVT::i32 , Expand); - setOperationAction(ISD::SDIV , MVT::i32 , Expand); - setOperationAction(ISD::UDIV , MVT::i32 , Expand); - setOperationAction(ISD::SREM , MVT::i32 , Expand); - setOperationAction(ISD::UREM , MVT::i32 , Expand); - setOperationAction(ISD::MULHS , MVT::i64 , Expand); - setOperationAction(ISD::MULHU , MVT::i64 , Expand); - setOperationAction(ISD::SDIV , MVT::i64 , Expand); - setOperationAction(ISD::UDIV , MVT::i64 , Expand); - setOperationAction(ISD::SREM , MVT::i64 , Expand); - setOperationAction(ISD::UREM , MVT::i64 , Expand); + for (unsigned i = 0, e = 4; i != e; ++i) { + MVT VT = IntVTs[i]; + setOperationAction(ISD::MULHS, VT, Expand); + setOperationAction(ISD::MULHU, VT, Expand); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + + // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences. 
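+    // For example, an i64 add on x86-32 is legalized to an ADDC of the low
+    // halves (producing the carry in EFLAGS) glued to an ADDE of the high
+    // halves that consumes it, i.e. an ADD/ADC instruction pair.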
+ setOperationAction(ISD::ADDC, VT, Custom); + setOperationAction(ISD::ADDE, VT, Custom); + setOperationAction(ISD::SUBC, VT, Custom); + setOperationAction(ISD::SUBE, VT, Custom); + } setOperationAction(ISD::BR_JT , MVT::Other, Expand); setOperationAction(ISD::BRCOND , MVT::Other, Custom); @@ -276,21 +410,27 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); - setOperationAction(ISD::CTPOP , MVT::i8 , Expand); setOperationAction(ISD::CTTZ , MVT::i8 , Custom); setOperationAction(ISD::CTLZ , MVT::i8 , Custom); - setOperationAction(ISD::CTPOP , MVT::i16 , Expand); setOperationAction(ISD::CTTZ , MVT::i16 , Custom); setOperationAction(ISD::CTLZ , MVT::i16 , Custom); - setOperationAction(ISD::CTPOP , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Custom); setOperationAction(ISD::CTLZ , MVT::i32 , Custom); if (Subtarget->is64Bit()) { - setOperationAction(ISD::CTPOP , MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Custom); setOperationAction(ISD::CTLZ , MVT::i64 , Custom); } + if (Subtarget->hasPOPCNT()) { + setOperationAction(ISD::CTPOP , MVT::i8 , Promote); + } else { + setOperationAction(ISD::CTPOP , MVT::i8 , Expand); + setOperationAction(ISD::CTPOP , MVT::i16 , Expand); + setOperationAction(ISD::CTPOP , MVT::i32 , Expand); + if (Subtarget->is64Bit()) + setOperationAction(ISD::CTPOP , MVT::i64 , Expand); + } + setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); setOperationAction(ISD::BSWAP , MVT::i16 , Expand); @@ -298,7 +438,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT , MVT::i1 , Promote); // X86 wants to expand cmov itself. setOperationAction(ISD::SELECT , MVT::i8 , Custom); - setOperationAction(ISD::SELECT , MVT::i16 , Custom); + setOperationAction(ISD::SELECT , MVT::i16 , Custom); setOperationAction(ISD::SELECT , MVT::i32 , Custom); setOperationAction(ISD::SELECT , MVT::f32 , Custom); setOperationAction(ISD::SELECT , MVT::f64 , Custom); @@ -341,12 +481,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom); } - if (Subtarget->hasSSE1()) + if (Subtarget->hasXMM()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); // We may not have a libcall for MEMBARRIER so we should lower this. setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom); - + // On X86 and X86-64, atomic operations are lowered to locked instructions. 
// Locked instructions, in turn, have implicit fence semantics (all memory // operations are flushed before issuing the locked instruction, and they @@ -355,15 +495,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setShouldFoldAtomicFences(true); // Expand certain atomics - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); - - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); + for (unsigned i = 0, e = 4; i != e; ++i) { + MVT VT = IntVTs[i]; + setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom); + setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); + } if (!Subtarget->is64Bit()) { setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); @@ -415,7 +551,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - if (Subtarget->isTargetCygMing()) + if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); else setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); @@ -512,13 +648,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); { - bool ignored; - APFloat TmpFlt(+0.0); - TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven, - &ignored); + APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended); addLegalFPImmediate(TmpFlt); // FLD0 TmpFlt.changeSign(); addLegalFPImmediate(TmpFlt); // FLD0/FCHS + + bool ignored; APFloat TmpFlt2(+1.0); TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven, &ignored); @@ -564,8 +699,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand); - setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand); setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand); + setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand); setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand); @@ -613,91 +749,44 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. 
- if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) { - addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false); - addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false); - addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false); - - addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false); - - setOperationAction(ISD::ADD, MVT::v8i8, Legal); - setOperationAction(ISD::ADD, MVT::v4i16, Legal); - setOperationAction(ISD::ADD, MVT::v2i32, Legal); - setOperationAction(ISD::ADD, MVT::v1i64, Legal); - - setOperationAction(ISD::SUB, MVT::v8i8, Legal); - setOperationAction(ISD::SUB, MVT::v4i16, Legal); - setOperationAction(ISD::SUB, MVT::v2i32, Legal); - setOperationAction(ISD::SUB, MVT::v1i64, Legal); - - setOperationAction(ISD::MULHS, MVT::v4i16, Legal); - setOperationAction(ISD::MUL, MVT::v4i16, Legal); - - setOperationAction(ISD::AND, MVT::v8i8, Promote); - AddPromotedToType (ISD::AND, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::AND, MVT::v4i16, Promote); - AddPromotedToType (ISD::AND, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::AND, MVT::v2i32, Promote); - AddPromotedToType (ISD::AND, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::AND, MVT::v1i64, Legal); - - setOperationAction(ISD::OR, MVT::v8i8, Promote); - AddPromotedToType (ISD::OR, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::OR, MVT::v4i16, Promote); - AddPromotedToType (ISD::OR, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::OR, MVT::v2i32, Promote); - AddPromotedToType (ISD::OR, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::OR, MVT::v1i64, Legal); - - setOperationAction(ISD::XOR, MVT::v8i8, Promote); - AddPromotedToType (ISD::XOR, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::XOR, MVT::v4i16, Promote); - AddPromotedToType (ISD::XOR, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::XOR, MVT::v2i32, Promote); - AddPromotedToType (ISD::XOR, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::XOR, MVT::v1i64, Legal); - - setOperationAction(ISD::LOAD, MVT::v8i8, Promote); - AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v4i16, Promote); - AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v2i32, Promote); - AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v1i64, Legal); - - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); - - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); - - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); - - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom); - - setOperationAction(ISD::SELECT, MVT::v8i8, Promote); - setOperationAction(ISD::SELECT, MVT::v4i16, Promote); - setOperationAction(ISD::SELECT, MVT::v2i32, Promote); - setOperationAction(ISD::SELECT, MVT::v1i64, Custom); - setOperationAction(ISD::VSETCC, MVT::v8i8, Custom); - setOperationAction(ISD::VSETCC, MVT::v4i16, Custom); - setOperationAction(ISD::VSETCC, MVT::v2i32, Custom); - - if (!X86ScalarSSEf64 && Subtarget->is64Bit()) { - 
setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom); - } - } - - if (!UseSoftFloat && Subtarget->hasSSE1()) { + if (!UseSoftFloat && Subtarget->hasMMX()) { + addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass); + // No operations on x86mmx supported, everything uses intrinsics. + } + + // MMX-sized vectors (other than x86mmx) are expected to be expanded + // into smaller operations. + setOperationAction(ISD::MULHS, MVT::v8i8, Expand); + setOperationAction(ISD::MULHS, MVT::v4i16, Expand); + setOperationAction(ISD::MULHS, MVT::v2i32, Expand); + setOperationAction(ISD::MULHS, MVT::v1i64, Expand); + setOperationAction(ISD::AND, MVT::v8i8, Expand); + setOperationAction(ISD::AND, MVT::v4i16, Expand); + setOperationAction(ISD::AND, MVT::v2i32, Expand); + setOperationAction(ISD::AND, MVT::v1i64, Expand); + setOperationAction(ISD::OR, MVT::v8i8, Expand); + setOperationAction(ISD::OR, MVT::v4i16, Expand); + setOperationAction(ISD::OR, MVT::v2i32, Expand); + setOperationAction(ISD::OR, MVT::v1i64, Expand); + setOperationAction(ISD::XOR, MVT::v8i8, Expand); + setOperationAction(ISD::XOR, MVT::v4i16, Expand); + setOperationAction(ISD::XOR, MVT::v2i32, Expand); + setOperationAction(ISD::XOR, MVT::v1i64, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand); + setOperationAction(ISD::SELECT, MVT::v8i8, Expand); + setOperationAction(ISD::SELECT, MVT::v4i16, Expand); + setOperationAction(ISD::SELECT, MVT::v2i32, Expand); + setOperationAction(ISD::SELECT, MVT::v1i64, Expand); + setOperationAction(ISD::BITCAST, MVT::v8i8, Expand); + setOperationAction(ISD::BITCAST, MVT::v4i16, Expand); + setOperationAction(ISD::BITCAST, MVT::v2i32, Expand); + setOperationAction(ISD::BITCAST, MVT::v1i64, Expand); + + if (!UseSoftFloat && Subtarget->hasXMM()) { addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); setOperationAction(ISD::FADD, MVT::v4f32, Legal); @@ -714,7 +803,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSETCC, MVT::v4f32, Custom); } - if (!UseSoftFloat && Subtarget->hasSSE2()) { + if (!UseSoftFloat && Subtarget->hasXMMInt()) { addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM @@ -795,7 +884,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Do not attempt to promote non-128-bit vectors if (!VT.is128BitVector()) continue; - + setOperationAction(ISD::AND, SVT, Promote); AddPromotedToType (ISD::AND, SVT, MVT::v2i64); setOperationAction(ISD::OR, SVT, Promote); @@ -818,10 +907,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); - if (!DisableMMX && Subtarget->hasMMX()) { - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - } } if (Subtarget->hasSSE41()) { @@ -863,9 +948,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } } - if (Subtarget->hasSSE42()) { + if (Subtarget->hasSSE42()) setOperationAction(ISD::VSETCC, 
MVT::v2i64, Custom); - } if (!UseSoftFloat && Subtarget->hasAVX()) { addRegisterClass(MVT::v8f32, X86::VR256RegisterClass); @@ -878,27 +962,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::LOAD, MVT::v8i32, Legal); setOperationAction(ISD::LOAD, MVT::v4f64, Legal); setOperationAction(ISD::LOAD, MVT::v4i64, Legal); + setOperationAction(ISD::FADD, MVT::v8f32, Legal); setOperationAction(ISD::FSUB, MVT::v8f32, Legal); setOperationAction(ISD::FMUL, MVT::v8f32, Legal); setOperationAction(ISD::FDIV, MVT::v8f32, Legal); setOperationAction(ISD::FSQRT, MVT::v8f32, Legal); setOperationAction(ISD::FNEG, MVT::v8f32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom); - //setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom); - //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom); - //setOperationAction(ISD::SELECT, MVT::v8f32, Custom); - //setOperationAction(ISD::VSETCC, MVT::v8f32, Custom); - - // Operations to consider commented out -v16i16 v32i8 - //setOperationAction(ISD::ADD, MVT::v16i16, Legal); - setOperationAction(ISD::ADD, MVT::v8i32, Custom); - setOperationAction(ISD::ADD, MVT::v4i64, Custom); - //setOperationAction(ISD::SUB, MVT::v32i8, Legal); - //setOperationAction(ISD::SUB, MVT::v16i16, Legal); - setOperationAction(ISD::SUB, MVT::v8i32, Custom); - setOperationAction(ISD::SUB, MVT::v4i64, Custom); - //setOperationAction(ISD::MUL, MVT::v16i16, Legal); + setOperationAction(ISD::FADD, MVT::v4f64, Legal); setOperationAction(ISD::FSUB, MVT::v4f64, Legal); setOperationAction(ISD::FMUL, MVT::v4f64, Legal); @@ -906,85 +977,66 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); setOperationAction(ISD::FNEG, MVT::v4f64, Custom); - setOperationAction(ISD::VSETCC, MVT::v4f64, Custom); - // setOperationAction(ISD::VSETCC, MVT::v32i8, Custom); - // setOperationAction(ISD::VSETCC, MVT::v16i16, Custom); - setOperationAction(ISD::VSETCC, MVT::v8i32, Custom); - - // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i8, Custom); - // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i16, Custom); - // setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i16, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i32, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f32, Custom); - - setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i64, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i64, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom); - -#if 0 - // Not sure we want to do this since there are no 256-bit integer - // operations in AVX - - // Custom lower build_vector, vector_shuffle, and extract_vector_elt. - // This includes 256-bit vectors - for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) { - EVT VT = (MVT::SimpleValueType)i; - - // Do not attempt to custom lower non-power-of-2 vectors - if (!isPowerOf2_32(VT.getVectorNumElements())) + // Custom lower build_vector, vector_shuffle, scalar_to_vector, + // insert_vector_elt extract_subvector and extract_vector_elt for + // 256-bit types. 
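+    // (For instance, a 256-bit BUILD_VECTOR can be assembled from two
+    // 128-bit halves and recombined with Insert128BitVector, which is set
+    // up to match VINSERTF128.)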
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; + ++i) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)i; + // Do not attempt to custom lower non-256-bit vectors + if (!isPowerOf2_32(MVT(VT).getVectorNumElements()) + || (MVT(VT).getSizeInBits() < 256)) continue; - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - } + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + } + // Custom-lower insert_subvector and extract_subvector based on + // the result type. + for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; + ++i) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)i; + // Do not attempt to custom lower non-256-bit vectors + if (!isPowerOf2_32(MVT(VT).getVectorNumElements())) + continue; - if (Subtarget->is64Bit()) { - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom); + if (MVT(VT).getSizeInBits() == 128) { + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); + } + else if (MVT(VT).getSizeInBits() == 256) { + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + } } -#endif - -#if 0 - // Not sure we want to do this since there are no 256-bit integer - // operations in AVX - // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64. - // Including 256-bit vectors - for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) { - EVT VT = (MVT::SimpleValueType)i; + // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. + // Don't promote loads because we need them for VPERM vector index versions. - if (!VT.is256BitVector()) { + for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; + VT++) { + if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements()) + || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256)) continue; - } - setOperationAction(ISD::AND, VT, Promote); - AddPromotedToType (ISD::AND, VT, MVT::v4i64); - setOperationAction(ISD::OR, VT, Promote); - AddPromotedToType (ISD::OR, VT, MVT::v4i64); - setOperationAction(ISD::XOR, VT, Promote); - AddPromotedToType (ISD::XOR, VT, MVT::v4i64); - setOperationAction(ISD::LOAD, VT, Promote); - AddPromotedToType (ISD::LOAD, VT, MVT::v4i64); - setOperationAction(ISD::SELECT, VT, Promote); - AddPromotedToType (ISD::SELECT, VT, MVT::v4i64); + setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote); + AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v4i64); + setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote); + AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v4i64); + setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote); + AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v4i64); + //setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote); + //AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v4i64); + setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote); + AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64); } - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); -#endif } // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - // Add/Sub/Mul with overflow operations are custom lowered. 
- setOperationAction(ISD::SADDO, MVT::i32, Custom); - setOperationAction(ISD::UADDO, MVT::i32, Custom); - setOperationAction(ISD::SSUBO, MVT::i32, Custom); - setOperationAction(ISD::USUBO, MVT::i32, Custom); - setOperationAction(ISD::SMULO, MVT::i32, Custom); // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. @@ -992,14 +1044,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: We really should do custom legalization for addition and // subtraction on x86-32 once PR3203 is fixed. We really can't do much better // than generic legalization for 64-bit multiplication-with-overflow, though. - if (Subtarget->is64Bit()) { - setOperationAction(ISD::SADDO, MVT::i64, Custom); - setOperationAction(ISD::UADDO, MVT::i64, Custom); - setOperationAction(ISD::SSUBO, MVT::i64, Custom); - setOperationAction(ISD::USUBO, MVT::i64, Custom); - setOperationAction(ISD::SMULO, MVT::i64, Custom); + for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) { + // Add/Sub/Mul with overflow operations are custom lowered. + MVT VT = IntVTs[i]; + setOperationAction(ISD::SADDO, VT, Custom); + setOperationAction(ISD::UADDO, VT, Custom); + setOperationAction(ISD::SSUBO, VT, Custom); + setOperationAction(ISD::USUBO, VT, Custom); + setOperationAction(ISD::SMULO, VT, Custom); + setOperationAction(ISD::UMULO, VT, Custom); } + // There are no 8-bit 3-address imul/mul instructions + setOperationAction(ISD::SMULO, MVT::i8, Expand); + setOperationAction(ISD::UMULO, MVT::i8, Expand); + if (!Subtarget->is64Bit()) { // These libcalls are not available in 32-bit. setLibcallName(RTLIB::SHL_I128, 0); @@ -1016,6 +1075,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::ZERO_EXTEND); if (Subtarget->is64Bit()) @@ -1023,11 +1085,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) computeRegisterProperties(); - // FIXME: These should be based on subtarget info. Plus, the values should - // be smaller when we are in optimizing for size mode. + // On Darwin, -Os means optimize for size without hurting performance, + // do not reduce the limit. maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores + maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8; maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores - maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores + maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores + maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; setPrefLoopAlignment(16); benefitFromCodePlacementOpt = true; } @@ -1078,7 +1143,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { } unsigned Align = 4; - if (Subtarget->hasSSE1()) + if (Subtarget->hasXMM()) getMaxByValAlign(Ty, Align); return Align; } @@ -1119,7 +1184,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, } else if (!MemcpyStrSrc && Size >= 8 && !Subtarget->is64Bit() && Subtarget->getStackAlignment() >= 8 && - Subtarget->hasSSE2()) { + Subtarget->hasXMMInt()) { // Do not use f64 to lower memcpy if source is string constant. It's // better to use i32 to avoid the loads. 
       return MVT::f64;
@@ -1139,21 +1204,11 @@ unsigned X86TargetLowering::getJumpTableEncoding() const {
   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
       Subtarget->isPICStyleGOT())
     return MachineJumpTableInfo::EK_Custom32;
-  
+
   // Otherwise, use the normal jump table encoding heuristics.
   return TargetLowering::getJumpTableEncoding();
 }
 
-/// getPICBaseSymbol - Return the X86-32 PIC base.
-MCSymbol *
-X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF,
-                                    MCContext &Ctx) const {
-  const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo();
-  return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
-                               Twine(MF->getFunctionNumber())+"$pb");
-}
-
-
 const MCExpr *
 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                              const MachineBasicBlock *MBB,
@@ -1188,7 +1243,7 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
 
   // Otherwise, the reference is relative to the PIC base.
-  return MCSymbolRefExpr::Create(getPICBaseSymbol(MF, Ctx), Ctx);
+  return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
 }
 
 /// getFunctionAlignment - Return the Log2 alignment of this function.
@@ -1196,6 +1251,7 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
   return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
 }
 
+// FIXME: Why is this routine here?  Move to RegInfo!
 std::pair<const TargetRegisterClass*, uint8_t>
 X86TargetLowering::findRepresentativeClass(EVT VT) const {
   const TargetRegisterClass *RRC = 0;
@@ -1207,8 +1263,7 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const {
     RRC = (Subtarget->is64Bit()
            ? X86::GR64RegisterClass : X86::GR32RegisterClass);
     break;
-  case MVT::v8i8: case MVT::v4i16:
-  case MVT::v2i32: case MVT::v1i64:
+  case MVT::x86mmx:
     RRC = X86::VR64RegisterClass;
     break;
   case MVT::f32: case MVT::f64:
@@ -1222,10 +1277,13 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const {
   return std::make_pair(RRC, Cost);
 }
 
+// FIXME: Why is this routine here?  Move to RegInfo!
 unsigned
 X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
                                        MachineFunction &MF) const {
-  unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
 
   switch (RC->getID()) {
   default: return 0;
@@ -1267,7 +1325,7 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
 
 #include "X86GenCallingConv.inc"
 
-bool 
+bool
 X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   LLVMContext &Context) const {
@@ -1312,16 +1370,18 @@ X86TargetLowering::LowerReturn(SDValue Chain,
     SDValue ValToCopy = OutVals[i];
     EVT ValVT = ValToCopy.getValueType();
 
-    // If this is x86-64, and we disabled SSE, we can't return FP values
-    if ((ValVT == MVT::f32 || ValVT == MVT::f64) &&
-        (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
+    // If this is x86-64, and we disabled SSE, we can't return FP values,
+    // or SSE or MMX vectors.
+    if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
+         VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
+        (Subtarget->is64Bit() && !Subtarget->hasXMM())) {
       report_fatal_error("SSE register return with SSE disabled");
     }
     // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
     // llvm-gcc has never done it right and no one has noticed, so this
     // should be OK for now.
if (ValVT == MVT::f64 && - (Subtarget->is64Bit() && !Subtarget->hasSSE2())) + (Subtarget->is64Bit() && !Subtarget->hasXMMInt())) report_fatal_error("SSE2 register return with SSE2 disabled"); // Returns in ST0/ST1 are handled specially: these are pushed as operands to @@ -1340,20 +1400,19 @@ X86TargetLowering::LowerReturn(SDValue Chain, // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 // which is returned in RAX / RDX. if (Subtarget->is64Bit()) { - if (ValVT.isVector() && ValVT.getSizeInBits() == 64) { - ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy); + if (ValVT == MVT::x86mmx) { if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { + ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy); ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); - // If we don't have SSE2 available, convert to v4f32 so the generated // register is legal. if (!Subtarget->hasSSE2()) - ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,ValToCopy); + ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy); } } } - + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag); Flag = Chain.getValue(1); } @@ -1367,7 +1426,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); - assert(Reg && + assert(Reg && "SRetReturnReg should have been set in LowerFormalArguments()."); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); @@ -1388,6 +1447,28 @@ X86TargetLowering::LowerReturn(SDValue Chain, MVT::Other, &RetOps[0], RetOps.size()); } +bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const { + if (N->getNumValues() != 1) + return false; + if (!N->hasNUsesOfValue(1, 0)) + return false; + + SDNode *Copy = *N->use_begin(); + if (Copy->getOpcode() != ISD::CopyToReg && + Copy->getOpcode() != ISD::FP_EXTEND) + return false; + + bool HasRet = false; + for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); + UI != UE; ++UI) { + if (UI->getOpcode() != X86ISD::RET_FLAG) + return false; + HasRet = true; + } + + return HasRet; +} + /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. /// @@ -1412,7 +1493,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && - ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { + ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) { report_fatal_error("SSE register return with SSE disabled"); } @@ -1433,7 +1514,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64; if (CopyVT == MVT::f80) Opc = isST0 ? 
 /// LowerCallResult - Lower the result values of a call into the
 /// appropriate copies out of appropriate physical registers.
 ///
@@ -1412,7 +1493,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
 
     // If this is x86-64, and we disabled SSE, we can't return FP values
     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
-        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
       report_fatal_error("SSE register return with SSE disabled");
     }
 
@@ -1433,7 +1514,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
       if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64;
       if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80;
       SDValue Ops[] = { Chain, InFlag };
-      Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag,
+      Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Glue,
                                          Ops, 2), 1);
       Val = Chain.getValue(0);
@@ -1456,7 +1537,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                    MVT::i64, InFlag).getValue(1);
         Val = Chain.getValue(0);
       }
-      Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val);
+      Val = DAG.getNode(ISD::BITCAST, dl, CopyVT, Val);
     } else {
       Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT,
                                  InFlag).getValue(1);
@@ -1499,30 +1580,6 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
   return Ins[0].Flags.isSRet();
 }
 
-/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
-/// given CallingConvention value.
-CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
-  if (Subtarget->is64Bit()) {
-    if (CC == CallingConv::GHC)
-      return CC_X86_64_GHC;
-    else if (Subtarget->isTargetWin64())
-      return CC_X86_Win64_C;
-    else
-      return CC_X86_64_C;
-  }
-
-  if (CC == CallingConv::X86_FastCall)
-    return CC_X86_32_FastCall;
-  else if (CC == CallingConv::X86_ThisCall)
-    return CC_X86_32_ThisCall;
-  else if (CC == CallingConv::Fast)
-    return CC_X86_32_FastCC;
-  else if (CC == CallingConv::GHC)
-    return CC_X86_32_GHC;
-  else
-    return CC_X86_32_C;
-}
-
 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
 /// by "Src" to address "Dst" with size and alignment information specified by
 /// the specific parameter attribute. The copy will be passed as a byval
@@ -1531,10 +1588,11 @@ static SDValue
 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                           DebugLoc dl) {
-  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                        /*isVolatile*/false, /*AlwaysInline=*/true,
-                       NULL, 0, NULL, 0);
+                       MachinePointerInfo(), MachinePointerInfo());
 }
 
 /// IsTailCallConvention - Return true if the calling convention is one that
@@ -1583,7 +1641,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
                                     VA.getLocMemOffset(), isImmutable);
     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
     return DAG.getLoad(ValVT, dl, Chain, FIN,
-                       PseudoSourceValue::getFixedStack(FI), 0,
+                       MachinePointerInfo::getFixedStack(FI),
                        false, false, 0);
   }
 }
@@ -1617,7 +1675,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                  ArgLocs, *DAG.getContext());
-  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
+
+  // Allocate shadow area for Win64
+  if (IsWin64) {
+    CCInfo.AllocateStack(32, 8);
+  }
+
+  CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
 
   unsigned LastVal = ~0U;
   SDValue ArgValue;
@@ -1644,12 +1708,12 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         RC = X86::VR256RegisterClass;
       else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
         RC = X86::VR128RegisterClass;
-      else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
+      else if (RegVT == MVT::x86mmx)
         RC = X86::VR64RegisterClass;
       else
        llvm_unreachable("Unknown argument type!");
 
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
 
       // If this is an 8 or 16-bit value, it is really passed promoted to 32
@@ -1662,14 +1726,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                                DAG.getValueType(VA.getValVT()));
       else if (VA.getLocInfo() == CCValAssign::BCvt)
-        ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
 
       if (VA.isExtInLoc()) {
         // Handle MMX values passed in XMM regs.
         if (RegVT.isVector()) {
-          ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
-                                 ArgValue, DAG.getConstant(0, MVT::i64));
-          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+          ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(),
+                                 ArgValue);
         } else
           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
       }
@@ -1680,8 +1743,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
 
     // If value is passed via pointer - do a load.
     if (VA.getLocInfo() == CCValAssign::Indirect)
-      ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0,
-                             false, false, 0);
+      ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
+                             MachinePointerInfo(), false, false, 0);
 
     InVals.push_back(ArgValue);
   }
@@ -1708,8 +1771,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   // If the function takes variable number of arguments, make a frame index for
   // the start of the first vararg value... for expansion of llvm.va_start.
   if (isVarArg) {
-    if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
-                    CallConv != CallingConv::X86_ThisCall)) {
+    if (!IsWin64 && (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+                    CallConv != CallingConv::X86_ThisCall))) {
       FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
     }
     if (Is64Bit) {
@@ -1719,9 +1782,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
       static const unsigned GPR64ArgRegsWin64[] = {
         X86::RCX, X86::RDX, X86::R8,  X86::R9
       };
-      static const unsigned XMMArgRegsWin64[] = {
-        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
-      };
       static const unsigned GPR64ArgRegs64Bit[] = {
         X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
       };
@@ -1729,40 +1789,52 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
         X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
       };
-      const unsigned *GPR64ArgRegs, *XMMArgRegs;
+      const unsigned *GPR64ArgRegs;
+      unsigned NumXMMRegs = 0;
 
       if (IsWin64) {
-        TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
+        // The XMM registers which might contain var arg parameters are shadowed
+        // in their paired GPR.  So we only need to save the GPR to their home
+        // slots.
+        TotalNumIntRegs = 4;
         GPR64ArgRegs = GPR64ArgRegsWin64;
-        XMMArgRegs = XMMArgRegsWin64;
       } else {
         TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
         GPR64ArgRegs = GPR64ArgRegs64Bit;
-        XMMArgRegs = XMMArgRegs64Bit;
+
+        NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit,
+                                                TotalNumXMMRegs);
       }
       unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
                                                        TotalNumIntRegs);
-      unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
-                                                       TotalNumXMMRegs);
 
       bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
-      assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
+      assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
             "SSE register cannot be used when SSE is disabled!");
      assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
             "SSE register cannot be used when SSE is disabled!");
-      if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
+      if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
         // Kernel mode asks for SSE to be disabled, so don't push them
         // on the stack.
         TotalNumXMMRegs = 0;
 
-      // For X86-64, if there are vararg parameters that are passed via
-      // registers, then we must store them to their spots on the stack so they
-      // may be loaded by deferencing the result of va_next.
-      FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
-      FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
-      FuncInfo->setRegSaveFrameIndex(
-        MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
+      if (IsWin64) {
+        const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+        // Get to the caller-allocated home save location.  Add 8 to account
+        // for the return address.
+        int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+        FuncInfo->setRegSaveFrameIndex(
+          MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
+        FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+      } else {
+        // For X86-64, if there are vararg parameters that are passed via
+        // registers, then we must store them to their spots on the stack so
+        // they may be loaded by dereferencing the result of va_next.
+        FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
+        FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
+        FuncInfo->setRegSaveFrameIndex(
+          MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16,
                                  false));
+      }
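[Editorial aside] The Win64 branch above leans on the calling convention's caller-allocated home area: 32 bytes for RCX/RDX/R8/R9 sit directly above the return address, so varargs can be spilled into the caller's frame instead of a fresh stack object. A hedged arithmetic check of the offsets the CreateFixedObject call encodes (assuming the local area starts at the entry stack pointer, as on x86-64):

    // Offset of the i-th Win64 integer argument register's home slot
    // (i in 0..3), relative to RSP at function entry.
    static int win64HomeSlotOffset(int NumIntRegsTaken) {
      const int OffsetOfLocalArea = 0; // assumed: local area at entry SP
      const int ReturnAddrSize    = 8; // pushed by the call instruction
      return OffsetOfLocalArea + ReturnAddrSize + NumIntRegsTaken * 8;
    }
    // win64HomeSlotOffset(0) == 8   -> RCX's home slot
    // win64HomeSlotOffset(3) == 32  -> R9's home slot
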
 
       // Store the integer parameter registers.
       SmallVector<SDValue, 8> MemOps;
@@ -1773,13 +1845,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
                                   DAG.getIntPtrConstant(Offset));
         unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
-                                     X86::GR64RegisterClass);
+                                     X86::GR64RegisterClass, dl);
         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
         SDValue Store =
           DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                       PseudoSourceValue::getFixedStack(
-                         FuncInfo->getRegSaveFrameIndex()),
-                       Offset, false, false, 0);
+                       MachinePointerInfo::getFixedStack(
+                         FuncInfo->getRegSaveFrameIndex(), Offset),
+                       false, false, 0);
         MemOps.push_back(Store);
         Offset += 8;
       }
@@ -1789,7 +1861,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         SmallVector<SDValue, 11> SaveXMMOps;
         SaveXMMOps.push_back(Chain);
 
-        unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
+        unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass, dl);
         SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
         SaveXMMOps.push_back(ALVal);
 
@@ -1799,8 +1871,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
                                FuncInfo->getVarArgsFPOffset()));
 
         for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
-          unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
-                                       X86::VR128RegisterClass);
+          unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
+                                       X86::VR128RegisterClass, dl);
           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
           SaveXMMOps.push_back(Val);
         }
@@ -1843,15 +1915,14 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
                                     DebugLoc dl, SelectionDAG &DAG,
                                     const CCValAssign &VA,
                                     ISD::ArgFlagsTy Flags) const {
-  const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
-  unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
+  unsigned LocMemOffset = VA.getLocMemOffset();
   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
-  if (Flags.isByVal()) {
+  if (Flags.isByVal())
     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
-  }
+
   return DAG.getStore(Chain, dl, Arg, PtrOff,
-                      PseudoSourceValue::getStack(), LocMemOffset,
+                      MachinePointerInfo::getStack(LocMemOffset),
                       false, false, 0);
 }
 
@@ -1867,7 +1938,8 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
     OutRetAddr = getReturnAddressFrameIndex(DAG);
 
     // Load the "old" Return address.
-  OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0);
+  OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
+                           false, false, 0);
   return SDValue(OutRetAddr.getNode(), 1);
 }
@@ -1886,7 +1958,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
-                       PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0,
+                       MachinePointerInfo::getFixedStack(NewReturnAddrFI),
                        false, false, 0);
   return Chain;
 }
@@ -1902,6 +1974,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                              SmallVectorImpl<SDValue> &InVals) const {
   MachineFunction &MF = DAG.getMachineFunction();
   bool Is64Bit        = Subtarget->is64Bit();
+  bool IsWin64        = Subtarget->isTargetWin64();
   bool IsStructRet    = CallIsStructReturn(Outs);
   bool IsSibcall      = false;
 
@@ -1927,7 +2000,13 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                  ArgLocs, *DAG.getContext());
-  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+
+  // Allocate shadow area for Win64
+  if (IsWin64) {
+    CCInfo.AllocateStack(32, 8);
+  }
+
+  CCInfo.AnalyzeCallOperands(Outs, CC_X86);
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -1986,21 +2065,21 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     case CCValAssign::AExt:
       if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
         // Special case: passing MMX values in XMM registers.
-        Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+        Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
         Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
         Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
       } else
         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
       break;
     case CCValAssign::BCvt:
-      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
+      Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
       break;
     case CCValAssign::Indirect: {
       // Store the argument.
       SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
       Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
-                           PseudoSourceValue::getFixedStack(FI), 0,
+                           MachinePointerInfo::getFixedStack(FI),
                            false, false, 0);
       Arg = SpillSlot;
       break;
@@ -2009,7 +2088,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
     if (VA.isRegLoc()) {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
-      if (isVarArg && Subtarget->isTargetWin64()) {
+      if (isVarArg && IsWin64) {
         // Win64 ABI requires argument XMM reg to be copied to the corresponding
         // shadow reg if callee is a varargs function.
         unsigned ShadowReg = 0;
@@ -2075,7 +2154,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     }
   }
 
-  if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) {
+  if (Is64Bit && isVarArg && !IsWin64) {
     // From AMD64 ABI document:
     // For calls that may call functions that use varargs or stdargs
     // (prototype-less calls or calls to functions containing ellipsis (...) in
@@ -2090,7 +2169,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
-    assert((Subtarget->hasSSE1() || !NumXMMRegs)
+    assert((Subtarget->hasXMM() || !NumXMMRegs)
           && "SSE registers cannot be used when SSE is disabled");
 
    Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
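[Editorial aside] The AL copy above implements the SysV AMD64 rule the comment cites: before calling a possibly-variadic function, AL must hold an upper bound on the number of XMM registers that actually carry arguments, and the callee's vararg prologue (the VASTART_SAVE_XMM_REGS path seen earlier) uses it to decide whether the XMM spills can be skipped. A hedged illustration with hypothetical values:

    // Caller side, conceptually:  printf("%f", x)  ->  AL = 1 before call.
    // Callee side: with AL == 0 the vararg prologue may skip all eight
    // 16-byte XMM stores into the register save area.
    static bool calleeMustSpillXMM(unsigned ALAtEntry) {
      return ALAtEntry != 0; // illustrative; the real check is emitted inline
    }
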
@@ -2143,7 +2222,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
         // Store relative to framepointer.
         MemOpChains2.push_back(
           DAG.getStore(ArgChain, dl, Arg, FIN,
-                       PseudoSourceValue::getFixedStack(FI), 0,
+                       MachinePointerInfo::getFixedStack(FI),
                        false, false, 0));
       }
     }
@@ -2192,8 +2271,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
       OpFlags = X86II::MO_PLT;
     } else if (Subtarget->isPICStyleStubAny() &&
-             (GV->isDeclaration() || GV->isWeakForLinker()) &&
-             Subtarget->getDarwinVers() < 9) {
+               (GV->isDeclaration() || GV->isWeakForLinker()) &&
+               Subtarget->getDarwinVers() < 9) {
       // PC-relative references to external symbols should go through $stub,
       // unless we're building with the leopard linker or later, which
       // automatically synthesizes these stubs.
@@ -2206,13 +2285,13 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
     unsigned char OpFlags = 0;
 
-    // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
-    // symbols should go through the PLT.
+    // On ELF targets, in either X86-64 or X86-32 mode, direct calls to
+    // external symbols should go through the PLT.
     if (Subtarget->isTargetELF() &&
         getTargetMachine().getRelocationModel() == Reloc::PIC_) {
       OpFlags = X86II::MO_PLT;
     } else if (Subtarget->isPICStyleStubAny() &&
-             Subtarget->getDarwinVers() < 9) {
+               Subtarget->getDarwinVers() < 9) {
       // PC-relative references to external symbols should go through $stub,
       // unless we're building with the leopard linker or later, which
       // automatically synthesizes these stubs.
@@ -2224,7 +2303,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   }
 
   // Returns a chain & a flag for retval copy to use.
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SmallVector<SDValue, 8> Ops;
 
   if (!IsSibcall && isTailCall) {
@@ -2250,7 +2329,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
 
   // Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
-  if (Is64Bit && isVarArg && !Subtarget->isTargetWin64())
+  if (Is64Bit && isVarArg && !IsWin64)
     Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
 
   if (InFlag.getNode())
@@ -2337,7 +2416,7 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                SelectionDAG& DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   const TargetMachine &TM = MF.getTarget();
-  const TargetFrameInfo &TFI = *TM.getFrameInfo();
+  const TargetFrameLowering &TFI = *TM.getFrameLowering();
   unsigned StackAlignment = TFI.getStackAlignment();
   uint64_t AlignMask = StackAlignment - 1;
   int64_t Offset = StackSize;
@@ -2364,7 +2443,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
   int FI = INT_MAX;
   if (Arg.getOpcode() == ISD::CopyFromReg) {
     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
-    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
+    if (!TargetRegisterInfo::isVirtualRegister(VR))
       return false;
     MachineInstr *Def = MRI->getVRegDef(VR);
     if (!Def)
@@ -2510,14 +2589,17 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
     SmallVector<CCValAssign, 16> ArgLocs;
     CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
                    ArgLocs, *DAG.getContext());
-    CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+
+    // Allocate shadow area for Win64
+    if (Subtarget->isTargetWin64()) {
+      CCInfo.AllocateStack(32, 8);
+    }
+
+    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
     if (CCInfo.getNextStackOffset()) {
       MachineFunction &MF = DAG.getMachineFunction();
       if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
         return false;
-      if (Subtarget->isTargetWin64())
-        // Win64 ABI has additional complications.
-        return false;
 
       // Check if the arguments are already laid out in the right way as
       // the caller's fixed stack objects.
@@ -2564,6 +2646,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
     }
   }
 
+  // An stdcall caller is expected to clean up its arguments; the callee
+  // isn't going to do that.
+  if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+    return false;
+
   return true;
 }
 
@@ -2592,6 +2679,7 @@ static bool isTargetShuffle(unsigned Opcode) {
   case X86ISD::PSHUFHW:
   case X86ISD::PSHUFLW:
   case X86ISD::SHUFPD:
+  case X86ISD::PALIGN:
   case X86ISD::SHUFPS:
   case X86ISD::MOVLHPS:
   case X86ISD::MOVLHPD:
@@ -2600,6 +2688,7 @@ static bool isTargetShuffle(unsigned Opcode) {
   case X86ISD::MOVLPD:
   case X86ISD::MOVSHDUP:
   case X86ISD::MOVSLDUP:
+  case X86ISD::MOVDDUP:
   case X86ISD::MOVSS:
   case X86ISD::MOVSD:
   case X86ISD::UNPCKLPS:
@@ -2625,6 +2714,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
   default: llvm_unreachable("Unknown x86 shuffle node");
   case X86ISD::MOVSHDUP:
   case X86ISD::MOVSLDUP:
+  case X86ISD::MOVDDUP:
     return DAG.getNode(Opc, dl, VT, V1);
   }
 
@@ -2648,6 +2738,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
                                     SDValue V1, SDValue V2, unsigned TargetMask,
                                     SelectionDAG &DAG) {
   switch(Opc) {
   default: llvm_unreachable("Unknown x86 shuffle node");
+  case X86ISD::PALIGN:
   case X86ISD::SHUFPD:
   case X86ISD::SHUFPS:
     return DAG.getNode(Opc, dl, VT, V1, V2,
@@ -2770,8 +2861,8 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
   // First determine if it is required or is profitable to flip the operands.
 
   // If LHS is a foldable load, but RHS is not, flip the condition.
-  if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
-      !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
+  if (ISD::isNON_EXTLoad(LHS.getNode()) &&
+      !ISD::isNON_EXTLoad(RHS.getNode())) {
     SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
     std::swap(LHS, RHS);
   }
@@ -2865,7 +2956,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
 /// is suitable for input to PSHUFD or PSHUFW.  That is, it doesn't reference
 /// the second operand.
 static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
-  if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
+  if (VT == MVT::v4f32 || VT == MVT::v4i32 )
     return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
   if (VT == MVT::v2f64 || VT == MVT::v2i64)
     return (Mask[0] < 2 && Mask[1] < 2);
@@ -2933,15 +3024,15 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
 static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
                           bool hasSSSE3) {
   int i, e = VT.getVectorNumElements();
-  
+
   // Do not handle v2i64 / v2f64 shuffles with palignr.
   if (e < 4 || !hasSSSE3)
     return false;
-  
+
   for (i = 0; i != e; ++i)
     if (Mask[i] >= 0)
       break;
-  
+
   // All undef, not a palignr.
   if (i == e)
     return false;
@@ -2952,13 +3043,13 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
   bool NeedsUnary = false;
 
   int s = Mask[i] - i;
-  
+
   // Check the rest of the elements to see if they are consecutive.
   for (++i; i != e; ++i) {
     int m = Mask[i];
-    if (m < 0) 
+    if (m < 0)
       continue;
-    
+
     Unary = Unary && (m < (int)e);
     NeedsUnary = NeedsUnary || (m < s);
 
@@ -3046,10 +3137,10 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
 /// <2, 3, 2, 3>
 bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
   unsigned NumElems = N->getValueType(0).getVectorNumElements();
-  
+
   if (NumElems != 4)
     return false;
-  
+
   return isUndefOrEqual(N->getMaskElt(0), 2) &&
          isUndefOrEqual(N->getMaskElt(1), 3) &&
          isUndefOrEqual(N->getMaskElt(2), 2) &&
@@ -3320,6 +3411,44 @@ bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
   return true;
 }
 
+/// isVEXTRACTF128Index - Return true if the specified
+/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
+/// suitable for input to VEXTRACTF128.
+bool X86::isVEXTRACTF128Index(SDNode *N) {
+  if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+    return false;
+
+  // The index should be aligned on a 128-bit boundary.
+  uint64_t Index =
+    cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+  unsigned VL = N->getValueType(0).getVectorNumElements();
+  unsigned VBits = N->getValueType(0).getSizeInBits();
+  unsigned ElSize = VBits / VL;
+  bool Result = (Index * ElSize) % 128 == 0;
+
+  return Result;
+}
+
+/// isVINSERTF128Index - Return true if the specified INSERT_SUBVECTOR
+/// operand specifies a subvector insert that is suitable for input to
+/// VINSERTF128.
+bool X86::isVINSERTF128Index(SDNode *N) {
+  if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+    return false;
+
+  // The index should be aligned on a 128-bit boundary.
+  uint64_t Index =
+    cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+  unsigned VL = N->getValueType(0).getVectorNumElements();
+  unsigned VBits = N->getValueType(0).getSizeInBits();
+  unsigned ElSize = VBits / VL;
+  bool Result = (Index * ElSize) % 128 == 0;
+
+  return Result;
+}
+
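[Editorial aside] Both predicates above reduce to one divisibility test, and the companion getExtract/getInsert helpers a little further down turn the same index into the instruction immediate. The arithmetic, restated as a hedged sketch with worked numbers:

    // An EXTRACT_SUBVECTOR / INSERT_SUBVECTOR index is VEXTRACTF128 /
    // VINSERTF128-friendly when it lands on a 128-bit boundary; the
    // immediate is then the 128-bit chunk number.
    static bool on128BitBoundary(unsigned Index, unsigned VBits, unsigned VL) {
      unsigned ElSize = VBits / VL;          // bits per element
      return (Index * ElSize) % 128 == 0;
    }
    static unsigned chunkImmediate(unsigned Index, unsigned EltBits) {
      return Index / (128 / EltBits);        // elements per 128-bit chunk
    }
    // v8f32 (256 bits): Index 4 -> on128BitBoundary(4, 256, 8) is true and
    // chunkImmediate(4, 32) == 1, i.e. the upper 128-bit half.
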
 /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
 /// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
 unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
@@ -3388,6 +3517,42 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
   return (Val - i) * EltSize;
 }
 
+/// getExtractVEXTRACTF128Immediate - Return the appropriate immediate
+/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
+/// instructions.
+unsigned X86::getExtractVEXTRACTF128Immediate(SDNode *N) {
+  if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
+    llvm_unreachable("Illegal extract subvector for VEXTRACTF128");
+
+  uint64_t Index =
+    cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+
+  EVT VecVT = N->getOperand(0).getValueType();
+  EVT ElVT = VecVT.getVectorElementType();
+
+  unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+  return Index / NumElemsPerChunk;
+}
+
+/// getInsertVINSERTF128Immediate - Return the appropriate immediate
+/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
+/// instructions.
+unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) {
+  if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
+    llvm_unreachable("Illegal insert subvector for VINSERTF128");
+
+  uint64_t Index =
+    cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+
+  EVT VecVT = N->getValueType(0);
+  EVT ElVT = VecVT.getVectorElementType();
+
+  unsigned NumElemsPerChunk = 128 / ElVT.getSizeInBits();
+
+  return Index / NumElemsPerChunk;
+}
+
 /// isZeroNode - Returns true if Elt is a constant zero or a floating point
 /// constant +0.0.
 bool X86::isZeroNode(SDValue Elt) {
@@ -3537,13 +3702,10 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
                              DebugLoc dl) {
   assert(VT.isVector() && "Expected a vector type");
 
-  // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted
+  // Always build SSE zero vectors as <4 x i32> bitcasted
   // to their dest type. This ensures they get CSE'd.
   SDValue Vec;
-  if (VT.getSizeInBits() == 64) { // MMX
-    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
-  } else if (VT.getSizeInBits() == 128) {
+  if (VT.getSizeInBits() == 128) {  // SSE
     if (HasSSE2) {  // SSE2
       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
@@ -3559,7 +3721,7 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
     SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
     Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
   }
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+  return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
 }
 
 /// getOnesVector - Returns a vector of specified type with all bits set.
@@ -3571,11 +3733,8 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   // type.  This ensures they get CSE'd.
   SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
   SDValue Vec;
-  if (VT.getSizeInBits() == 64)  // MMX
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
-  else                           // SSE
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+  return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
 }
 
@@ -3640,9 +3799,6 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
 /// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
 static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
-  if (SV->getValueType(0).getVectorNumElements() <= 4)
-    return SDValue(SV, 0);
-
   EVT PVT = MVT::v4f32;
   EVT VT = SV->getValueType(0);
   DebugLoc dl = SV->getDebugLoc();
@@ -3663,9 +3819,9 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
 
   // Perform the splat.
   int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
-  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1);
+  V1 = DAG.getNode(ISD::BITCAST, dl, PVT, V1);
   V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1);
+  return DAG.getNode(ISD::BITCAST, dl, VT, V1);
 }
 
 /// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
@@ -3789,7 +3945,7 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
   }
 
   // Actual nodes that may contain scalar elements
-  if (Opcode == ISD::BIT_CONVERT) {
+  if (Opcode == ISD::BITCAST) {
     V = V.getOperand(0);
     EVT SrcVT = V.getValueType();
     unsigned NumElems = VT.getVectorNumElements();
@@ -3978,7 +4134,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
     }
   }
 
-  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V);
+  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
 }
 
 /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
@@ -4017,11 +4173,10 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
 static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
                          unsigned NumBits, SelectionDAG &DAG,
                          const TargetLowering &TLI, DebugLoc dl) {
-  bool isMMX = VT.getSizeInBits() == 64;
-  EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
+  EVT ShVT = MVT::v2i64;
   unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
-  SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp);
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+  SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
+  return DAG.getNode(ISD::BITCAST, dl, VT,
                      DAG.getNode(Opc, dl, ShVT, SrcOp,
                              DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
 }
@@ -4029,7 +4184,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
 SDValue
 X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
                                           SelectionDAG &DAG) const {
-  
+
   // Check if the scalar load can be widened into a vector load. And if
   // the address is "base + cst" see if the cst can be "absorbed" into
   // the shuffle mask.
@@ -4046,8 +4201,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
     if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
       FI = FINode->getIndex();
       Offset = 0;
-    } else if (Ptr.getOpcode() == ISD::ADD &&
-               isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+    } else if (DAG.isBaseWithConstantOffset(Ptr) &&
               isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
      FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
      Offset = Ptr.getConstantOperandVal(1);
@@ -4084,41 +4238,42 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
     int EltNo = (Offset - StartOffset) >> 2;
     int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
     EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
-    SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0,
+    SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,
+                             LD->getPointerInfo().getWithOffset(StartOffset),
                              false, false, 0);
     // Canonicalize it to a v4i32 shuffle.
-    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+    V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
+    return DAG.getNode(ISD::BITCAST, dl, VT,
                        DAG.getVectorShuffle(MVT::v4i32, dl, V1,
-                                            DAG.getUNDEF(MVT::v4i32), &Mask[0]));
+                                            DAG.getUNDEF(MVT::v4i32),&Mask[0]));
   }
 
   return SDValue();
 }
 
-/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a 
-/// vector of type 'VT', see if the elements can be replaced by a single large 
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
 /// load which has the same value as a build_vector whose operands are 'elts'.
 ///
 /// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
-/// 
+///
 /// FIXME: we'd also like to handle the case where the last elements are zero
 /// rather than undef via VZEXT_LOAD, but we do not detect that case today.
 /// There's even a handy isZeroNode for that purpose.
 static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
-                                        DebugLoc &dl, SelectionDAG &DAG) {
+                                        DebugLoc &DL, SelectionDAG &DAG) {
   EVT EltVT = VT.getVectorElementType();
   unsigned NumElems = Elts.size();
-  
+
   LoadSDNode *LDBase = NULL;
   unsigned LastLoadedElt = -1U;
-  
+
   // For each element in the initializer, see if we've found a load or an undef.
-  // If we don't find an initial load element, or later load elements are 
+  // If we don't find an initial load element, or later load elements are
   // non-consecutive, bail out.
   for (unsigned i = 0; i < NumElems; ++i) {
     SDValue Elt = Elts[i];
-    
+
     if (!Elt.getNode() ||
         (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
       return SDValue();
@@ -4143,18 +4298,20 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
   // consecutive loads for the low half, generate a vzext_load node.
   if (LastLoadedElt == NumElems - 1) {
     if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
-      return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
-                         LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+      return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+                         LDBase->getPointerInfo(),
                          LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
-    return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
-                       LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+    return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
+                       LDBase->getPointerInfo(),
                        LDBase->isVolatile(), LDBase->isNonTemporal(),
                        LDBase->getAlignment());
   } else if (NumElems == 4 && LastLoadedElt == 1) {
     SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
     SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
-    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
+    SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
+                                              Ops, 2, MVT::i32,
+                                              LDBase->getMemOperand());
+    return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
   }
   return SDValue();
 }
@@ -4162,6 +4319,35 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
 SDValue
 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
+
+  EVT VT = Op.getValueType();
+  EVT ExtVT = VT.getVectorElementType();
+
+  unsigned NumElems = Op.getNumOperands();
+
+  // For AVX-length vectors, build the individual 128-bit pieces and
+  // use shuffles to put them in place.
+  if (VT.getSizeInBits() > 256 &&
+      Subtarget->hasAVX() &&
+      !Disable256Bit &&
+      !ISD::isBuildVectorAllZeros(Op.getNode())) {
+    SmallVector<SDValue, 8> V;
+    V.resize(NumElems);
+    for (unsigned i = 0; i < NumElems; ++i) {
+      V[i] = Op.getOperand(i);
+    }
+
+    EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
+
+    // Build the lower subvector.
+    SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
+    // Build the upper subvector.
+    SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
+                                NumElems/2);
+
+    return ConcatVectors(Lower, Upper, DAG);
+  }
+
   // All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
   // All one's are handled with pcmpeqd. In AVX, zero's are handled with
   // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
@@ -4169,10 +4355,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
       (Op.getValueType().getSizeInBits() != 256 &&
        ISD::isBuildVectorAllOnes(Op.getNode()))) {
-    // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
+    // Canonicalize this to <4 x i32> (SSE) to
     // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
     // eliminated on x86-32 hosts.
-    if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
+    if (Op.getValueType() == MVT::v4i32)
       return Op;
 
     if (ISD::isBuildVectorAllOnes(Op.getNode()))
@@ -4180,11 +4366,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
   }
 
-  EVT VT = Op.getValueType();
-  EVT ExtVT = VT.getVectorElementType();
   unsigned EVTBits = ExtVT.getSizeInBits();
 
-  unsigned NumElems = Op.getNumOperands();
   unsigned NumZero  = 0;
   unsigned NumNonZero = 0;
   unsigned NonZeros = 0;
@@ -4223,9 +4406,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
         (!IsAllConstants || Idx == 0)) {
       if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
-        // Handle MMX and SSE both.
-        EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
-        unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;
+        // Handle SSE only.
+        assert(VT == MVT::v2i64 && "Expected an SSE value type!");
+        EVT VecVT = MVT::v4i32;
+        unsigned VecElts = 4;
 
         // Truncate the value (which may itself be a constant) to i32, and
         // convert it to a vector with movd (S2V+shuffle to zero extend).
@@ -4245,7 +4429,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
                                            DAG.getUNDEF(Item.getValueType()),
                                            &Mask[0]);
         }
-        return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item);
+        return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item);
       }
     }
 
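[Editorial aside] The AVX path at the top of LowerBUILD_VECTOR above splits one wide build_vector into two half-width ones and then concatenates them (ConcatVectors is introduced elsewhere in this patch). The splitting itself is just operand bookkeeping; a hedged sketch over plain arrays:

    // <8 x float> {a,b,c,d,e,f,g,h} becomes two <4 x float> halves that a
    // vinsertf128-style concat later reassembles.
    static void splitBuildVector(const float *Elts, unsigned NumElems,
                                 const float *&Lo, const float *&Hi) {
      Lo = Elts;                  // operands [0, NumElems/2)
      Hi = Elts + NumElems / 2;   // operands [NumElems/2, NumElems)
    }
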
@@ -4264,11 +4448,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
                                            DAG);
       } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
         Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
-        EVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+        assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+        EVT MiddleVT = MVT::v4i32;
         Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
         Item = getShuffleVectorZeroOrUndef(Item, 0, true,
                                            Subtarget->hasSSE2(), DAG);
-        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
+        return DAG.getNode(ISD::BITCAST, dl, VT, Item);
       }
     }
 
@@ -4394,20 +4579,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
     // Check for a build vector of consecutive loads.
     for (unsigned i = 0; i < NumElems; ++i)
       V[i] = Op.getOperand(i);
-    
+
     // Check for elements which are consecutive loads.
     SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
     if (LD.getNode())
      return LD;
-    
-    // For SSE 4.1, use insertps to put the high elements into the low element. 
+
+    // For SSE 4.1, use insertps to put the high elements into the low element.
    if (getSubtarget()->hasSSE41()) {
      SDValue Result;
      if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
        Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
      else
        Result = DAG.getUNDEF(VT);
-      
+
       for (unsigned i = 1; i < NumElems; ++i) {
         if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
         Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
@@ -4415,7 +4600,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
       }
       return Result;
     }
-    
+
     // Otherwise, expand into a number of unpckl*, start by extending each of
     // our (non-undef) elements to the full vector width with the element in the
     // bottom slot of the vector (which generates no code for SSE).
@@ -4441,7 +4626,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
         if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
             EltStride == NumElems/2)
           continue;
-        
+
         V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
       }
       EltStride >>= 1;
@@ -4461,21 +4646,21 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
   assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
          ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
   int Mask[2];
-  SDValue InVec = DAG.getNode(ISD::BIT_CONVERT,dl, MVT::v1i64, Op.getOperand(0));
+  SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0));
   SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
   InVec = Op.getOperand(1);
   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
     unsigned NumElts = ResVT.getVectorNumElements();
-    VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+    VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
     VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
                        InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
   } else {
-    InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64, InVec);
+    InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec);
     SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
     Mask[0] = 0; Mask[1] = 2;
     VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
   }
-  return DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+  return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp);
 }
 
 // v8i16 shuffles - Prefer shuffles in the following order:
@@ -4557,9 +4742,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
     MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
     MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
     NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
-                  DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1),
-                  DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]);
-    NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV);
+                  DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
+                  DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
+    NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV);
 
     // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
     // source words for the shuffle, to aid later transformations.
@@ -4628,12 +4813,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
       pshufbMask.push_back(DAG.getConstant(EltIdx,   MVT::i8));
       pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
     }
-    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1);
+    V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1);
     V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
                      DAG.getNode(ISD::BUILD_VECTOR, dl,
                                  MVT::v16i8, &pshufbMask[0], 16));
     if (!TwoInputs)
-      return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+      return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
 
     // Calculate the shuffle mask for the second input, shuffle it, and
     // OR it with the first shuffled input.
@@ -4648,12 +4833,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
       pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
       pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
     }
-    V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2);
+    V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2);
     V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
                      DAG.getNode(ISD::BUILD_VECTOR, dl,
                                  MVT::v16i8, &pshufbMask[0], 16));
     V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
+    return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
   }
 
   // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
@@ -4820,8 +5005,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
   // No SSSE3 - Calculate in place words and then fix all out of place words
   // With 0-16 extracts & inserts.  Worst case is 16 bytes out of order from
   // the 16 different words that comprise the two doublequadword input vectors.
-  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
-  V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V2);
+  V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+  V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
   SDValue NewV = V2Only ? V2 : V1;
   for (int i = 0; i != 8; ++i) {
     int Elt0 = MaskVals[i*2];
@@ -4883,25 +5068,23 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
       NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
                          DAG.getIntPtrConstant(i));
   }
-  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, NewV);
+  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
 }
 
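[Editorial aside] A worked view of the PSHUFB trick used in the two lowerings above: each input is shuffled with its own byte mask and the results are ORed, where a mask byte with its high bit set (0x80) produces a zero byte. A hedged sketch (the helper below is illustrative, not part of the patch):

    // Build the two 16-byte PSHUFB masks for a two-input byte shuffle;
    // pshufb(V1, M1) | pshufb(V2, M2) then realizes the combined mask.
    static void buildPshufbMasks(const int MaskVals[16],
                                 unsigned char M1[16], unsigned char M2[16]) {
      for (int i = 0; i != 16; ++i) {
        int Elt = MaskVals[i];
        M1[i] = (Elt < 0 || Elt >= 16) ? 0x80 : (unsigned char)Elt;
        M2[i] = (Elt < 16)             ? 0x80 : (unsigned char)(Elt - 16);
      }
    }
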
 /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
-/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be
+/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
 /// done when every pair / quad of shuffle mask elements point to elements in
 /// the right sequence. e.g.
-/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
+/// vector_shuffle X, Y, <2, 3, | 10, 11, | 0, 1, | 14, 15>
 static
 SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
-                                 SelectionDAG &DAG,
-                                 const TargetLowering &TLI, DebugLoc dl) {
+                                 SelectionDAG &DAG, DebugLoc dl) {
   EVT VT = SVOp->getValueType(0);
   SDValue V1 = SVOp->getOperand(0);
   SDValue V2 = SVOp->getOperand(1);
   unsigned NumElems = VT.getVectorNumElements();
   unsigned NewWidth = (NumElems == 4) ? 2 : 4;
-  EVT MaskVT = (NewWidth == 4) ? MVT::v4i16 : MVT::v2i32;
-  EVT NewVT = MaskVT;
+  EVT NewVT;
   switch (VT.getSimpleVT().SimpleTy) {
   default: assert(false && "Unexpected!");
   case MVT::v4f32: NewVT = MVT::v2f64; break;
@@ -4910,12 +5093,6 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
   case MVT::v16i8: NewVT = MVT::v4i32; break;
   }
 
-  if (NewWidth == 2) {
-    if (VT.isInteger())
-      NewVT = MVT::v2i64;
-    else
-      NewVT = MVT::v2f64;
-  }
   int Scale = NumElems / NewWidth;
   SmallVector<int, 8> MaskVec;
   for (unsigned i = 0; i < NumElems; i += Scale) {
@@ -4935,8 +5112,8 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
     MaskVec.push_back(StartIdx / Scale);
   }
 
-  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1);
-  V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2);
+  V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
+  V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
   return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
 }
 
@@ -4953,13 +5130,13 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
       // movssrr and movsdrr do not clear top bits. Try to use movd, movq
       // instead.
       MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
-      if ((ExtVT.SimpleTy != MVT::i64 || Subtarget->is64Bit()) &&
+      if ((ExtVT != MVT::i64 || Subtarget->is64Bit()) &&
           SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
-          SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
+          SrcOp.getOperand(0).getOpcode() == ISD::BITCAST &&
          SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
        // PR2108
        OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
-        return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+        return DAG.getNode(ISD::BITCAST, dl, VT,
                           DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
                                       DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
                                                   OpVT,
@@ -4969,9 +5146,9 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
     }
   }
 
-  return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+  return DAG.getNode(ISD::BITCAST, dl, VT,
                      DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
-                                 DAG.getNode(ISD::BIT_CONVERT, dl,
+                                 DAG.getNode(ISD::BITCAST, dl,
                                              OpVT, SrcOp)));
 }
 
@@ -5125,7 +5302,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
 }
 
 static bool MayFoldVectorLoad(SDValue V) {
-  if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
+  if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
     V = V.getOperand(0);
   if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
     V = V.getOperand(0);
@@ -5134,39 +5311,143 @@ static bool MayFoldVectorLoad(SDValue V) {
   return false;
 }
 
+// FIXME: the version above should always be used.  Since there's
+// a bug where several vector shuffles can't be folded because the
+// DAG is not updated during lowering and a node claims to have two
+// uses while it only has one, use this version, and let isel match
+// another instruction if the load really happens to have more than
+// one use.  Remove this version after this bug gets fixed.
+// rdar://8434668, PR8156
+static bool RelaxedMayFoldVectorLoad(SDValue V) {
+  if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
+    V = V.getOperand(0);
+  if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+    V = V.getOperand(0);
+  if (ISD::isNormalLoad(V.getNode()))
+    return true;
+  return false;
+}
+
+/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by
+/// a vector extract, and if both can be later optimized into a single load.
+/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
+/// here because otherwise a target specific shuffle node is going to be
+/// emitted for this shuffle, and the optimization not done.
+/// FIXME: This is probably not the best approach, but fix the problem
+/// until the right path is decided.
 static
-SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
-                        bool HasSSE2) {
-  SDValue V1 = Op.getOperand(0);
-  SDValue V2 = Op.getOperand(1);
-  EVT VT = Op.getValueType();
+bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
+                                         const TargetLowering &TLI) {
+  EVT VT = V.getValueType();
+  ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
 
-  assert(VT != MVT::v2i64 && "unsupported shuffle type");
+  // Be sure that the vector shuffle is present in a pattern like this:
+  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
+  if (!V.hasOneUse())
+    return false;
 
-  if (HasSSE2 && VT == MVT::v2f64)
-    return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
+  SDNode *N = *V.getNode()->use_begin();
+  if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return false;
 
-  // v4f32 or v4i32
-  return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
-}
+  SDValue EltNo = N->getOperand(1);
+  if (!isa<ConstantSDNode>(EltNo))
+    return false;
 
-static
-SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
-  SDValue V1 = Op.getOperand(0);
-  SDValue V2 = Op.getOperand(1);
-  EVT VT = Op.getValueType();
+  // If the bit convert changed the number of elements, it is unsafe
+  // to examine the mask.
+  bool HasShuffleIntoBitcast = false;
+  if (V.getOpcode() == ISD::BITCAST) {
+    EVT SrcVT = V.getOperand(0).getValueType();
+    if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
+      return false;
+    V = V.getOperand(0);
+    HasShuffleIntoBitcast = true;
+  }
 
-  assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
-         "unsupported shuffle type");
+  // Select the input vector, guarding against out of range extract vector.
+  unsigned NumElems = VT.getVectorNumElements();
+  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+  int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
+  V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
 
-  if (V2.getOpcode() == ISD::UNDEF)
-    V2 = V1;
+  // Skip one more bit_convert if necessary
+  if (V.getOpcode() == ISD::BITCAST)
+    V = V.getOperand(0);
 
-  // v4i32 or v4f32
-  return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
-}
+  if (ISD::isNormalLoad(V.getNode())) {
+    // Is the original load suitable?
+    LoadSDNode *LN0 = cast<LoadSDNode>(V);
 
-static
+    // FIXME: avoid the multi-use bug that is preventing lots of
+    // foldings from being detected; this is still wrong of course, but
+    // gives the temporary desired behavior, and if it happens that
+    // the load really has more uses, during isel it will not fold, and
+    // will generate poor code.
+    if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse()
+      return false;
+
+    if (!HasShuffleIntoBitcast)
+      return true;
+
+    // If there's a bitcast before the shuffle, check if the load type and
+    // alignment is valid.
+    unsigned Align = LN0->getAlignment();
+    unsigned NewAlign =
+      TLI.getTargetData()->getABITypeAlignment(
+                                    VT.getTypeForEVT(*DAG.getContext()));
+
+    if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+      return false;
+  }
+
+  return true;
+}
+
+static
+SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+
+  // Canonicalize to v2f64.
+  V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
+  return DAG.getNode(ISD::BITCAST, dl, VT,
+                     getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64,
+                                          V1, DAG));
+}
+
+static
+SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
+                        bool HasSSE2) {
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  EVT VT = Op.getValueType();
+
+  assert(VT != MVT::v2i64 && "unsupported shuffle type");
+
+  if (HasSSE2 && VT == MVT::v2f64)
+    return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
+
+  // v4f32 or v4i32
+  return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  EVT VT = Op.getValueType();
+
+  assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
+         "unsupported shuffle type");
+
+  if (V2.getOpcode() == ISD::UNDEF)
+    V2 = V1;
+
+  // v4i32 or v4f32
+  return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
+}
+
+static
 SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
   SDValue V1 = Op.getOperand(0);
   SDValue V2 = Op.getOperand(1);
@@ -5191,6 +5472,10 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
   if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
     CanFoldLoad = true;
 
+  // Both of them can't be memory operations though.
+  if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
+    CanFoldLoad = false;
+
   if (CanFoldLoad) {
     if (HasSSE2 && NumElems == 2)
       return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
@@ -5228,7 +5513,7 @@ static inline unsigned getUNPCKLOpcode(EVT VT) {
   case MVT::v16i8: return X86ISD::PUNPCKLBW;
   case MVT::v8i16: return X86ISD::PUNPCKLWD;
   default:
-    llvm_unreachable("Unknow type for unpckl");
+    llvm_unreachable("Unknown type for unpckl");
   }
   return 0;
 }
@@ -5242,63 +5527,111 @@ static inline unsigned getUNPCKHOpcode(EVT VT) {
   case MVT::v16i8: return X86ISD::PUNPCKHBW;
   case MVT::v8i16: return X86ISD::PUNPCKHWD;
   default:
-    llvm_unreachable("Unknow type for unpckh");
+    llvm_unreachable("Unknown type for unpckh");
   }
   return 0;
 }
 
-SDValue
-X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+static
+SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
+                               const TargetLowering &TLI,
+                               const X86Subtarget *Subtarget) {
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
-  SDValue V1 = Op.getOperand(0);
-  SDValue V2 = Op.getOperand(1);
   EVT VT = Op.getValueType();
   DebugLoc dl = Op.getDebugLoc();
-  unsigned NumElems = VT.getVectorNumElements();
-  bool isMMX = VT.getSizeInBits() == 64;
-  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
-  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
-  bool V1IsSplat = false;
-  bool V2IsSplat = false;
-  bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
-  bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
-  MachineFunction &MF = DAG.getMachineFunction();
-  bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
 
   if (isZeroShuffle(SVOp))
     return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
 
-  // Promote splats to v4f32.
+  // Handle splat operations
   if (SVOp->isSplat()) {
-    if (isMMX || NumElems < 4)
+    // Special case, this is the only place now where it's
+    // allowed to return a vector_shuffle operation without
+    // using a target specific node, because *hopefully* it
+    // will be optimized away by the dag combiner.
+    if (VT.getVectorNumElements() <= 4 &&
+        CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
       return Op;
+
+    // Handle splats by matching through known masks
+    if (VT.getVectorNumElements() <= 4)
+      return SDValue();
+
+    // Canonicalize all of the remaining to v4f32.
     return PromoteSplat(SVOp, DAG);
   }
 
   // If the shuffle can be profitably rewritten as a narrower shuffle, then
   // do it!
   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
-    SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+    SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
     if (NewOp.getNode())
-      return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
-                         LowerVECTOR_SHUFFLE(NewOp, DAG));
+      return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
   } else if ((VT == MVT::v4i32 ||
             (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
    // FIXME: Figure out a cleaner way to do this.
    // Try to make use of movq to zero out the top part.
     if (ISD::isBuildVectorAllZeros(V2.getNode())) {
-      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
       if (NewOp.getNode()) {
         if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
           return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
                               DAG, Subtarget, dl);
       }
     } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
-      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
+      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
       if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
         return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
                             DAG, Subtarget, dl);
     }
   }
+  return SDValue();
+}
+
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned NumElems = VT.getVectorNumElements();
+  bool isMMX = VT.getSizeInBits() == 64;
+  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+  bool V1IsSplat = false;
+  bool V2IsSplat = false;
+  bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+  bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
+  bool HasSSSE3 = Subtarget->hasSSSE3() || Subtarget->hasAVX();
+  MachineFunction &MF = DAG.getMachineFunction();
+  bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
+  // Shuffle operations on MMX not supported.
+  if (isMMX)
+    return Op;
+
+  // Vector shuffle lowering takes 3 steps:
+  //
+  // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
+  //    narrowing and commutation of operands should be handled.
+  // 2) Matching of shuffles with known shuffle masks to x86 target specific
+  //    shuffle nodes.
+  // 3) Rewriting of unmatched masks into new generic shuffle operations,
+  //    so the shuffle can be broken into other shuffles and the legalizer can
+  //    try the lowering again.
+  //
+  // The general idea is that no vector_shuffle operation should be left to
+  // be matched during isel, all of them must be converted to a target specific
+  // node here.
+
+  // Normalize the input vectors. Here splats, zeroed vectors, profitable
+  // narrowing and commutation of operands should be handled. The actual code
+  // doesn't include all of those, work in progress...
+  SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+  if (NewOp.getNode())
+    return NewOp;
 
   // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
   // unpckh_undef). Only use pshufd if speed is more important than size.
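[Editorial aside] The three-step plan in the comment block above, reduced to a hedged skeleton. The phase helpers here are stand-ins (only NormalizeVectorShuffle exists under this name in the patch), and the value type merely needs to be testable in a condition, like SDValue:

    // Illustrative driver for the lowering pipeline the comment describes.
    template <class ValueT, class F1, class F2, class F3>
    static ValueT lowerShuffle(ValueT Op, F1 normalize, F2 matchKnownMask,
                               F3 rewriteGeneric) {
      if (ValueT N = normalize(Op))      // 1) splats, zeros, narrowing...
        return N;
      if (ValueT M = matchKnownMask(Op)) // 2) masks -> MOVDDUP, UNPCK*, ...
        return M;
      return rewriteGeneric(Op);         // 3) break up, legalize, retry
    }
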
@@ -5309,6 +5642,18 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (VT != MVT::v2i64 && VT != MVT::v2f64) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + if (X86::isMOVDDUPMask(SVOp) && HasSSE3 && V2IsUndef && + RelaxedMayFoldVectorLoad(V1)) + return getMOVDDup(Op, dl, V1, DAG); + + if (X86::isMOVHLPS_v_undef_Mask(SVOp)) + return getMOVHighToLow(Op, dl, DAG); + + // Used to match splats + if (HasSSE2 && X86::isUNPCKHMask(SVOp) && V2IsUndef && + (VT == MVT::v2f64 || VT == MVT::v2i64)) + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + if (X86::isPSHUFDMask(SVOp)) { // The actual implementation will match the mask in the if above and then // during isel it can match several different instructions, not only pshufd @@ -5349,7 +5694,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl); - if (!isMMX && !X86::isMOVLPMask(SVOp)) { + if (!X86::isMOVLPMask(SVOp)) { if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64)) return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG); @@ -5359,22 +5704,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } // FIXME: fold these into legal mask. - if (!isMMX) { - if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) - return getMOVLowToHigh(Op, dl, DAG, HasSSE2); + if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) + return getMOVLowToHigh(Op, dl, DAG, HasSSE2); - if (X86::isMOVHLPSMask(SVOp)) - return getMOVHighToLow(Op, dl, DAG); + if (X86::isMOVHLPSMask(SVOp)) + return getMOVHighToLow(Op, dl, DAG); - if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) - return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG); + if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG); - if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) - return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG); + if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4) + return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG); - if (X86::isMOVLPMask(SVOp)) - return getMOVLP(Op, dl, DAG, HasSSE2); - } + if (X86::isMOVLPMask(SVOp)) + return getMOVLP(Op, dl, DAG, HasSSE2); if (ShouldXformToMOVHLPS(SVOp) || ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp)) @@ -5414,13 +5757,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKL_v_undef_Mask(SVOp) || X86::isUNPCKLMask(SVOp)) - return (isMMX) ? - Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); + if (X86::isUNPCKLMask(SVOp)) + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); - if (X86::isUNPCKH_v_undef_Mask(SVOp) || X86::isUNPCKHMask(SVOp)) - return (isMMX) ?
- Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); + if (X86::isUNPCKHMask(SVOp)) + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first @@ -5443,19 +5784,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); - if (X86::isUNPCKL_v_undef_Mask(NewSVOp) || X86::isUNPCKLMask(NewSVOp)) - return (isMMX) ? - NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); + if (X86::isUNPCKLMask(NewSVOp)) + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); - if (X86::isUNPCKH_v_undef_Mask(NewSVOp) || X86::isUNPCKHMask(NewSVOp)) - return (isMMX) ? - NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); + if (X86::isUNPCKHMask(NewSVOp)) + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); } - // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle. - // Normalize the node to match x86 shuffle ops if needed - if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) + if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) return CommuteVectorShuffle(SVOp, DAG); // The checks below are all present in isShuffleMaskLegal, but they are @@ -5464,15 +5801,18 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SmallVector<int, 16> M; SVOp->getMask(M); - // Very little shuffling can be done for 64-bit vectors right now. - if (VT.getSizeInBits() == 64) - return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ? Op : SDValue(); + if (isPALIGNRMask(M, VT, HasSSSE3)) + return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2, + X86::getShufflePALIGNRImmediate(SVOp), + DAG); - // FIXME: pshufb, blends, shifts. - if (VT.getVectorNumElements() == 2 || - ShuffleVectorSDNode::isSplatMask(&M[0], VT) || - isPALIGNRMask(M, VT, Subtarget->hasSSSE3())) - return Op; + if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && + SVOp->getSplatIndex() == 0 && V2IsUndef) { + if (VT == MVT::v2f64) + return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG); + if (VT == MVT::v2i64) + return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG); + } if (isPSHUFHWMask(M, VT)) return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1, @@ -5494,6 +5834,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { TargetMask, DAG); } + if (X86::isUNPCKL_v_undef_Mask(SVOp)) + if (VT != MVT::v2i64 && VT != MVT::v2f64) + return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + if (X86::isUNPCKH_v_undef_Mask(SVOp)) + if (VT != MVT::v2i64 && VT != MVT::v2f64) + return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG); @@ -5507,8 +5854,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return NewOp; } - // Handle all 4 wide cases with a number of shuffles except for MMX. - if (NumElems == 4 && !isMMX) + // Handle all 4 wide cases with a number of shuffles. 
+ if (NumElems == 4) return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); return SDValue(); @@ -5531,7 +5878,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, if (Idx == 0) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BIT_CONVERT, dl, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), Op.getOperand(1))); @@ -5552,14 +5899,14 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, if ((User->getOpcode() != ISD::STORE || (isa<ConstantSDNode>(Op.getOperand(1)) && cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) && - (User->getOpcode() != ISD::BIT_CONVERT || + (User->getOpcode() != ISD::BITCAST || User->getValueType(0) != MVT::i32)) return SDValue(); SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), Op.getOperand(1)); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Extract); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract); } else if (VT == MVT::i32) { // ExtractPS works with constant index. if (isa<ConstantSDNode>(Op.getOperand(1))) @@ -5575,6 +5922,38 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, if (!isa<ConstantSDNode>(Op.getOperand(1))) return SDValue(); + SDValue Vec = Op.getOperand(0); + EVT VecVT = Vec.getValueType(); + + // If this is a 256-bit vector result, first extract the 128-bit + // vector and then extract from the 128-bit vector. + if (VecVT.getSizeInBits() > 128) { + DebugLoc dl = Op.getNode()->getDebugLoc(); + unsigned NumElems = VecVT.getVectorNumElements(); + SDValue Idx = Op.getOperand(1); + + if (!isa<ConstantSDNode>(Idx)) + return SDValue(); + + unsigned ExtractNumElems = NumElems / (VecVT.getSizeInBits() / 128); + unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + // Get the 128-bit vector. + bool Upper = IdxVal >= ExtractNumElems; + Vec = Extract128BitVector(Vec, Idx, DAG, dl); + + // Extract from it. + SDValue ScaledIdx = Idx; + if (Upper) + ScaledIdx = DAG.getNode(ISD::SUB, dl, Idx.getValueType(), Idx, + DAG.getConstant(ExtractNumElems, + Idx.getValueType())); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, + ScaledIdx); + } + + assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length"); + if (Subtarget->hasSSE41()) { SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); if (Res.getNode()) @@ -5590,7 +5969,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, if (Idx == 0) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - DAG.getNode(ISD::BIT_CONVERT, dl, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Vec), Op.getOperand(1))); // Transform it so it match pextrw which produces a 32-bit result. @@ -5650,8 +6029,6 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, unsigned Opc; if (VT == MVT::v8i16) Opc = X86ISD::PINSRW; - else if (VT == MVT::v4i16) - Opc = X86ISD::MMX_PINSRW; else if (VT == MVT::v16i8) Opc = X86ISD::PINSRB; else @@ -5689,17 +6066,45 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + + // If this is a 256-bit vector result, first insert into a 128-bit + // vector and then insert into the 256-bit vector. 
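// The 256-bit extract path above and the 256-bit insert path below share
// one arithmetic idea: pick the 128-bit half that holds the element, then
// rebase the index into that half. A standalone sketch of that computation
// (helper and names are illustrative, not part of the patch):
static void splitIndexInto128BitHalf(unsigned NumElems, unsigned IdxVal,
                                     bool &Upper, unsigned &NewIdx) {
  unsigned HalfElems = NumElems / 2; // elements per 128-bit half of a 256-bit vector
  Upper = IdxVal >= HalfElems;       // does the index land in the high half?
  NewIdx = Upper ? IdxVal - HalfElems : IdxVal;
}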
+ if (VT.getSizeInBits() > 128) { + if (!isa<ConstantSDNode>(N2)) + return SDValue(); + + // Get the 128-bit vector. + unsigned NumElems = VT.getVectorNumElements(); + unsigned IdxVal = cast<ConstantSDNode>(N2)->getZExtValue(); + bool Upper = IdxVal >= NumElems / 2; + + SDValue SubN0 = Extract128BitVector(N0, N2, DAG, dl); + + // Insert into it. + SDValue ScaledN2 = N2; + if (Upper) + ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2, + DAG.getConstant(NumElems / + (VT.getSizeInBits() / 128), + N2.getValueType())); + Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0, + N1, ScaledN2); + + // Insert the 128-bit vector + // FIXME: Why UNDEF? + return Insert128BitVector(N0, Op, N2, DAG, dl); + } + if (Subtarget->hasSSE41()) return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG); if (EltVT == MVT::i8) return SDValue(); - DebugLoc dl = Op.getDebugLoc(); - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - SDValue N2 = Op.getOperand(2); - if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) { // Transform it so it match pinsrw which expects a 16-bit value in a GR32 // as its second argument. @@ -5707,31 +6112,79 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); - return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW, - dl, VT, N0, N1, N2); + return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2); } return SDValue(); } SDValue X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { + LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - + EVT OpVT = Op.getValueType(); + + // If this is a 256-bit vector result, first insert into a 128-bit + // vector and then insert into the 256-bit vector. + if (OpVT.getSizeInBits() > 128) { + // Insert into a 128-bit vector. + EVT VT128 = EVT::getVectorVT(*Context, + OpVT.getVectorElementType(), + OpVT.getVectorNumElements() / 2); + + Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0)); + + // Insert the 128-bit vector. + return Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, OpVT), Op, + DAG.getConstant(0, MVT::i32), + DAG, dl); + } + if (Op.getValueType() == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); - EVT VT = MVT::v2i32; - switch (Op.getValueType().getSimpleVT().SimpleTy) { - default: break; - case MVT::v16i8: - case MVT::v8i16: - VT = MVT::v4i32; - break; + assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 && + "Expected an SSE type!"); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt)); +} + +// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in +// a simple subregister reference or explicit instructions to grab +// upper bits of a vector. 
+SDValue +X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget->hasAVX()) { + DebugLoc dl = Op.getNode()->getDebugLoc(); + SDValue Vec = Op.getNode()->getOperand(0); + SDValue Idx = Op.getNode()->getOperand(1); + + if (Op.getNode()->getValueType(0).getSizeInBits() == 128 + && Vec.getNode()->getValueType(0).getSizeInBits() == 256) { + return Extract128BitVector(Vec, Idx, DAG, dl); + } } - return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt)); + return SDValue(); +} + +// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a +// simple superregister reference or explicit instructions to insert +// the upper bits of a vector. +SDValue +X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget->hasAVX()) { + DebugLoc dl = Op.getNode()->getDebugLoc(); + SDValue Vec = Op.getNode()->getOperand(0); + SDValue SubVec = Op.getNode()->getOperand(1); + SDValue Idx = Op.getNode()->getOperand(2); + + if (Op.getNode()->getValueType(0).getSizeInBits() == 256 + && SubVec.getNode()->getValueType(0).getSizeInBits() == 128) { + return Insert128BitVector(Vec, SubVec, Idx, DAG, dl); + } + } + return SDValue(); } // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as @@ -5797,12 +6250,11 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); // With PIC, the address is actually $g + Offset. - if (OpFlag) { + if (OpFlag) Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy()), Result); - } return Result; } @@ -5906,7 +6358,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0, false, false, 0); + MachinePointerInfo::getGOT(), false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. @@ -5929,7 +6381,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags) { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); DebugLoc dl = GA->getDebugLoc(); SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), @@ -5978,14 +6430,14 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, TLSModel::Model model, bool is64Bit) { DebugLoc dl = GA->getDebugLoc(); - // Get the Thread Pointer - SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress, - DebugLoc(), PtrVT, - DAG.getRegister(is64Bit? X86::FS : X86::GS, - MVT::i32)); - SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base, - NULL, 0, false, false, 0); + // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit). + Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(), + is64Bit ? 257 : 256)); + + SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getIntPtrConstant(0), + MachinePointerInfo(Ptr), false, false, 0); unsigned char OperandFlags = 0; // Most TLS accesses are not RIP relative, even on x86-64. 
One exception is @@ -6004,14 +6456,14 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial // exec) - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); if (model == TLSModel::InitialExec) Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, - PseudoSourceValue::getGOT(), 0, false, false, 0); + MachinePointerInfo::getGOT(), false, false, 0); // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. @@ -6020,29 +6472,29 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, SDValue X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { - + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GA->getGlobal(); if (Subtarget->isTargetELF()) { // TODO: implement the "local dynamic" model // TODO: implement the "initial exec"model for pic executables - + // If GV is an alias then use the aliasee for determining // thread-localness. if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) GV = GA->resolveAliasedGlobal(false); - - TLSModel::Model model + + TLSModel::Model model = getTLSModel(GV, getTargetMachine().getRelocationModel()); - + switch (model) { case TLSModel::GeneralDynamic: case TLSModel::LocalDynamic: // not implemented if (Subtarget->is64Bit()) return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); - + case TLSModel::InitialExec: case TLSModel::LocalExec: return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, @@ -6053,7 +6505,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { unsigned char OpFlag = 0; unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ? X86ISD::WrapperRIP : X86ISD::Wrapper; - + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) && @@ -6062,24 +6514,26 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { OpFlag = X86II::MO_TLVP_PIC_BASE; else OpFlag = X86II::MO_TLVP; - DebugLoc DL = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, - getPointerTy(), + GA->getValueType(0), GA->getOffset(), OpFlag); SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); - + // With PIC32, the address is actually $g + Offset. if (PIC32) Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy()), Offset); - + // Lowering the machine isd will make sure everything is in the right // location. - SDValue Args[] = { Offset }; - SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1); - + SDValue Chain = DAG.getEntryNode(); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Args[] = { Chain, Offset }; + Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2); + // TLSCALL will be codegen'ed as call. Inform MFI that function has calls. 
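// For the Darwin path just lowered: at run time the TLSCALL amounts to an
// indirect call through a per-variable descriptor whose first field is a
// resolver that returns the variable's address. A standalone sketch of that
// contract (field names are illustrative, not the dyld definitions):
struct TLVDescriptor {
  void *(*thunk)(TLVDescriptor *); // resolver invoked by the lowered call
  unsigned long key;               // key of the thread's TLV storage block
  unsigned long offset;            // variable's offset within that block
};
static void *tlvAddress(TLVDescriptor *D) {
  return D->thunk(D);              // roughly what the TLSCALL pseudo performs
}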
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setAdjustsStack(true); @@ -6089,7 +6543,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); } - + assert(false && "TLS not implemented for this target."); @@ -6148,12 +6602,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); - if (SrcVT.isVector()) { - if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) { - return Op; - } + if (SrcVT.isVector()) return SDValue(); - } assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 && "Unknown SINT_TO_FP to lower!"); @@ -6174,25 +6624,36 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0, + MachinePointerInfo::getFixedStack(SSFI), false, false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, - SDValue StackSlot, + SDValue StackSlot, SelectionDAG &DAG) const { // Build the FILD - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); SDVTList Tys; bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType()); if (useSSE) - Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag); + Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue); else Tys = DAG.getVTList(Op.getValueType(), MVT::Other); + + unsigned ByteSize = SrcVT.getSizeInBits()/8; + + int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex(); + MachineMemOperand *MMO = + DAG.getMachineFunction() + .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), + MachineMemOperand::MOLoad, ByteSize, ByteSize); + SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) }; - SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, dl, - Tys, Ops, array_lengthof(Ops)); + SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG : + X86ISD::FILD, DL, + Tys, Ops, array_lengthof(Ops), + SrcVT, MMO); if (useSSE) { Chain = Result.getValue(1); @@ -6202,15 +6663,23 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, // shouldn't be necessary except that RFP cannot be live across // multiple blocks. When stackifier is fixed, they can be uncoupled. 
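// A standalone illustration of why that store/load pair exists: the x87 and
// XMM register files are disjoint, so a value produced by FILD can only
// reach an SSE register through memory, and the rounding to the final width
// happens at the store. Sketch assuming long double is the 80-bit x87
// format, as on typical x86 ABIs:
#include <cstdint>
static double i64ToF64ViaX87(int64_t X) {
  long double Wide = static_cast<long double>(X); // FILD: full x87 width
  return static_cast<double>(Wide);               // FST rounds on the way out
}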
MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); + unsigned SSFISize = Op.getValueType().getSizeInBits()/8; + int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); Tys = DAG.getVTList(MVT::Other); SDValue Ops[] = { Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag }; - Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops)); - Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0, + MachineMemOperand *MMO = + DAG.getMachineFunction() + .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), + MachineMemOperand::MOStore, SSFISize, SSFISize); + + Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, + Ops, array_lengthof(Ops), + Op.getValueType(), MMO); + Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot, + MachinePointerInfo::getFixedStack(SSFI), false, false, 0); } @@ -6284,12 +6753,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, DAG.getIntPtrConstant(0))); SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); - SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); + SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); @@ -6317,19 +6786,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, DAG.getIntPtrConstant(0))); Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Load), + DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load), DAG.getIntPtrConstant(0)); // Or the load with the bias. SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Load)), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias))); Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Or), + DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or), DAG.getIntPtrConstant(0)); // Subtract the bias. 
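// "Or the load with the bias" / "Subtract the bias" refer to the classic
// double-precision trick the i32 path is built on: OR the unsigned value
// into the mantissa of 2^52, then subtract 2^52, giving an exact
// conversion with no branches. A standalone scalar sketch:
#include <cstdint>
#include <cstring>
static double u32ToDouble(uint32_t X) {
  uint64_t Bits = 0x4330000000000000ULL | X; // 2^52 with X in the low mantissa bits
  double D;
  std::memcpy(&D, &Bits, sizeof(D));         // reinterpret the bit pattern
  return D - 4503599627370496.0;             // subtract the 2^52 bias
}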
@@ -6374,24 +6843,34 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackSlot, WordOff); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, NULL, 0, false, false, 0); + StackSlot, MachinePointerInfo(), + false, false, 0); SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), - OffsetSlot, NULL, 0, false, false, 0); + OffsetSlot, MachinePointerInfo(), + false, false, 0); SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); return Fild; } assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, NULL, 0, false, false, 0); + StackSlot, MachinePointerInfo(), + false, false, 0); // For i64 source, we need to add the appropriate power of 2 if the input // was negative. This is the same as the optimization in // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here, // we must be careful to do the computation in x87 extended precision, not // in SSE. (The generic code can't know it's OK to do this, or how to.) + int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex(); + MachineMemOperand *MMO = + DAG.getMachineFunction() + .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), + MachineMemOperand::MOLoad, 8, 8); + SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; - SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3); + SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3, + MVT::i64, MMO); APInt FF(32, 0x5F800000ULL); @@ -6414,9 +6893,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Load the value out, extending it from f32 to f80. // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(), - FudgePtr, PseudoSourceValue::getConstantPool(), - 0, MVT::f32, false, false, 4); + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), + FudgePtr, MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, 4); // Extend everything to 80 bits to force it to be done on x87. 
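// The fudge factor compensates for FILD reading the i64 as signed: when the
// original unsigned value had its top bit set, the x87 result is exactly
// 2^64 too small (0x5F800000 is 2^64 encoded as an f32), so 2^64 is added
// back in 80-bit precision. A standalone sketch of the math:
#include <cstdint>
static long double u64ToLongDouble(uint64_t X) {
  long double V = static_cast<long double>(static_cast<int64_t>(X)); // what FILD computes
  if (static_cast<int64_t>(X) < 0)
    V += 18446744073709551616.0L; // + 2^64 restores the unsigned value
  return V;
}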
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge); return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0)); @@ -6424,7 +6903,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, std::pair<SDValue,SDValue> X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); EVT DstTy = Op.getValueType(); @@ -6453,6 +6932,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + + unsigned Opc; switch (DstTy.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid FP_TO_SINT to lower!"); @@ -6463,37 +6944,43 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { SDValue Chain = DAG.getEntryNode(); SDValue Value = Op.getOperand(0); - if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { + EVT TheVT = Op.getOperand(0).getValueType(); + if (isScalarFPTypeInSSEReg(TheVT)) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); - Chain = DAG.getStore(Chain, dl, Value, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0, + Chain = DAG.getStore(Chain, DL, Value, StackSlot, + MachinePointerInfo::getFixedStack(SSFI), false, false, 0); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDValue Ops[] = { - Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) + Chain, StackSlot, DAG.getValueType(TheVT) }; - Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3); + + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), + MachineMemOperand::MOLoad, MemSize, MemSize); + Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3, + DstTy, MMO); Chain = Value.getValue(1); SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); } + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), + MachineMemOperand::MOStore, MemSize, MemSize); + // Build the FP_TO_INT*_IN_MEM SDValue Ops[] = { Chain, Value, StackSlot }; - SDValue FIST = DAG.getNode(Opc, dl, MVT::Other, Ops, 3); + SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), + Ops, 3, DstTy, MMO); return std::make_pair(FIST, StackSlot); } SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { - if (Op.getValueType().isVector()) { - if (Op.getValueType() == MVT::v2i32 && - Op.getOperand(0).getValueType() == MVT::v2f64) { - return Op; - } + if (Op.getValueType().isVector()) return SDValue(); - } std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true); SDValue FIST = Vals.first, StackSlot = Vals.second; @@ -6502,7 +6989,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, // Load the result. return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0, false, false, 0); + FIST, StackSlot, MachinePointerInfo(), false, false, 0); } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, @@ -6513,7 +7000,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, // Load the result. 
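// The FP_TO_*INT_IN_MEM node built above must deliver C's truncate-toward-
// zero semantics, while the x87 default rounds to nearest even, so the
// in-memory FIST sequence temporarily switches the control word to
// round-toward-zero. A standalone statement of the required behavior:
static long long fpToSint(double D) {
  return static_cast<long long>(D); // C/C++ conversion truncates toward zero
}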
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0, false, false, 0); + FIST, StackSlot, MachinePointerInfo(), false, false, 0); } SDValue X86TargetLowering::LowerFABS(SDValue Op, @@ -6539,7 +7026,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } @@ -6566,14 +7053,14 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); if (VT.isVector()) { - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(ISD::XOR, dl, MVT::v2i64, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op.getOperand(0)), - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, Mask))); + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask))); } else { return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask); } @@ -6615,7 +7102,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1); @@ -6625,7 +7112,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit); SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit, DAG.getConstant(32, MVT::i32)); - SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, SignBit); + SignBit = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, SignBit); SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit, DAG.getIntPtrConstant(0)); } @@ -6644,7 +7131,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2); @@ -6884,8 +7371,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Lower (X & (1 << N)) == 0 to BT(X, N). // Lower ((X >>u N) & 1) != 0 to BT(X, N). // Lower ((X >>s N) & 1) != 0 to BT(X, N). - if (Op0.getOpcode() == ISD::AND && - Op0.hasOneUse() && + if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant && cast<ConstantSDNode>(Op1)->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { @@ -6894,19 +7380,25 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return NewSetCC; } - // Look for "(setcc) == / != 1" to avoid unncessary setcc. - if (Op0.getOpcode() == X86ISD::SETCC && - Op1.getOpcode() == ISD::Constant && + // Look for X == 0, X == 1, X != 0, or X != 1. 
We can simplify some forms of + // these. + if (Op1.getOpcode() == ISD::Constant && (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || cast<ConstantSDNode>(Op1)->isNullValue()) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); - bool Invert = (CC == ISD::SETNE) ^ - cast<ConstantSDNode>(Op1)->isNullValue(); - if (Invert) + + // If the input is a setcc, then reuse the input setcc or use a new one with + // the inverted condition. + if (Op0.getOpcode() == X86ISD::SETCC) { + X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); + bool Invert = (CC == ISD::SETNE) ^ + cast<ConstantSDNode>(Op1)->isNullValue(); + if (!Invert) return Op0; + CCode = X86::GetOppositeBranchCondition(CCode); - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); + } } bool isFP = Op1.getValueType().isFloatingPoint(); @@ -6914,17 +7406,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (X86CC == X86::COND_INVALID) return SDValue(); - SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG); - - // Use sbb x, x to materialize carry bit into a GPR. - if (X86CC == X86::COND_B) - return DAG.getNode(ISD::AND, dl, MVT::i8, - DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), Cond), - DAG.getConstant(1, MVT::i8)); - + SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), Cond); + DAG.getConstant(X86CC, MVT::i8), EFLAGS); } SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { @@ -6996,11 +7480,8 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { switch (VT.getSimpleVT().SimpleTy) { default: break; - case MVT::v8i8: case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; - case MVT::v4i16: case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; - case MVT::v2i32: case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; } @@ -7051,6 +7532,8 @@ static bool isX86LogicalCmp(SDValue Op) { if (Op.getResNo() == 1 && (Opc == X86ISD::ADD || Opc == X86ISD::SUB || + Opc == X86ISD::ADC || + Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL || Opc == X86ISD::INC || @@ -7060,13 +7543,28 @@ static bool isX86LogicalCmp(SDValue Op) { Opc == X86ISD::AND)) return true; + if (Op.getResNo() == 2 && Opc == X86ISD::UMUL) + return true; + return false; } +static bool isZero(SDValue V) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(V); + return C && C->isNullValue(); +} + +static bool isAllOnes(SDValue V) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(V); + return C && C->isAllOnesValue(); +} + SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Cond = Op.getOperand(0); - DebugLoc dl = Op.getDebugLoc(); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + DebugLoc DL = Op.getDebugLoc(); SDValue CC; if (Cond.getOpcode() == ISD::SETCC) { @@ -7075,34 +7573,44 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = NewCond; } - // (select (x == 0), -1, 0) -> (sign_bit (x - 1)) - SDValue Op1 = Op.getOperand(1); - SDValue Op2 = Op.getOperand(2); + // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y + // 
(select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y + // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y + // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y if (Cond.getOpcode() == X86ISD::SETCC && - cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue() == X86::COND_E) { + Cond.getOperand(1).getOpcode() == X86ISD::CMP && + isZero(Cond.getOperand(1).getOperand(1))) { SDValue Cmp = Cond.getOperand(1); - if (Cmp.getOpcode() == X86ISD::CMP) { - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op1); + + unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue(); + + if ((isAllOnes(Op1) || isAllOnes(Op2)) && + (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { + SDValue Y = isAllOnes(Op2) ? Op1 : Op2; + + SDValue CmpOp0 = Cmp.getOperand(0); + Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, + CmpOp0, DAG.getConstant(1, CmpOp0.getValueType())); + + SDValue Res = // Res = 0 or -1. + DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), + DAG.getConstant(X86::COND_B, MVT::i8), Cmp); + + if (isAllOnes(Op1) != (CondCode == X86::COND_E)) + Res = DAG.getNOT(DL, Res, Res.getValueType()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2); - ConstantSDNode *RHSC = - dyn_cast<ConstantSDNode>(Cmp.getOperand(1).getNode()); - if (N1C && N1C->isAllOnesValue() && - N2C && N2C->isNullValue() && - RHSC && RHSC->isNullValue()) { - SDValue CmpOp0 = Cmp.getOperand(0); - Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, - CmpOp0, DAG.getConstant(1, CmpOp0.getValueType())); - return DAG.getNode(X86ISD::SETCC_CARRY, dl, Op.getValueType(), - DAG.getConstant(X86::COND_B, MVT::i8), Cmp); - } + if (N2C == 0 || !N2C->isNullValue()) + Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y); + return Res; } } - // Look pass (and (setcc_carry (cmp ...)), 1). + // Look past (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1)); - if (C && C->getAPIntValue() == 1) + if (C && C->getAPIntValue() == 1) Cond = Cond.getOperand(0); } @@ -7135,8 +7643,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // We know the result of AND is compared against zero. Try to match // it to BT. - if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { - SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); + if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { + SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG); if (NewSetCC.getNode()) { CC = NewSetCC.getOperand(0); Cond = NewSetCC.getOperand(1); @@ -7150,11 +7658,28 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = EmitTest(Cond, X86::COND_NE, DAG); } + // a < b ? -1 : 0 -> RES = ~setcc_carry + // a < b ? 0 : -1 -> RES = setcc_carry + // a >= b ? -1 : 0 -> RES = setcc_carry + // a >= b ? 0 : -1 -> RES = ~setcc_carry + if (Cond.getOpcode() == X86ISD::CMP) { + unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue(); + + if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && + (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) { + SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), + DAG.getConstant(X86::COND_B, MVT::i8), Cond); + if (isAllOnes(Op1) != (CondCode == X86::COND_B)) + return DAG.getNOT(DL, Res, Res.getValueType()); + return Res; + } + } + // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. 
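// The SETCC_CARRY folds above all lean on one x86 idiom: after a compare,
// "sbb r, r" smears the carry flag across a register, producing 0 or -1
// without a branch. Equivalent C for two of the select shapes handled (the
// >= forms are their complements):
#include <cstdint>
static int32_t selectLtAllOnesElseZero(uint32_t A, uint32_t B) {
  return -static_cast<int32_t>(A < B);  // a < b ? -1 : 0  (setcc_carry)
}
static int32_t selectLtZeroElseAllOnes(uint32_t A, uint32_t B) {
  return ~-static_cast<int32_t>(A < B); // a < b ? 0 : -1  (~setcc_carry)
}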
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SDValue Ops[] = { Op2, Op1, CC, Cond }; - return DAG.getNode(X86ISD::CMOV, dl, VTs, Ops, array_lengthof(Ops)); + return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops)); } // isAndOrOfSingleUseSetCCs - Return true if node is an ISD::AND or @@ -7209,7 +7734,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { if (Cond.getOpcode() == ISD::AND && Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1)); - if (C && C->getAPIntValue() == 1) + if (C && C->getAPIntValue() == 1) Cond = Cond.getOperand(0); } @@ -7310,7 +7835,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // We know the result of AND is compared against zero. Try to match // it to BT. - if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { + if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); if (NewSetCC.getNode()) { CC = NewSetCC.getOperand(0); @@ -7337,8 +7862,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->isTargetCygMing() && - "This should be used only on Cygwin/Mingw targets"); + assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) && + "This should be used only on Windows targets"); DebugLoc dl = Op.getDebugLoc(); // Get the inputs. @@ -7353,9 +7878,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); Flag = Chain.getValue(1); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag); + Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); Flag = Chain.getValue(1); Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1); @@ -7369,15 +7894,15 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); - if (!Subtarget->is64Bit()) { + if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. 
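// For the x86-64 branch below, the four stores populate the AMD64 ABI's
// __va_list_tag; the IntPtrConstant increments (4, then 4, then 8) walk
// exactly this layout:
struct VAListTag64 {
  unsigned gp_offset;      // byte 0: next general-purpose register slot
  unsigned fp_offset;      // byte 4: next floating-point register slot
  void *overflow_arg_area; // byte 8: arguments passed on the stack
  void *reg_save_area;     // byte 16: block of spilled argument registers
};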
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), getPointerTy()); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, - false, false, 0); + return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), + MachinePointerInfo(SV), false, false, 0); } // __va_list_tag: @@ -7388,48 +7913,107 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SmallVector<SDValue, 8> MemOps; SDValue FIN = Op.getOperand(1); // Store gp_offset - SDValue Store = DAG.getStore(Op.getOperand(0), dl, + SDValue Store = DAG.getStore(Op.getOperand(0), DL, DAG.getConstant(FuncInfo->getVarArgsGPOffset(), MVT::i32), - FIN, SV, 0, false, false, 0); + FIN, MachinePointerInfo(SV), false, false, 0); MemOps.push_back(Store); // Store fp_offset - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); - Store = DAG.getStore(Op.getOperand(0), dl, + Store = DAG.getStore(Op.getOperand(0), DL, DAG.getConstant(FuncInfo->getVarArgsFPOffset(), MVT::i32), - FIN, SV, 4, false, false, 0); + FIN, MachinePointerInfo(SV, 4), false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8, + Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, + MachinePointerInfo(SV, 8), false, false, 0); MemOps.push_back(Store); // Store ptr to reg_save_area. - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16, - false, false, 0); + Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, + MachinePointerInfo(SV, 16), false, false, 0); MemOps.push_back(Store); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], MemOps.size()); } SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { - // X86-64 va_list is a struct { i32, i32, i8*, i8* }. - assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!"); + assert(Subtarget->is64Bit() && + "LowerVAARG only handles 64-bit va_arg!"); + assert((Subtarget->isTargetLinux() || + Subtarget->isTargetDarwin()) && + "Unhandled target in LowerVAARG"); + assert(Op.getNode()->getNumOperands() == 4); + SDValue Chain = Op.getOperand(0); + SDValue SrcPtr = Op.getOperand(1); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + unsigned Align = Op.getConstantOperandVal(3); + DebugLoc dl = Op.getDebugLoc(); - report_fatal_error("VAArgInst is not yet implemented for x86-64!"); - return SDValue(); + EVT ArgVT = Op.getNode()->getValueType(0); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy); + uint8_t ArgMode; + + // Decide which area this value should be read from. + // TODO: Implement the AMD64 ABI in its entirety. This simple + // selection mechanism works only for the basic types. + if (ArgVT == MVT::f80) { + llvm_unreachable("va_arg for f80 not yet implemented"); + } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) { + ArgMode = 2; // Argument passed in XMM register. 
Use fp_offset. + } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) { + ArgMode = 1; // Argument passed in GPR64 register(s). Use gp_offset. + } else { + llvm_unreachable("Unhandled argument type in LowerVAARG"); + } + + if (ArgMode == 2) { + // Sanity Check: Make sure using fp_offset makes sense. + assert(!UseSoftFloat && + !(DAG.getMachineFunction() + .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) && + Subtarget->hasXMM()); + } + + // Insert VAARG_64 node into the DAG + // VAARG_64 returns two values: Variable Argument Address, Chain + SmallVector<SDValue, 11> InstOps; + InstOps.push_back(Chain); + InstOps.push_back(SrcPtr); + InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32)); + InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8)); + InstOps.push_back(DAG.getConstant(Align, MVT::i32)); + SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other); + SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl, + VTs, &InstOps[0], InstOps.size(), + MVT::i64, + MachinePointerInfo(SV), + /*Align=*/0, + /*Volatile=*/false, + /*ReadMem=*/true, + /*WriteMem=*/true); + Chain = VAARG.getValue(1); + + // Load the next argument and return it + return DAG.getLoad(ArgVT, dl, + Chain, + VAARG, + MachinePointerInfo(), + false, false, 0); } SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { @@ -7440,11 +8024,12 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { SDValue SrcPtr = Op.getOperand(2); const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); - return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr, + return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(24), 8, /*isVolatile*/false, - false, DstSV, 0, SrcSV, 0); + false, + MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } SDValue @@ -7713,10 +8298,11 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4); } else { ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); +// FIXME this must be lowered to get rid of the invalid type. } EVT VT = Op.getValueType(); - ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, ShAmt); + ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(NewIntNo, MVT::i32), Op.getOperand(1), ShAmt); @@ -7740,13 +8326,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0, false, false, 0); + MachinePointerInfo(), false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0, false, false, 0); + RetAddrFI, MachinePointerInfo(), false, false, 0); } SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { @@ -7759,7 +8345,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { unsigned FrameReg = Subtarget->is64Bit() ? 
X86::RBP : X86::EBP; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, + MachinePointerInfo(), false, false, 0); return FrameAddr; } @@ -7784,7 +8371,8 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, DAG.getIntPtrConstant(TD->getPointerSize())); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); - Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0); + Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), + false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); MF.getRegInfo().addLiveOut(StoreAddrReg); @@ -7819,11 +8407,13 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 SDValue Addr = Trmp; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 0, false, false, 0); + Addr, MachinePointerInfo(TrmpAddr), + false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); - OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, + OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, + MachinePointerInfo(TrmpAddr, 2), false, false, 2); // Load the 'nest' parameter value into R10. @@ -7832,11 +8422,13 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 10, false, false, 0); + Addr, MachinePointerInfo(TrmpAddr, 10), + false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); - OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, + OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, + MachinePointerInfo(TrmpAddr, 12), false, false, 2); // Jump to the nested function. @@ -7844,13 +8436,15 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 20, false, false, 0); + Addr, MachinePointerInfo(TrmpAddr, 20), + false, false, 0); unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr, - TrmpAddr, 22, false, false, 0); + MachinePointerInfo(TrmpAddr, 22), + false, false, 0); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) }; @@ -7912,22 +8506,26 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg); OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), - Trmp, TrmpAddr, 0, false, false, 0); + Trmp, MachinePointerInfo(TrmpAddr), + false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); - OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, + OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, + MachinePointerInfo(TrmpAddr, 1), false, false, 1); const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode. 
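// The stores around here assemble a 10-byte i386 nested-function
// trampoline: "mov $nest, %reg" (opcode 0xB8 + reg, imm32) followed by
// "jmp rel32" (0xE9, disp32 measured from the end of the trampoline).
// A standalone sketch of the same byte layout (illustrative helper,
// assuming a flat 32-bit address space as on i386):
#include <cstdint>
#include <cstring>
static void emitTrampoline32(uint8_t *Trmp, uint8_t N86Reg,
                             uint32_t Nest, uint32_t Target) {
  Trmp[0] = static_cast<uint8_t>(0xB8 | N86Reg); // MOV32ri into the nest register
  std::memcpy(Trmp + 1, &Nest, 4);               // imm32: the nest parameter
  Trmp[5] = 0xE9;                                // JMP rel32 opcode
  uint32_t Disp = Target - (reinterpret_cast<uintptr_t>(Trmp) + 10);
  std::memcpy(Trmp + 6, &Disp, 4);               // disp32 from trampoline end
}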
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr, - TrmpAddr, 5, false, false, 1); + MachinePointerInfo(TrmpAddr, 5), + false, false, 1); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); - OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, + OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, + MachinePointerInfo(TrmpAddr, 6), false, false, 1); SDValue Ops[] = @@ -7959,44 +8557,51 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); const TargetMachine &TM = MF.getTarget(); - const TargetFrameInfo &TFI = *TM.getFrameInfo(); + const TargetFrameLowering &TFI = *TM.getFrameLowering(); unsigned StackAlignment = TFI.getStackAlignment(); EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); // Save FP Control Word to stack slot int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); - SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other, - DAG.getEntryNode(), StackSlot); + + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), + MachineMemOperand::MOStore, 2, 2); + + SDValue Ops[] = { DAG.getEntryNode(), StackSlot }; + SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, + DAG.getVTList(MVT::Other), + Ops, 2, MVT::i16, MMO); // Load FP Control Word from stack slot - SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0, - false, false, 0); + SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, + MachinePointerInfo(), false, false, 0); // Transform as necessary SDValue CWD1 = - DAG.getNode(ISD::SRL, dl, MVT::i16, - DAG.getNode(ISD::AND, dl, MVT::i16, + DAG.getNode(ISD::SRL, DL, MVT::i16, + DAG.getNode(ISD::AND, DL, MVT::i16, CWD, DAG.getConstant(0x800, MVT::i16)), DAG.getConstant(11, MVT::i8)); SDValue CWD2 = - DAG.getNode(ISD::SRL, dl, MVT::i16, - DAG.getNode(ISD::AND, dl, MVT::i16, + DAG.getNode(ISD::SRL, DL, MVT::i16, + DAG.getNode(ISD::AND, DL, MVT::i16, CWD, DAG.getConstant(0x400, MVT::i16)), DAG.getConstant(9, MVT::i8)); SDValue RetVal = - DAG.getNode(ISD::AND, dl, MVT::i16, - DAG.getNode(ISD::ADD, dl, MVT::i16, - DAG.getNode(ISD::OR, dl, MVT::i16, CWD1, CWD2), + DAG.getNode(ISD::AND, DL, MVT::i16, + DAG.getNode(ISD::ADD, DL, MVT::i16, + DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2), DAG.getConstant(1, MVT::i16)), DAG.getConstant(3, MVT::i16)); return DAG.getNode((VT.getSizeInBits() < 16 ? 
- ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); + ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal); } SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { @@ -8122,16 +8727,16 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const { Op.getOperand(1), DAG.getConstant(23, MVT::i32)); ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U)); - + std::vector<Constant*> CV(4, CI); Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend); - Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, Op); + Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op); Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); return DAG.getNode(ISD::MUL, dl, VT, Op, R); } @@ -8149,7 +8754,7 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const { Constant *C = ConstantVector::get(CVM1); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, 16); // r = pblendv(r, psllw(r & (char16)15, 4), a); @@ -8157,31 +8762,27 @@ SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(4, MVT::i32)); - R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32), - R, M, Op); + R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); - + C = ConstantVector::get(CVM2); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, false, false, 16); - + MachinePointerInfo::getConstantPool(), + false, false, 16); + // r = pblendv(r, psllw(r & (char16)63, 2), a); M = DAG.getNode(ISD::AND, dl, VT, R, M); M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(2, MVT::i32)); - R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32), - R, M, Op); + R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, M, Op); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); - + // return pblendv(r, r+r, a); - R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32), + R = DAG.getNode(X86ISD::PBLENDVB, dl, VT, R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op); return R; } @@ -8198,8 +8799,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { SDValue RHS = N->getOperand(1); unsigned BaseOp = 0; unsigned Cond = 0; - DebugLoc dl = Op.getDebugLoc(); - + DebugLoc DL = Op.getDebugLoc(); switch (Op.getOpcode()) { default: llvm_unreachable("Unknown ovf instruction!"); case ISD::SADDO: @@ -8238,19 +8838,29 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { BaseOp = X86ISD::SMUL; Cond = X86::COND_O; break; - case ISD::UMULO: - BaseOp = X86ISD::UMUL; - Cond = X86::COND_B; - break; + case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs + SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0), + MVT::i32); + SDValue 
Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS); + + SDValue SetCC = + DAG.getNode(X86ISD::SETCC, DL, MVT::i8, + DAG.getConstant(X86::COND_O, MVT::i32), + SDValue(Sum.getNode(), 2)); + + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC); + return Sum; + } } // Also sets EFLAGS. SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); - SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS); + SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); SDValue SetCC = - DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1), - DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1)); + DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1), + DAG.getConstant(Cond, MVT::i32), + SDValue(Sum.getNode(), 1)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC); return Sum; @@ -8258,10 +8868,10 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ DebugLoc dl = Op.getDebugLoc(); - + if (!Subtarget->hasSSE2()) { SDValue Chain = Op.getOperand(0); - SDValue Zero = DAG.getConstant(0, + SDValue Zero = DAG.getConstant(0, Subtarget->is64Bit() ? MVT::i64 : MVT::i32); SDValue Ops[] = { DAG.getRegister(X86::ESP, MVT::i32), // Base @@ -8272,37 +8882,37 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{ Zero, Chain }; - SDNode *Res = + SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, array_lengthof(Ops)); return SDValue(Res, 0); } - + unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue(); if (!isDev) return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); - + unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); - + // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; if (!Op1 && !Op2 && !Op3 && Op4) return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0)); - + // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; if (Op1 && !Op2 && !Op3 && !Op4) return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0)); - - // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), + + // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), // (MFENCE)>; return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); } SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { EVT T = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); unsigned Reg = 0; unsigned size = 0; switch(T.getSimpleVT().SimpleTy) { @@ -8316,24 +8926,26 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { Reg = X86::RAX; size = 8; break; } - SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), dl, Reg, + SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg, Op.getOperand(2), SDValue()); SDValue Ops[] = { cpIn.getValue(0), Op.getOperand(1), Op.getOperand(3), DAG.getTargetConstant(size, MVT::i8), cpIn.getValue(1) }; - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, dl, Tys, Ops, 5); + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); + MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand(); + SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, + Ops, 5, 
T, MMO); SDValue cpOut = - DAG.getCopyFromReg(Result.getValue(0), dl, Reg, T, Result.getValue(1)); + DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); return cpOut; } SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->is64Bit() && "Result not type legalized?"); - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue TheChain = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1); @@ -8349,16 +8961,15 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } -SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op, +SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); EVT DstVT = Op.getValueType(); - assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() && - Subtarget->hasMMX() && !DisableMMX) && - "Unexpected custom BIT_CONVERT"); - assert((DstVT == MVT::i64 || + assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() && + Subtarget->hasMMX() && "Unexpected custom BITCAST"); + assert((DstVT == MVT::i64 || (DstVT.isVector() && DstVT.getSizeInBits()==64)) && - "Unexpected custom BIT_CONVERT"); + "Unexpected custom BITCAST"); // i64 <=> MMX conversions are Legal. if (SrcVT==MVT::i64 && DstVT.isVector()) return Op; @@ -8370,6 +8981,7 @@ SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op, // All other conversions need to be expanded. return SDValue(); } + SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); @@ -8384,6 +8996,32 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { cast<AtomicSDNode>(Node)->getAlignment()); } +static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getNode()->getValueType(0); + + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + + unsigned Opc; + bool ExtraOp = false; + switch (Op.getOpcode()) { + default: assert(0 && "Invalid code"); + case ISD::ADDC: Opc = X86ISD::ADD; break; + case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break; + case ISD::SUBC: Opc = X86ISD::SUB; break; + case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break; + } + + if (!ExtraOp) + return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + Op.getOperand(1)); + return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + Op.getOperand(1), Op.getOperand(2)); +} + /// LowerOperation - Provide custom lowering hooks for some operations. 
/// SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -8397,6 +9035,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); + case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); @@ -8441,7 +9081,11 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SMULO: case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG); - case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG); + case ISD::BITCAST: return LowerBITCAST(Op, DAG); + case ISD::ADDC: + case ISD::ADDE: + case ISD::SUBC: + case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); } } @@ -8478,6 +9122,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, default: assert(false && "Do not know how to custom type legalize this operation!"); return; + case ISD::ADDC: + case ISD::ADDE: + case ISD::SUBC: + case ISD::SUBE: + // We don't want to expand or promote these. + return; case ISD::FP_TO_SINT: { std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(SDValue(N, 0), DAG, true); @@ -8485,13 +9135,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, if (FIST.getNode() != 0) { EVT VT = N->getValueType(0); // Return a load from the stack slot. - Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0, - false, false, 0)); + Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, + MachinePointerInfo(), false, false, 0)); } return; } case ISD::READCYCLECOUNTER: { - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue TheChain = N->getOperand(0); SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1); SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32, @@ -8527,8 +9177,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue Ops[] = { swapInH.getValue(0), N->getOperand(1), swapInH.getValue(1) }; - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, 3); + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); + MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); + SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, + Ops, 3, T, MMO); SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX, MVT::i32, Result.getValue(1)); SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX, @@ -8601,15 +9253,18 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; - case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; + case X86ISD::PANDN: return "X86ISD::PANDN"; + case X86ISD::PSIGNB: return "X86ISD::PSIGNB"; + case X86ISD::PSIGNW: return "X86ISD::PSIGNW"; + case X86ISD::PSIGND: return "X86ISD::PSIGND"; + case X86ISD::PBLENDVB: return "X86ISD::PBLENDVB"; case X86ISD::FMAX: return "X86ISD::FMAX"; 
case X86ISD::FMIN: return "X86ISD::FMIN"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; - case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; @@ -8637,6 +9292,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ"; case X86ISD::ADD: return "X86ISD::ADD"; case X86ISD::SUB: return "X86ISD::SUB"; + case X86ISD::ADC: return "X86ISD::ADC"; + case X86ISD::SBB: return "X86ISD::SBB"; case X86ISD::SMUL: return "X86ISD::SMUL"; case X86ISD::UMUL: return "X86ISD::UMUL"; case X86ISD::INC: return "X86ISD::INC"; @@ -8681,7 +9338,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; - case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA"; + case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; + case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; } } @@ -9203,15 +9861,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MachineBasicBlock * X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, unsigned numArgs, bool memArg) const { - assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) && "Target must have SSE4.2 or AVX features enabled"); DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned Opc; - if (!Subtarget->hasAVX()) { if (memArg) Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; @@ -9224,24 +9879,318 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr; } - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc)); - + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc)); for (unsigned i = 0; i < numArgs; ++i) { MachineOperand &Op = MI->getOperand(i+1); - if (!(Op.isReg() && Op.isImplicit())) MIB.addOperand(Op); } - - BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) + BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) .addReg(X86::XMM0); MI->eraseFromParent(); + return BB; +} + +MachineBasicBlock * +X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const { + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + // Address into RAX/EAX, other two args into ECX, EDX. + unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r; + unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg); + for (int i = 0; i < X86::AddrNumOperands; ++i) + MIB.addOperand(MI->getOperand(i)); + + unsigned ValOps = X86::AddrNumOperands; + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) + .addReg(MI->getOperand(ValOps).getReg()); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX) + .addReg(MI->getOperand(ValOps+1).getReg()); + + // The instruction doesn't actually take any operands though. + BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr)); + + MI->eraseFromParent(); // The pseudo is gone now. 
+ return BB; +} + +MachineBasicBlock * +X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const { + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + // First arg in ECX, the second in EAX. + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) + .addReg(MI->getOperand(0).getReg()); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX) + .addReg(MI->getOperand(1).getReg()); + // The instruction doesn't actually take any operands though. + BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr)); + + MI->eraseFromParent(); // The pseudo is gone now. return BB; } MachineBasicBlock * +X86TargetLowering::EmitVAARG64WithCustomInserter( + MachineInstr *MI, + MachineBasicBlock *MBB) const { + // Emit va_arg instruction on X86-64. + + // Operands to this pseudo-instruction: + // 0 ) Output : destination address (reg) + // 1-5) Input : va_list address (addr, i64mem) + // 6 ) ArgSize : Size (in bytes) of vararg type + // 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset + // 8 ) Align : Alignment of type + // 9 ) EFLAGS (implicit-def) + + assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!"); + assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands"); + + unsigned DestReg = MI->getOperand(0).getReg(); + MachineOperand &Base = MI->getOperand(1); + MachineOperand &Scale = MI->getOperand(2); + MachineOperand &Index = MI->getOperand(3); + MachineOperand &Disp = MI->getOperand(4); + MachineOperand &Segment = MI->getOperand(5); + unsigned ArgSize = MI->getOperand(6).getImm(); + unsigned ArgMode = MI->getOperand(7).getImm(); + unsigned Align = MI->getOperand(8).getImm(); + + // Memory Reference + assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand"); + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + // Machine Information + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64); + const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32); + DebugLoc DL = MI->getDebugLoc(); + + // struct va_list { + // i32 gp_offset + // i32 fp_offset + // i64 overflow_area (address) + // i64 reg_save_area (address) + // } + // sizeof(va_list) = 24 + // alignment(va_list) = 8 + + unsigned TotalNumIntRegs = 6; + unsigned TotalNumXMMRegs = 8; + bool UseGPOffset = (ArgMode == 1); + bool UseFPOffset = (ArgMode == 2); + unsigned MaxOffset = TotalNumIntRegs * 8 + + (UseFPOffset ? TotalNumXMMRegs * 16 : 0); + + /* Align ArgSize to a multiple of 8 */ + unsigned ArgSizeA8 = (ArgSize + 7) & ~7; + bool NeedsAlign = (Align > 8); + + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *overflowMBB; + MachineBasicBlock *offsetMBB; + MachineBasicBlock *endMBB; + + unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB + unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB + unsigned OffsetReg = 0; + + if (!UseGPOffset && !UseFPOffset) { + // If we only pull from the overflow region, we don't create a branch. + // We don't need to alter control flow. + OffsetDestReg = 0; // unused + OverflowDestReg = DestReg; + + offsetMBB = NULL; + overflowMBB = thisMBB; + endMBB = thisMBB; + } else { + // First emit code to check if gp_offset (or fp_offset) is below the bound. + // If so, pull the argument from reg_save_area. 
(branch to offsetMBB) + // If not, pull from overflow_area. (branch to overflowMBB) + // + // thisMBB + // | . + // | . + // offsetMBB overflowMBB + // | . + // | . + // endMBB + + // Registers for the PHI in endMBB + OffsetDestReg = MRI.createVirtualRegister(AddrRegClass); + OverflowDestReg = MRI.createVirtualRegister(AddrRegClass); + + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + MachineFunction *MF = MBB->getParent(); + overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB); + offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB); + endMBB = MF->CreateMachineBasicBlock(LLVM_BB); + + MachineFunction::iterator MBBIter = MBB; + ++MBBIter; + + // Insert the new basic blocks + MF->insert(MBBIter, offsetMBB); + MF->insert(MBBIter, overflowMBB); + MF->insert(MBBIter, endMBB); + + // Transfer the remainder of MBB and its successor edges to endMBB. + endMBB->splice(endMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(MI)), + thisMBB->end()); + endMBB->transferSuccessorsAndUpdatePHIs(thisMBB); + + // Make offsetMBB and overflowMBB successors of thisMBB + thisMBB->addSuccessor(offsetMBB); + thisMBB->addSuccessor(overflowMBB); + + // endMBB is a successor of both offsetMBB and overflowMBB + offsetMBB->addSuccessor(endMBB); + overflowMBB->addSuccessor(endMBB); + + // Load the offset value into a register + OffsetReg = MRI.createVirtualRegister(OffsetRegClass); + BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg) + .addOperand(Base) + .addOperand(Scale) + .addOperand(Index) + .addDisp(Disp, UseFPOffset ? 4 : 0) + .addOperand(Segment) + .setMemRefs(MMOBegin, MMOEnd); + + // Check if there is enough room left to pull this argument. + BuildMI(thisMBB, DL, TII->get(X86::CMP32ri)) + .addReg(OffsetReg) + .addImm(MaxOffset + 8 - ArgSizeA8); + + // Branch to "overflowMBB" if offset >= max + // Fall through to "offsetMBB" otherwise + BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE))) + .addMBB(overflowMBB); + } + + // In offsetMBB, emit code to use the reg_save_area. + if (offsetMBB) { + assert(OffsetReg != 0); + + // Read the reg_save_area address. + unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass); + BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg) + .addOperand(Base) + .addOperand(Scale) + .addOperand(Index) + .addDisp(Disp, 16) + .addOperand(Segment) + .setMemRefs(MMOBegin, MMOEnd); + + // Zero-extend the offset + unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); + BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) + .addImm(0) + .addReg(OffsetReg) + .addImm(X86::sub_32bit); + + // Add the offset to the reg_save_area to get the final address. + BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg) + .addReg(OffsetReg64) + .addReg(RegSaveReg); + + // Compute the offset for the next argument + unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass); + BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg) + .addReg(OffsetReg) + .addImm(UseFPOffset ? 16 : 8); + + // Store it back into the va_list. + BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr)) + .addOperand(Base) + .addOperand(Scale) + .addOperand(Index) + .addDisp(Disp, UseFPOffset ? 4 : 0) + .addOperand(Segment) + .addReg(NextOffsetReg) + .setMemRefs(MMOBegin, MMOEnd); + + // Jump to endMBB + BuildMI(offsetMBB, DL, TII->get(X86::JMP_4)) + .addMBB(endMBB); + } + + // + // Emit code to use overflow area + // + + // Load the overflow_area address into a register. 
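+  // (Per the va_list layout sketched above, overflow_area sits at offset 8 and
+  // reg_save_area at offset 16, which is why this block addresses the list with
+  // a displacement of Disp+8.)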
+ unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass); + BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg) + .addOperand(Base) + .addOperand(Scale) + .addOperand(Index) + .addDisp(Disp, 8) + .addOperand(Segment) + .setMemRefs(MMOBegin, MMOEnd); + + // If we need to align it, do so. Otherwise, just copy the address + // to OverflowDestReg. + if (NeedsAlign) { + // Align the overflow address + assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2"); + unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass); + + // aligned_addr = (addr + (align-1)) & ~(align-1) + BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg) + .addReg(OverflowAddrReg) + .addImm(Align-1); + + BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg) + .addReg(TmpReg) + .addImm(~(uint64_t)(Align-1)); + } else { + BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg) + .addReg(OverflowAddrReg); + } + + // Compute the next overflow address after this argument. + // (the overflow address should be kept 8-byte aligned) + unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass); + BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg) + .addReg(OverflowDestReg) + .addImm(ArgSizeA8); + + // Store the new overflow address. + BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr)) + .addOperand(Base) + .addOperand(Scale) + .addOperand(Index) + .addDisp(Disp, 8) + .addOperand(Segment) + .addReg(NextAddrReg) + .setMemRefs(MMOBegin, MMOEnd); + + // If we branched, emit the PHI to the front of endMBB. + if (offsetMBB) { + BuildMI(*endMBB, endMBB->begin(), DL, + TII->get(X86::PHI), DestReg) + .addReg(OffsetDestReg).addMBB(offsetMBB) + .addReg(OverflowDestReg).addMBB(overflowMBB); + } + + // Erase the pseudo instruction + MI->eraseFromParent(); + + return endMBB; +} + +MachineBasicBlock * X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MachineInstr *MI, MachineBasicBlock *MBB) const { @@ -9296,8 +10245,8 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( int64_t Offset = (i - 3) * 16 + VarArgsFPOffset; MachineMemOperand *MMO = F->getMachineMemOperand( - PseudoSourceValue::getFixedStack(RegSaveFrameIndex), - MachineMemOperand::MOStore, Offset, + MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset), + MachineMemOperand::MOStore, /*Size=*/16, /*Align=*/16); BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr)) .addFrameIndex(RegSaveFrameIndex) @@ -9389,7 +10338,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, } MachineBasicBlock * -X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, +X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -9399,8 +10348,11 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, // FIXME: The code should be tweaked as soon as we try to do codegen for // mingw-w64. + const char *StackProbeSymbol = + Subtarget->isTargetWindows() ? "_chkstk" : "_alloca"; + BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca") + .addExternalSymbol(StackProbeSymbol) .addReg(X86::EAX, RegState::Implicit) .addReg(X86::ESP, RegState::Implicit) .addReg(X86::EAX, RegState::Define | RegState::Implicit) @@ -9418,30 +10370,30 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, // our load from the relocation, sticking it in either RDI (x86-64) // or EAX and doing an indirect call. 
The return value will then // be in the normal return register. - const X86InstrInfo *TII + const X86InstrInfo *TII = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo()); DebugLoc DL = MI->getDebugLoc(); MachineFunction *F = BB->getParent(); - bool IsWin64 = Subtarget->isTargetWin64(); - + + assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?"); assert(MI->getOperand(3).isGlobal() && "This should be a global"); - + if (Subtarget->is64Bit()) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI) .addReg(X86::RIP) .addImm(0).addReg(0) - .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, MI->getOperand(3).getTargetFlags()) .addReg(0); - MIB = BuildMI(*BB, MI, DL, TII->get(IsWin64 ? X86::WINCALL64m : X86::CALL64m)); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m)); addDirectMem(MIB, X86::RDI); } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX) .addReg(0) .addImm(0).addReg(0) - .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, MI->getOperand(3).getTargetFlags()) .addReg(0); MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); addDirectMem(MIB, X86::EAX); } else { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, @@ -9451,13 +10403,13 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, TII->get(X86::MOV32rm), X86::EAX) .addReg(TII->getGlobalBaseReg(F)) .addImm(0).addReg(0) - .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, MI->getOperand(3).getTargetFlags()) .addReg(0); MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); addDirectMem(MIB, X86::EAX); } - + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -9467,13 +10419,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); - case X86::MINGW_ALLOCA: - return EmitLoweredMingwAlloca(MI, BB); + case X86::TAILJMPd64: + case X86::TAILJMPr64: + case X86::TAILJMPm64: + assert(!"TAILJMP64 would not be touched here."); + case X86::TCRETURNdi64: + case X86::TCRETURNri64: + case X86::TCRETURNmi64: + // Defs of TCRETURNxx64 include Win64's callee-saved registers as a subset. + // On AMD64, additional defs should be added before register allocation. + if (!Subtarget->isTargetWin64()) { + MI->addRegisterDefined(X86::RSI); + MI->addRegisterDefined(X86::RDI); + MI->addRegisterDefined(X86::XMM6); + MI->addRegisterDefined(X86::XMM7); + MI->addRegisterDefined(X86::XMM8); + MI->addRegisterDefined(X86::XMM9); + MI->addRegisterDefined(X86::XMM10); + MI->addRegisterDefined(X86::XMM11); + MI->addRegisterDefined(X86::XMM12); + MI->addRegisterDefined(X86::XMM13); + MI->addRegisterDefined(X86::XMM14); + MI->addRegisterDefined(X86::XMM15); + } + return BB; + case X86::WIN_ALLOCA: + return EmitLoweredWinAlloca(MI, BB); case X86::TLSCall_32: case X86::TLSCall_64: return EmitLoweredTLSCall(MI, BB); case X86::CMOV_GR8: - case X86::CMOV_V1I64: case X86::CMOV_FR32: case X86::CMOV_FR64: case X86::CMOV_V4F32: @@ -9583,6 +10558,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::VPCMPESTRM128MEM: return EmitPCMP(MI, BB, 5, true /* in mem */); + + // Thread synchronization. + case X86::MONITOR: + return EmitMonitor(MI, BB); + case X86::MWAIT: + return EmitMwait(MI, BB); + + // Atomic Lowering. 
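+  // Each ATOM* pseudo below is expanded by its custom inserter into a
+  // load / operate / LOCK CMPXCHG retry loop. Roughly (a sketch, not the exact
+  // machine instruction sequence that gets emitted):
+  //   retry:
+  //     eax = load [ptr]
+  //     tmp = eax <op> operand
+  //     lock cmpxchg [ptr], tmp   ; stores tmp only if [ptr] still equals eax
+  //     jne retry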
case X86::ATOMAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, @@ -9747,6 +10728,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, false); case X86::VASTART_SAVE_XMM_REGS: return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); + + case X86::VAARG_64: + return EmitVAARG64WithCustomInserter(MI, BB); } } @@ -9773,6 +10757,8 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, default: break; case X86ISD::ADD: case X86ISD::SUB: + case X86ISD::ADC: + case X86ISD::SBB: case X86ISD::SMUL: case X86ISD::UMUL: case X86ISD::INC: @@ -9791,6 +10777,16 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, } } +unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth) const { + // SETCC_CARRY sets the dest to ~0 for true or 0 for false. + if (Op.getOpcode() == X86ISD::SETCC_CARRY) + return Op.getValueType().getScalarType().getSizeInBits(); + + // Fallback case. + return 1; +} + /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the /// node is a GlobalAddress + offset. bool X86TargetLowering::isGAPlusOffset(SDNode *N, @@ -9811,13 +10807,18 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, /// if the load addresses are consecutive, non-overlapping, and in the right /// order. static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, - const TargetLowering &TLI) { + TargetLowering::DAGCombinerInfo &DCI) { DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); if (VT.getSizeInBits() != 128) return SDValue(); + // Don't create instructions with illegal types after legalize types has run. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType())) + return SDValue(); + SmallVector<SDValue, 16> Elts; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Elts.push_back(getShuffleScalarElt(N, i, DAG, 0)); @@ -9877,8 +10878,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, // Store the value to a temporary stack slot. SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL, - 0, false, false, 0); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, + MachinePointerInfo(), false, false, 0); // Replace each use (extract) with a load of the appropriate element. for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(), @@ -9893,11 +10894,12 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy()); SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), - OffsetVal, StackPtr); + StackPtr, OffsetVal); // Load the scalar. SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, - ScalarAddr, NULL, 0, false, false, 0); + ScalarAddr, MachinePointerInfo(), + false, false, 0); // Replace the extract with the load. DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar); @@ -10473,6 +11475,36 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, return SDValue(); } + +static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + // Want to form PANDN nodes, in the hopes of then easily combining them with + // OR and AND nodes to form PBLEND/PSIGN. 
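+  // For example, (and (xor M, all-ones), Y) becomes (pandn M, Y) here, so that
+  // PerformOrCombine below can recognize (or (and M, X), (pandn M, Y)) and fuse
+  // the whole masked-select pattern into a single PSIGN or PBLENDVB node.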
+ EVT VT = N->getValueType(0); + if (VT != MVT::v2i64) + return SDValue(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + // Check LHS for vnot + if (N0.getOpcode() == ISD::XOR && + ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode())) + return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1); + + // Check RHS for vnot + if (N1.getOpcode() == ISD::XOR && + ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode())) + return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0); + + return SDValue(); +} + static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -10480,12 +11512,99 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); EVT VT = N->getValueType(0); - if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) + if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64) return SDValue(); - // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); + + // look for psign/blend + if (Subtarget->hasSSSE3()) { + if (VT == MVT::v2i64) { + // Canonicalize pandn to RHS + if (N0.getOpcode() == X86ISD::PANDN) + std::swap(N0, N1); + // or (and (m, x), (pandn m, y)) + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) { + SDValue Mask = N1.getOperand(0); + SDValue X = N1.getOperand(1); + SDValue Y; + if (N0.getOperand(0) == Mask) + Y = N0.getOperand(1); + if (N0.getOperand(1) == Mask) + Y = N0.getOperand(0); + + // Check to see if the mask appeared in both the AND and the PANDN. + if (!Y.getNode()) + return SDValue(); + + // Validate that X, Y, and Mask are BITCASTs, and see through them. + if (Mask.getOpcode() != ISD::BITCAST || + X.getOpcode() != ISD::BITCAST || + Y.getOpcode() != ISD::BITCAST) + return SDValue(); + + // Look through mask bitcast. + Mask = Mask.getOperand(0); + EVT MaskVT = Mask.getValueType(); + + // Validate that the Mask operand is a vector sra node. The sra node + // will be an intrinsic. + if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN) + return SDValue(); + + // FIXME: what to do for bytes, since there is a psignb/pblendvb, but + // there is no psrai.b + switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) { + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_sse2_psrai_d: + break; + default: return SDValue(); + } + + // Check that the SRA is all signbits. + SDValue SraC = Mask.getOperand(2); + unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); + unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); + if ((SraAmt + 1) != EltBits) + return SDValue(); + + DebugLoc DL = N->getDebugLoc(); + + // Now we know we at least have a pblendvb with the mask val. See if + // we can form a psignb/w/d. 
+ // psign = x.type == y.type == mask.type && y = sub(0, x); + X = X.getOperand(0); + Y = Y.getOperand(0); + if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X && + ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) && + X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){ + unsigned Opc = 0; + switch (EltBits) { + case 8: Opc = X86ISD::PSIGNB; break; + case 16: Opc = X86ISD::PSIGNW; break; + case 32: Opc = X86ISD::PSIGND; break; + default: break; + } + if (Opc) { + SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1)); + return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign); + } + } + // PBLENDVB only available on SSE 4.1 + if (!Subtarget->hasSSE41()) + return SDValue(); + + X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X); + Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y); + Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask); + Mask = DAG.getNode(X86ISD::PBLENDVB, DL, MVT::v16i8, X, Y, Mask); + return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask); + } + } + } + + // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) @@ -10600,9 +11719,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // pair instead. if (Subtarget->is64Bit() || F64IsLegal) { EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64; - SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), - Ld->getBasePtr(), Ld->getSrcValue(), - Ld->getSrcValueOffset(), Ld->isVolatile(), + SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), + Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->getAlignment()); SDValue NewChain = NewLd.getValue(1); if (TokenFactorIndex != -1) { @@ -10611,7 +11729,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, Ops.size()); } return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(), - St->getSrcValue(), St->getSrcValueOffset(), + St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); } @@ -10622,11 +11740,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, DAG.getConstant(4, MVT::i32)); SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, - Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->getAlignment()); SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr, - Ld->getSrcValue(), Ld->getSrcValueOffset()+4, + Ld->getPointerInfo().getWithOffset(4), Ld->isVolatile(), Ld->isNonTemporal(), MinAlign(Ld->getAlignment(), 4)); @@ -10643,12 +11761,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, DAG.getConstant(4, MVT::i32)); SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr, - St->getSrcValue(), St->getSrcValueOffset(), + St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr, - St->getSrcValue(), - St->getSrcValueOffset() + 4, + St->getPointerInfo().getWithOffset(4), St->isVolatile(), St->isNonTemporal(), MinAlign(St->getAlignment(), 4)); @@ -10706,13 +11823,13 @@ static SDValue PerformBTCombine(SDNode *N, static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { SDValue Op = N->getOperand(0); - if (Op.getOpcode() == ISD::BIT_CONVERT) + if (Op.getOpcode() == ISD::BITCAST) Op = Op.getOperand(0); EVT VT = N->getValueType(0), OpVT = Op.getValueType(); if (Op.getOpcode() == X86ISD::VZEXT_LOAD && 
VT.getVectorElementType().getSizeInBits() == OpVT.getVectorElementType().getSizeInBits()) { - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); } return SDValue(); } @@ -10743,19 +11860,106 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT +static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { + unsigned X86CC = N->getConstantOperandVal(0); + SDValue EFLAG = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + // Materialize "setb reg" as "sbb reg,reg", since it can be extended without + // a zext and produces an all-ones bit which is more useful than 0/1 in some + // cases. + if (X86CC == X86::COND_B) + return DAG.getNode(ISD::AND, DL, MVT::i8, + DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), EFLAG), + DAG.getConstant(1, MVT::i8)); + + return SDValue(); +} + +// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS +static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG, + X86TargetLowering::DAGCombinerInfo &DCI) { + // If the LHS and RHS of the ADC node are zero, then it can't overflow and + // the result is either zero or one (depending on the input carry bit). + // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1. + if (X86::isZeroNode(N->getOperand(0)) && + X86::isZeroNode(N->getOperand(1)) && + // We don't have a good way to replace an EFLAGS use, so only do this when + // dead right now. + SDValue(N, 1).use_empty()) { + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SDValue CarryOut = DAG.getConstant(0, N->getValueType(1)); + SDValue Res1 = DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B,MVT::i8), + N->getOperand(2)), + DAG.getConstant(1, VT)); + return DCI.CombineTo(N, Res1, CarryOut); + } + + return SDValue(); +} + +// fold (add Y, (sete X, 0)) -> adc 0, Y +// (add Y, (setne X, 0)) -> sbb -1, Y +// (sub (sete X, 0), Y) -> sbb 0, Y +// (sub (setne X, 0), Y) -> adc -1, Y +static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) { + DebugLoc DL = N->getDebugLoc(); + + // Look through ZExts. + SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0); + if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse()) + return SDValue(); + + SDValue SetCC = Ext.getOperand(0); + if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse()) + return SDValue(); + + X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0); + if (CC != X86::COND_E && CC != X86::COND_NE) + return SDValue(); + + SDValue Cmp = SetCC.getOperand(1); + if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() || + !X86::isZeroNode(Cmp.getOperand(1)) || + !Cmp.getOperand(0).getValueType().isInteger()) + return SDValue(); + + SDValue CmpOp0 = Cmp.getOperand(0); + SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, + DAG.getConstant(1, CmpOp0.getValueType())); + + SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1); + if (CC == X86::COND_NE) + return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB, + DL, OtherVal.getValueType(), OtherVal, + DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp); + return DAG.getNode(N->getOpcode() == ISD::SUB ? 
X86ISD::SBB : X86ISD::ADC, + DL, OtherVal.getValueType(), OtherVal, + DAG.getConstant(0, OtherVal.getValueType()), NewCmp); +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; switch (N->getOpcode()) { default: break; case ISD::EXTRACT_VECTOR_ELT: - return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); + return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); + case ISD::ADD: + case ISD::SUB: return OptimizeConditonalInDecrement(N, DAG); + case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); case ISD::SHL: case ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); + case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget); case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case X86ISD::FXOR: @@ -10764,8 +11968,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG); + case X86ISD::SETCC: return PerformSETCCCombine(N, DAG); case X86ISD::SHUFPS: // Handle all target specific shuffles case X86ISD::SHUFPD: + case X86ISD::PALIGN: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHDQ: @@ -10785,7 +11991,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PSHUFLW: case X86ISD::MOVSS: case X86ISD::MOVSD: - case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); + case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI); } return SDValue(); @@ -10892,44 +12098,14 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { // X86 Inline Assembly Support //===----------------------------------------------------------------------===// -static bool LowerToBSwap(CallInst *CI) { - // FIXME: this should verify that we are targetting a 486 or better. If not, - // we will turn this bswap into something that will be lowered to logical ops - // instead of emitting the bswap asm. For now, we don't support 486 or lower - // so don't worry about this. - - // Verify this is a simple bswap. - if (CI->getNumArgOperands() != 1 || - CI->getType() != CI->getArgOperand(0)->getType() || - !CI->getType()->isIntegerTy()) - return false; - - const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); - if (!Ty || Ty->getBitWidth() % 16 != 0) - return false; - - // Okay, we can do this xform, do so now. - const Type *Tys[] = { Ty }; - Module *M = CI->getParent()->getParent()->getParent(); - Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); - - Value *Op = CI->getArgOperand(0); - Op = CallInst::Create(Int, Op, CI->getName(), CI); - - CI->replaceAllUsesWith(Op); - CI->eraseFromParent(); - return true; -} - bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); - std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints(); std::string AsmStr = IA->getAsmString(); // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a" SmallVector<StringRef, 4> AsmPieces; - SplitString(AsmStr, AsmPieces, "\n"); // ; as separator? 
+ SplitString(AsmStr, AsmPieces, ";\n"); switch (AsmPieces.size()) { default: return false; @@ -10938,6 +12114,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces.clear(); SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace. + // FIXME: this should verify that we are targeting a 486 or better. If not, + // we will turn this bswap into something that will be lowered to logical ops + // instead of emitting the bswap asm. For now, we don't support 486 or lower + // so don't worry about this. // bswap $0 if (AsmPieces.size() == 2 && (AsmPieces[0] == "bswap" || @@ -10947,7 +12127,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces[1] == "${0:q}")) { // No need to check constraints, nothing other than the equivalent of // "=r,0" would be valid here. - return LowerToBSwap(CI); + const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + if (!Ty || Ty->getBitWidth() % 16 != 0) + return false; + return IntrinsicLowering::LowerToByteSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 if (CI->getType()->isIntegerTy(16) && @@ -10957,35 +12140,76 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces[2] == "${0:w}" && IA->getConstraintString().compare(0, 5, "=r,0,") == 0) { AsmPieces.clear(); - const std::string &Constraints = IA->getConstraintString(); - SplitString(StringRef(Constraints).substr(5), AsmPieces, ","); + const std::string &ConstraintsStr = IA->getConstraintString(); + SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); std::sort(AsmPieces.begin(), AsmPieces.end()); if (AsmPieces.size() == 4 && AsmPieces[0] == "~{cc}" && AsmPieces[1] == "~{dirflag}" && AsmPieces[2] == "~{flags}" && AsmPieces[3] == "~{fpsr}") { - return LowerToBSwap(CI); + const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + if (!Ty || Ty->getBitWidth() % 16 != 0) + return false; + return IntrinsicLowering::LowerToByteSwap(CI); } } break; case 3: - if (CI->getType()->isIntegerTy(64) && - Constraints.size() >= 2 && - Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && - Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { - // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 + if (CI->getType()->isIntegerTy(32) && + IA->getConstraintString().compare(0, 5, "=r,0,") == 0) { SmallVector<StringRef, 4> Words; - SplitString(AsmPieces[0], Words, " \t"); - if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") { + SplitString(AsmPieces[0], Words, " \t,"); + if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" && + Words[2] == "${0:w}") { Words.clear(); - SplitString(AsmPieces[1], Words, " \t"); - if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") { + SplitString(AsmPieces[1], Words, " \t,"); + if (Words.size() == 3 && Words[0] == "rorl" && Words[1] == "$$16" && + Words[2] == "$0") { Words.clear(); SplitString(AsmPieces[2], Words, " \t,"); - if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" && - Words[2] == "%edx") { - return LowerToBSwap(CI); + if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" && + Words[2] == "${0:w}") { + AsmPieces.clear(); + const std::string &ConstraintsStr = IA->getConstraintString(); + SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); + std::sort(AsmPieces.begin(), AsmPieces.end()); + if (AsmPieces.size() == 4 && + AsmPieces[0] == "~{cc}" && + AsmPieces[1] == "~{dirflag}" && + AsmPieces[2] == "~{flags}" && + AsmPieces[3] == "~{fpsr}") { + const IntegerType *Ty = 
dyn_cast<IntegerType>(CI->getType()); + if (!Ty || Ty->getBitWidth() % 16 != 0) + return false; + return IntrinsicLowering::LowerToByteSwap(CI); + } + } + } + } + } + + if (CI->getType()->isIntegerTy(64)) { + InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints(); + if (Constraints.size() >= 2 && + Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && + Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { + // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 + SmallVector<StringRef, 4> Words; + SplitString(AsmPieces[0], Words, " \t"); + if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") { + Words.clear(); + SplitString(AsmPieces[1], Words, " \t"); + if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") { + Words.clear(); + SplitString(AsmPieces[2], Words, " \t,"); + if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" && + Words[2] == "%edx") { + const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + if (!Ty || Ty->getBitWidth() % 16 != 0) + return false; + return IntrinsicLowering::LowerToByteSwap(CI); + } } } } @@ -11003,18 +12227,32 @@ X86TargetLowering::ConstraintType X86TargetLowering::getConstraintType(const std::string &Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { - case 'A': - return C_Register; - case 'f': - case 'r': case 'R': - case 'l': case 'q': case 'Q': - case 'x': + case 'f': + case 't': + case 'u': case 'y': + case 'x': case 'Y': return C_RegisterClass; + case 'a': + case 'b': + case 'c': + case 'd': + case 'S': + case 'D': + case 'A': + return C_Register; + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'G': + case 'C': case 'e': case 'Z': return C_Other; @@ -11025,6 +12263,110 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight + X86TargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + const Type *type = CallOperandVal->getType(); + // Look at the constraint type. 
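+  // (Weights come from the TargetLowering::ConstraintWeight enum; a higher
+  // weight marks a better match when the inline asm string offers several
+  // constraint alternatives, with CW_Default as the neutral fallback.)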
+ switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + case 'R': + case 'q': + case 'Q': + case 'a': + case 'b': + case 'c': + case 'd': + case 'S': + case 'D': + case 'A': + if (CallOperandVal->getType()->isIntegerTy()) + weight = CW_SpecificReg; + break; + case 'f': + case 't': + case 'u': + if (type->isFloatingPointTy()) + weight = CW_SpecificReg; + break; + case 'y': + if (type->isX86_MMXTy() && Subtarget->hasMMX()) + weight = CW_SpecificReg; + break; + case 'x': + case 'Y': + if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM()) + weight = CW_Register; + break; + case 'I': + if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) { + if (C->getZExtValue() <= 31) + weight = CW_Constant; + } + break; + case 'J': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (C->getZExtValue() <= 63) + weight = CW_Constant; + } + break; + case 'K': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f)) + weight = CW_Constant; + } + break; + case 'L': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff)) + weight = CW_Constant; + } + break; + case 'M': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (C->getZExtValue() <= 3) + weight = CW_Constant; + } + break; + case 'N': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (C->getZExtValue() <= 0xff) + weight = CW_Constant; + } + break; + case 'G': + case 'C': + if (dyn_cast<ConstantFP>(CallOperandVal)) { + weight = CW_Constant; + } + break; + case 'e': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if ((C->getSExtValue() >= -0x80000000LL) && + (C->getSExtValue() <= 0x7fffffffLL)) + weight = CW_Constant; + } + break; + case 'Z': + if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { + if (C->getZExtValue() <= 0xffffffff) + weight = CW_Constant; + } + break; + } + return weight; +} + /// LowerXConstraint - try to replace an X constraint, which matches anything, /// with another that has more specific requirements based on the type of the /// corresponding operand. @@ -11033,9 +12375,9 @@ LowerXConstraint(EVT ConstraintVT) const { // FP X constraints get lowered to SSE1/2 registers if available, otherwise // 'f' like normal targets. if (ConstraintVT.isFloatingPoint()) { - if (Subtarget->hasSSE2()) + if (Subtarget->hasXMMInt()) return "Y"; - if (Subtarget->hasSSE1()) + if (Subtarget->hasXMM()) return "x"; } @@ -11265,10 +12607,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, if (!Subtarget->hasMMX()) break; return std::make_pair(0U, X86::VR64RegisterClass); case 'Y': // SSE_REGS if SSE2 allowed - if (!Subtarget->hasSSE2()) break; + if (!Subtarget->hasXMMInt()) break; // FALL THROUGH. case 'x': // SSE_REGS if SSE1 allowed - if (!Subtarget->hasSSE1()) break; + if (!Subtarget->hasXMM()) break; switch (VT.getSimpleVT().SimpleTy) { default: break; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d2d9b28..419da37 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -57,35 +57,6 @@ namespace llvm { /// corresponds to X86::PSRLDQ. FSRL, - /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the - /// integer source in memory and FP reg result. This corresponds to the - /// X86::FILD*m instructions. 
It has three inputs (token chain, address, - /// and source type) and two outputs (FP value and token chain). FILD_FLAG - /// also produces a flag). - FILD, - FILD_FLAG, - - /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the - /// integer destination in memory and a FP reg source. This corresponds - /// to the X86::FIST*m instructions and the rounding mode change stuff. It - /// has two inputs (token chain and address) and two outputs (int value - /// and token chain). - FP_TO_INT16_IN_MEM, - FP_TO_INT32_IN_MEM, - FP_TO_INT64_IN_MEM, - - /// FLD - This instruction implements an extending load to FP stack slots. - /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain - /// operand, ptr to load from, and a ValueType node indicating the type - /// to load to. - FLD, - - /// FST - This instruction implements a truncating store to FP stack - /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a - /// chain operand, value to store, address, and a ValueType to store it - /// as. - FST, - /// CALL - These operations represent an abstract X86 call /// instruction, which includes a bunch of information. In particular the /// operands of these node are: @@ -105,7 +76,7 @@ namespace llvm { /// CALL, - /// RDTSC_DAG - This operation implements the lowering for + /// RDTSC_DAG - This operation implements the lowering for /// readcyclecounter RDTSC_DAG, @@ -115,13 +86,13 @@ namespace llvm { /// X86 bit-test instructions. BT, - /// X86 SetCC. Operand 0 is condition code, and operand 1 is the flag - /// operand produced by a CMP instruction. + /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS + /// operand, usually produced by a CMP instruction. SETCC, // Same as SETCC except it's materialized with a sbb and the value is all // one's or all zero's. - SETCC_CARRY, + SETCC_CARRY, // R = carry_bit ? ~0 : 0 /// X86 conditional moves. Operand 0 and operand 1 are the two values /// to select from. Operand 2 is the condition code, and operand 3 is the @@ -157,11 +128,15 @@ namespace llvm { /// relative displacements. WrapperRIP, - /// MOVQ2DQ - Copies a 64-bit value from a vector to another vector. - /// Can be used to move a vector value from a MMX register to a XMM - /// register. + /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word + /// of an XMM vector, with the high word zero filled. MOVQ2DQ, + /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector + /// to an MMX vector. If you think this is too close to the previous + /// mnemonic, so do I; blame Intel. + MOVDQ2Q, + /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRB. PEXTRB, @@ -184,7 +159,16 @@ namespace llvm { /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, - + + /// PANDN - and with not'd value. + PANDN, + + /// PSIGNB/W/D - Copy integer sign. + PSIGNB, PSIGNW, PSIGND, + + /// PBLENDVB - Variable blend + PBLENDVB, + /// FMAX, FMIN - Floating point max and min. /// FMAX, FMIN, @@ -196,17 +180,14 @@ namespace llvm { // TLSADDR - Thread Local Storage. TLSADDR, - + // TLSCALL - Thread Local Storage. When calling to an OS provided // thunk at the address from an earlier relocation. TLSCALL, - // SegmentBaseAddress - The address segment:0 - SegmentBaseAddress, - // EH_RETURN - Exception Handling helpers. EH_RETURN, - + /// TC_RETURN - Tail call return. 
      ///   operand #0 chain
      ///   operand #1 callee (register or absolute)
@@ -214,37 +195,29 @@ namespace llvm {
      ///   operand #3 optional in flag
      TC_RETURN,
 
-      // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
-      LCMPXCHG_DAG,
-      LCMPXCHG8_DAG,
-
-      // FNSTCW16m - Store FP control world into i16 memory.
-      FNSTCW16m,
-
       // VZEXT_MOVL - Vector move low and zero extend.
       VZEXT_MOVL,
 
-      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
-      VZEXT_LOAD,
-
       // VSHL, VSRL - Vector logical left / right shift.
       VSHL, VSRL,
 
       // CMPPD, CMPPS - Vector double/float comparison.
       CMPPD, CMPPS,
-
+
       // PCMP* - Vector integer comparisons.
       PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
       PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
 
-      // ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results.
-      ADD, SUB, SMUL, UMUL,
+      // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
+      ADD, SUB, ADC, SBB, SMUL,
       INC, DEC, OR, XOR, AND,
+
+      UMUL, // LOW, HI, FLAGS = umul LHS, RHS
 
       // MUL_IMM - X86 specific multiply by immediate.
       MUL_IMM,
-
+
       // PTEST - Vector bitwise comparisons
       PTEST,
@@ -291,11 +264,17 @@ namespace llvm {
       // with control flow.
       VASTART_SAVE_XMM_REGS,
 
-      // MINGW_ALLOCA - MingW's __alloca call to do stack probing.
-      MINGW_ALLOCA,
+      // WIN_ALLOCA - Windows' _chkstk call to do stack probing.
+      WIN_ALLOCA,
+
+      // Memory barrier
+      MEMBARRIER,
+      MFENCE,
+      SFENCE,
+      LFENCE,
 
-      // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
-      // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
+      // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
+      // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
       // Atomic 64-bit binary operations.
       ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
       ATOMSUB64_DAG,
@@ -304,12 +283,49 @@ namespace llvm {
       ATOMAND64_DAG,
       ATOMNAND64_DAG,
       ATOMSWAP64_DAG,
-
-      // Memory barrier
-      MEMBARRIER,
-      MFENCE,
-      SFENCE,
-      LFENCE
+
+      // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
+      LCMPXCHG_DAG,
+      LCMPXCHG8_DAG,
+
+      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+      VZEXT_LOAD,
+
+      // FNSTCW16m - Store FP control word into i16 memory.
+      FNSTCW16m,
+
+      /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
+      /// integer destination in memory and a FP reg source.  This corresponds
+      /// to the X86::FIST*m instructions and the rounding mode change stuff. It
+      /// has two inputs (token chain and address) and two outputs (int value
+      /// and token chain).
+      FP_TO_INT16_IN_MEM,
+      FP_TO_INT32_IN_MEM,
+      FP_TO_INT64_IN_MEM,
+
+      /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
+      /// integer source in memory and FP reg result.  This corresponds to the
+      /// X86::FILD*m instructions. It has three inputs (token chain, address,
+      /// and source type) and two outputs (FP value and token chain). FILD_FLAG
+      /// also produces a flag.
+      FILD,
+      FILD_FLAG,
+
+      /// FLD - This instruction implements an extending load to FP stack slots.
+      /// This corresponds to the X86::FLD32m / X86::FLD64m instructions. It
+      /// takes a chain operand, ptr to load from, and a ValueType node
+      /// indicating the type to load to.
+      FLD,
+
+      /// FST - This instruction implements a truncating store to FP stack
+      /// slots. This corresponds to the X86::FST32m / X86::FST64m instructions.
+      /// It takes a chain operand, value to store, address, and a ValueType to
+      /// store it as.
+      FST,
+
+      /// VAARG_64 - This instruction grabs the address of the next argument
+      /// from a va_list.
(reads and modifies the va_list in memory) + VAARG_64 // WARNING: Do not add anything in the end unless you want the node to // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be @@ -392,6 +408,16 @@ namespace llvm { /// specifies a shuffle of elements that is suitable for input to PALIGNR. bool isPALIGNRMask(ShuffleVectorSDNode *N); + /// isVEXTRACTF128Index - Return true if the specified + /// EXTRACT_SUBVECTOR operand specifies a vector extract that is + /// suitable for input to VEXTRACTF128. + bool isVEXTRACTF128Index(SDNode *N); + + /// isVINSERTF128Index - Return true if the specified + /// INSERT_SUBVECTOR operand specifies a subvector insert that is + /// suitable for input to VINSERTF128. + bool isVINSERTF128Index(SDNode *N); + /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP* /// instructions. @@ -409,6 +435,16 @@ namespace llvm { /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. unsigned getShufflePALIGNRImmediate(SDNode *N); + /// getExtractVEXTRACTF128Immediate - Return the appropriate + /// immediate to extract the specified EXTRACT_SUBVECTOR index + /// with VEXTRACTF128 instructions. + unsigned getExtractVEXTRACTF128Immediate(SDNode *N); + + /// getInsertVINSERTF128Immediate - Return the appropriate + /// immediate to insert at the specified INSERT_SUBVECTOR index + /// with VINSERTF128 instructions. + unsigned getInsertVINSERTF128Immediate(SDNode *N); + /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. bool isZeroNode(SDValue Elt); @@ -425,16 +461,13 @@ namespace llvm { public: explicit X86TargetLowering(X86TargetMachine &TM); - /// getPICBaseSymbol - Return the X86-32 PIC base. - MCSymbol *getPICBaseSymbol(const MachineFunction *MF, MCContext &Ctx) const; - virtual unsigned getJumpTableEncoding() const; virtual const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const; - + /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC /// jumptable. virtual SDValue getPICJumpTableRelocBase(SDValue Table, @@ -442,7 +475,7 @@ namespace llvm { virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const; - + /// getStackPtrReg - Return the stack pointer register we are using: either /// ESP or RSP. unsigned getStackPtrReg() const { return X86StackPtr; } @@ -486,7 +519,7 @@ namespace llvm { virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, SelectionDAG &DAG) const; - + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; /// isTypeDesirableForOp - Return true if the target has native support for @@ -505,7 +538,7 @@ namespace llvm { EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; - + /// getTargetNodeName - This method returns the name of a target specific /// DAG node. 
virtual const char *getTargetNodeName(unsigned Opcode) const; @@ -513,26 +546,36 @@ namespace llvm { /// getSetCCResultType - Return the ISD::SETCC ValueType virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; - /// computeMaskedBitsForTargetNode - Determine which of the bits specified - /// in Mask are known to be either zero or one and return them in the + /// computeMaskedBitsForTargetNode - Determine which of the bits specified + /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. virtual void computeMaskedBitsForTargetNode(const SDValue Op, const APInt &Mask, - APInt &KnownZero, + APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth = 0) const; + // ComputeNumSignBitsForTargetNode - Determine the number of bits in the + // operation that are sign bits. + virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth) const; + virtual bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; - + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; virtual bool ExpandInlineAsm(CallInst *CI) const; - + ConstraintType getConstraintType(const std::string &Constraint) const; - - std::vector<unsigned> + + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. + virtual ConstraintWeight getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const; + + std::vector<unsigned> getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; @@ -546,15 +589,15 @@ namespace llvm { char ConstraintLetter, std::vector<SDValue> &Ops, SelectionDAG &DAG) const; - + /// getRegForInlineAsmConstraint - Given a physical register constraint /// (e.g. {edx}), return the register number and the register class for the /// register. This should only be used for C_Register constraints. On /// error, this returns a register number of 0. - std::pair<unsigned, const TargetRegisterClass*> + std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; @@ -609,7 +652,7 @@ namespace llvm { // shrink long double fp constant since fldt is very slow. return !X86ScalarSSEf64 || VT == MVT::f80; } - + const X86Subtarget* getSubtarget() const { return Subtarget; } @@ -650,8 +693,8 @@ namespace llvm { /// X86StackPtr - X86 physical register used as stack ptr. unsigned X86StackPtr; - - /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 + + /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 /// floating point ops. /// When SSE is available, use it for f32 operations. /// When SSE2 is available, use it for f64 operations. 
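For orientation, the getSingleConstraintMatchWeight override declared here (and implemented in the X86ISelLowering.cpp hunk above) boils down to "tighter constraints get larger weights, so the matcher prefers them over the generic fallback." A standalone sketch of that idea; the enum values and helper are simplified stand-ins, not the LLVM API:

    // Simplified stand-ins for the ConstraintWeight scheme; not LLVM code.
    #include <cassert>
    #include <cstdint>

    enum Weight { Default = 0, Constant = 1, Register = 2, SpecificReg = 3 };

    // 'I' accepts an immediate in [0, 31]; 'x' accepts a 128-bit SSE value
    // when XMM registers are available; anything else gets the fallback.
    Weight weigh(char constraint, uint64_t imm, unsigned bits, bool hasXMM) {
      switch (constraint) {
      case 'I': return imm <= 31 ? Constant : Default;
      case 'x': return (bits == 128 && hasXMM) ? Register : Default;
      default:  return Default;
      }
    }

    int main() {
      assert(weigh('I', 17, 0, false) == Constant);  // shift count fits 0..31
      assert(weigh('I', 99, 0, false) == Default);   // too large for 'I'
      assert(weigh('x', 0, 128, true) == Register);  // e.g. a v4f32 operand
      return 0;
    }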
@@ -702,7 +745,6 @@ namespace llvm { SDValue Chain, bool IsTailCall, bool Is64Bit, int FPDiff, DebugLoc dl) const; - CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG) const; @@ -719,6 +761,8 @@ namespace llvm { SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, @@ -729,7 +773,7 @@ namespace llvm { SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const; - SDValue LowerBIT_CONVERT(SDValue op, SelectionDAG &DAG) const; + SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const; SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; @@ -794,6 +838,8 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; + virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, @@ -810,6 +856,13 @@ namespace llvm { MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB, unsigned argNum, bool inMem) const; + /// Utility functions to emit monitor and mwait instructions. These + /// need to make sure that the arguments to the intrinsic are in the + /// correct registers. + MachineBasicBlock *EmitMonitor(MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const; + /// Utility function to emit atomic bitwise operations (and, or, xor). /// It takes the bitwise instruction to expand, the associated machine basic /// block, and the associated X86 opcodes for reg/reg and reg/imm. @@ -833,7 +886,7 @@ namespace llvm { unsigned immOpcL, unsigned immOpcH, bool invSrc = false) const; - + /// Utility function to emit atomic min and max. It takes the min/max /// instruction to expand, the associated basic block, and the associated /// cmov opcode for moving the min or max value. @@ -841,6 +894,11 @@ namespace llvm { MachineBasicBlock *BB, unsigned cmovOpc) const; + // Utility function to emit the low-level va_arg code for X86-64. + MachineBasicBlock *EmitVAARG64WithCustomInserter( + MachineInstr *MI, + MachineBasicBlock *MBB) const; + /// Utility function to emit the xmm reg save portion of va_start. 
    MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
                                                   MachineInstr *BInstr,
@@ -849,12 +907,15 @@ namespace llvm {
    MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
                                         MachineBasicBlock *BB) const;
 
-    MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI,
+    MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
                                               MachineBasicBlock *BB) const;
-
+
     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
                                           MachineBasicBlock *BB) const;
 
+    MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
+                                          MachineBasicBlock *BB) const;
+
     /// Emit nodes that will be selected as "test Op0,Op0", or something
     /// equivalent, for use with the given x86 condition code.
     SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
new file mode 100644
index 0000000..45d1c6b
--- /dev/null
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -0,0 +1,77 @@
+//====- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the 3DNow! instruction set, which extends MMX to support
+// floating point and also adds a few more random instructions for good measure.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: We don't support any intrinsics for these instructions yet.
+
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm,
+             list<dag> pattern>
+      : I<o, F, outs, ins, asm, pattern>, TB, Requires<[Has3DNow]> {
+}
+
+class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic>
+      : I<o, F, (outs VR64:$dst), ins,
+          !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), []>,
+        TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode {
+  // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+  let isAsmParserOnly = 1;
+}
+
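As context for Has3DNow0F0FOpcode and the disassembler FIXME above: 3DNow! instructions all share the 0F 0F escape, and the byte that actually selects the instruction trails the operands, in the position an immediate would normally occupy. A standalone sketch (not LLVM code), using the PFADD opcode 0x9E from the defm table below:

    // Illustration of the 0F 0F encoding quirk; not LLVM code.
    #include <cstdint>
    #include <vector>

    // Register-register "pfadd %mm1, %mm0" encodes as 0F 0F C1 9E.
    std::vector<uint8_t> encodePFADDrr(unsigned dstMM, unsigned srcMM) {
      uint8_t modrm = 0xC0 | ((dstMM & 7) << 3) | (srcMM & 7); // mod=11 reg form
      return {0x0F, 0x0F, modrm, 0x9E}; // the trailing byte is the real opcode
    }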
+
+let Constraints = "$src1 = $dst" in {
+  // I3DNow_binop_rm - Simple 3DNow! binary operator; no intrinsic patterns yet.
+  // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
+  multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
+    def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn>;
+    def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn>;
+  }
+}
+
+defm PAVGUSB  : I3DNow_binop_rm<0xBF, "pavgusb">;
+defm PF2ID    : I3DNow_binop_rm<0x1D, "pf2id">;
+defm PFACC    : I3DNow_binop_rm<0xAE, "pfacc">;
+defm PFADD    : I3DNow_binop_rm<0x9E, "pfadd">;
+defm PFCMPEQ  : I3DNow_binop_rm<0xB0, "pfcmpeq">;
+defm PFCMPGE  : I3DNow_binop_rm<0x90, "pfcmpge">;
+defm PFCMPGT  : I3DNow_binop_rm<0xA0, "pfcmpgt">;
+defm PFMAX    : I3DNow_binop_rm<0xA4, "pfmax">;
+defm PFMIN    : I3DNow_binop_rm<0x94, "pfmin">;
+defm PFMUL    : I3DNow_binop_rm<0xB4, "pfmul">;
+defm PFRCP    : I3DNow_binop_rm<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">;
+defm PFRSQRT  : I3DNow_binop_rm<0x97, "pfrsqrt">;
+defm PFSUB    : I3DNow_binop_rm<0x9A, "pfsub">;
+defm PFSUBR   : I3DNow_binop_rm<0xAA, "pfsubr">;
+defm PI2FD    : I3DNow_binop_rm<0x0D, "pi2fd">;
+defm PMULHRW  : I3DNow_binop_rm<0xB7, "pmulhrw">;
+
+
+def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
+
+def PREFETCH  : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
+                       "prefetch $addr", []>;
+
+// FIXME: Disassembler gets a bogus decode conflict.
+let isAsmParserOnly = 1 in {
+def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
+                       "prefetchw $addr", []>;
+}
+
+// "3DNowA" instructions
+defm PF2IW    : I3DNow_binop_rm<0x1C, "pf2iw">;
+defm PI2FW    : I3DNow_binop_rm<0x0C, "pi2fw">;
+defm PFNACC   : I3DNow_binop_rm<0x8A, "pfnacc">;
+defm PFPNACC  : I3DNow_binop_rm<0x8E, "pfpnacc">;
+defm PSWAPD   : I3DNow_binop_rm<0xBB, "pswapd">;
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
deleted file mode 100644
index 0884b61..0000000
--- a/lib/Target/X86/X86Instr64bit.td
+++ /dev/null
@@ -1,2250 +0,0 @@
-//====- X86Instr64bit.td - Describe X86-64 Instructions ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the X86-64 instruction set, defining the instructions,
-// and properties of the instructions which are needed for code generation,
-// machine code emission, and analysis.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Operand Definitions.
-//
-
-// 64-bits but only 32 bits are significant.
-def i64i32imm  : Operand<i64> {
-  let ParserMatchClass = ImmSExti64i32AsmOperand;
-}
-
-// 64-bits but only 32 bits are significant, and those bits are treated as being
-// pc relative.
-def i64i32imm_pcrel : Operand<i64> {
-  let PrintMethod = "print_pcrel_imm";
-  let ParserMatchClass = X86AbsMemAsmOperand;
-}
-
-
-// 64-bits but only 8 bits are significant.
-def i64i8imm   : Operand<i64> {
-  let ParserMatchClass = ImmSExti64i8AsmOperand;
-}
-
-def lea64_32mem : Operand<i32> {
-  let PrintMethod = "printi32mem";
-  let AsmOperandLowerMethod = "lower_lea64_32mem";
-  let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
-  let ParserMatchClass = X86MemAsmOperand;
-}
-
-
-// Special i64mem for addresses of load folding tail calls.
These are not -// allowed to use callee-saved registers since they must be scheduled -// after callee-saved register are popped. -def i64mem_TC : Operand<i64> { - let PrintMethod = "printi64mem"; - let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm); - let ParserMatchClass = X86MemAsmOperand; -} - -//===----------------------------------------------------------------------===// -// Complex Pattern Definitions. -// -def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr", - [add, sub, mul, X86mul_imm, shl, or, frameindex, - X86WrapperRIP], []>; - -def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr", - [tglobaltlsaddr], []>; - -//===----------------------------------------------------------------------===// -// Pattern fragments. -// - -def i64immSExt8 : PatLeaf<(i64 immSext8)>; - -def GetLo32XForm : SDNodeXForm<imm, [{ - // Transformation function: get the low 32 bits. - return getI32Imm((unsigned)N->getZExtValue()); -}]>; - -def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>; - - -def i64immZExt32 : PatLeaf<(i64 imm), [{ - // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit - // unsignedsign extended field. - return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue(); -}]>; - -def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>; -def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>; -def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>; - -def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>; -def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>; -def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>; -def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>; - -def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>; -def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>; -def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>; -def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>; - -//===----------------------------------------------------------------------===// -// Instruction list... -// - -// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into -// a stack adjustment and the codegen must know that they may modify the stack -// pointer before prolog-epilog rewriting occurs. -// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become -// sub / add which can clobber EFLAGS. -let Defs = [RSP, EFLAGS], Uses = [RSP] in { -def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt), - "#ADJCALLSTACKDOWN", - [(X86callseq_start timm:$amt)]>, - Requires<[In64BitMode]>; -def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), - "#ADJCALLSTACKUP", - [(X86callseq_end timm:$amt1, timm:$amt2)]>, - Requires<[In64BitMode]>; -} - -// Interrupt Instructions -def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iret{q}", []>; - -//===----------------------------------------------------------------------===// -// Call Instructions... -// -let isCall = 1 in - // All calls clobber the non-callee saved registers. RSP is marked as - // a use to prevent stack-pointer assignments that appear immediately - // before calls from potentially appearing dead. Uses for argument - // registers are added manually. 
- let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, - FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [RSP] in { - - // NOTE: this pattern doesn't match "X86call imm", because we do not know - // that the offset between an arbitrary immediate and the call will fit in - // the 32-bit pcrel field that we have. - def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, - (outs), (ins i64i32imm_pcrel:$dst, variable_ops), - "call{q}\t$dst", []>, - Requires<[In64BitMode, NotWin64]>; - def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), - "call{q}\t{*}$dst", [(X86call GR64:$dst)]>, - Requires<[NotWin64]>; - def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), - "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, - Requires<[NotWin64]>; - - def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), - "lcall{q}\t{*}$dst", []>; - } - - // FIXME: We need to teach codegen about single list of call-clobbered - // registers. -let isCall = 1, isCodeGenOnly = 1 in - // All calls clobber the non-callee saved registers. RSP is marked as - // a use to prevent stack-pointer assignments that appear immediately - // before calls from potentially appearing dead. Uses for argument - // registers are added manually. - let Defs = [RAX, RCX, RDX, R8, R9, R10, R11, - FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS], - Uses = [RSP] in { - def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, - (outs), (ins i64i32imm_pcrel:$dst, variable_ops), - "call\t$dst", []>, - Requires<[IsWin64]>; - def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), - "call\t{*}$dst", - [(X86call GR64:$dst)]>, Requires<[IsWin64]>; - def WINCALL64m : I<0xFF, MRM2m, (outs), - (ins i64mem:$dst, variable_ops), "call\t{*}$dst", - [(X86call (loadi64 addr:$dst))]>, - Requires<[IsWin64]>; - } - - -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1 in - let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, - FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [RSP] in { - def TCRETURNdi64 : I<0, Pseudo, (outs), - (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops), - "#TC_RETURN $dst $offset", []>; - def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset, - variable_ops), - "#TC_RETURN $dst $offset", []>; - let mayLoad = 1 in - def TCRETURNmi64 : I<0, Pseudo, (outs), - (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), - "#TC_RETURN $dst $offset", []>; - - def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), - (ins i64i32imm_pcrel:$dst, variable_ops), - "jmp\t$dst # TAILCALL", []>; - def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops), - "jmp{q}\t{*}$dst # TAILCALL", []>; - - let mayLoad = 1 in - def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops), - "jmp{q}\t{*}$dst # TAILCALL", []>; -} - -// Branches -let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { - def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst), - "jmp{q}\t$dst", []>; - def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", - [(brind GR64:$dst)]>, Requires<[In64BitMode]>; - def JMP64m : I<0xFF, MRM4m, 
(outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", - [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>; - def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), - "ljmp{q}\t{*}$dst", []>; -} - -//===----------------------------------------------------------------------===// -// EH Pseudo Instructions -// -let isTerminator = 1, isReturn = 1, isBarrier = 1, - hasCtrlDep = 1, isCodeGenOnly = 1 in { -def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), - "ret\t#eh_return, addr: $addr", - [(X86ehret GR64:$addr)]>; - -} - -//===----------------------------------------------------------------------===// -// Miscellaneous Instructions... -// - -def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; -let mayLoad = 1 in -def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; - -let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in -def LEAVE64 : I<0xC9, RawFrm, - (outs), (ins), "leave", []>, Requires<[In64BitMode]>; -let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { -let mayLoad = 1 in { -def POP64r : I<0x58, AddRegFrm, - (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; -def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; -def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>; -} -let mayStore = 1 in { -def PUSH64r : I<0x50, AddRegFrm, - (outs), (ins GR64:$reg), "push{q}\t$reg", []>; -def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>; -def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>; -} -} - -let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { -def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), - "push{q}\t$imm", []>; -def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), - "push{q}\t$imm", []>; -def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm), - "push{q}\t$imm", []>; -} - -let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in -def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>, - Requires<[In64BitMode]>; -let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in -def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>, - Requires<[In64BitMode]>; - -def LEA64_32r : I<0x8D, MRMSrcMem, - (outs GR32:$dst), (ins lea64_32mem:$src), - "lea{l}\t{$src|$dst}, {$dst|$src}", - [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>; - -let isReMaterializable = 1 in -def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "lea{q}\t{$src|$dst}, {$dst|$src}", - [(set GR64:$dst, lea64addr:$src)]>; - -let Constraints = "$src = $dst" in -def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), - "bswap{q}\t$dst", - [(set GR64:$dst, (bswap GR64:$src))]>, TB; - -// Bit scan instructions. 
-let Defs = [EFLAGS] in { -def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "bsf{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB; -def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "bsf{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB; - -def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB; -def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB; -} // Defs = [EFLAGS] - -// Repeat string ops -let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in -def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", - [(X86rep_movs i64)]>, REP; -let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in -def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", - [(X86rep_stos i64)]>, REP; - -let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in -def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>; - -let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in -def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>; - -def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>; - -def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>; - -// Fast system-call instructions -def SYSEXIT64 : RI<0x35, RawFrm, - (outs), (ins), "sysexit", []>, TB, Requires<[In64BitMode]>; - -//===----------------------------------------------------------------------===// -// Move Instructions... -// - -let neverHasSideEffects = 1 in -def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; - -let isReMaterializable = 1, isAsCheapAsAMove = 1 in { -def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), - "movabs{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, imm:$src)]>; -def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), - "mov{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, i64immSExt32:$src)]>; -} - -// The assembler accepts movq of a 64-bit immediate as an alternate spelling of -// movabsq. -let isAsmParserOnly = 1 in { -def MOV64ri_alt : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; -} - -let isCodeGenOnly = 1 in { -def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; -} - -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "mov{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (load addr:$src))]>; - -def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", - [(store GR64:$src, addr:$dst)]>; -def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), - "mov{q}\t{$src, $dst|$dst, $src}", - [(store i64immSExt32:$src, addr:$dst)]>; - -/// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC. 
-let isCodeGenOnly = 1 in { -let neverHasSideEffects = 1 in -def MOV64rr_TC : RI<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; - -let mayLoad = 1, - canFoldAsLoad = 1, isReMaterializable = 1 in -def MOV64rm_TC : RI<0x8B, MRMSrcMem, (outs GR64_TC:$dst), (ins i64mem_TC:$src), - "mov{q}\t{$src, $dst|$dst, $src}", - []>; - -let mayStore = 1 in -def MOV64mr_TC : RI<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src), - "mov{q}\t{$src, $dst|$dst, $src}", - []>; -} - -// FIXME: These definitions are utterly broken -// Just leave them commented out for now because they're useless outside -// of the large code model, and most compilers won't generate the instructions -// in question. -/* -def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src), - "mov{q}\t{$src, %rax|%rax, $src}", []>; -def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src), - "mov{q}\t{$src, %rax|%rax, $src}", []>; -def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins), - "mov{q}\t{%rax, $dst|$dst, %rax}", []>; -def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), - "mov{q}\t{%rax, $dst|$dst, %rax}", []>; -*/ - -// Moves to and from segment registers -def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; -def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; -def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; -def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; - -// Moves to and from debug registers -def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; - -// Moves to and from control registers -def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; - -// Sign/Zero extenders - -// MOVSX64rr8 always has a REX prefix and it has an 8-bit register -// operand, which makes it a rare instruction with an 8-bit register -// operand that can never access an h register. If support for h registers -// were generalized, this would require a special register class. 
-def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), - "movs{bq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR8:$src))]>, TB; -def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), - "movs{bq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB; -def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "movs{wq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR16:$src))]>, TB; -def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "movs{wq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB; -def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), - "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR32:$src))]>; -def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), - "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i32 addr:$src))]>; - -// movzbq and movzwq encodings for the disassembler -def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), - "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; -def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), - "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; -def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; -def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; - -// Use movzbl instead of movzbq when the destination is a register; it's -// equivalent due to implicit zero-extending, and it has a smaller encoding. -def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), - "", [(set GR64:$dst, (zext GR8:$src))]>, TB; -def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), - "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB; -// Use movzwl instead of movzwq when the destination is a register; it's -// equivalent due to implicit zero-extending, and it has a smaller encoding. -def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "", [(set GR64:$dst, (zext GR16:$src))]>, TB; -def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB; - -// There's no movzlq instruction, but movl can be used for this purpose, using -// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero -// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit -// zero-extension, however this isn't possible when the 32-bit value is -// defined by a truncate or is copied from something where the high bits aren't -// necessarily all zero. In such cases, we fall back to these explicit zext -// instructions. -def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), - "", [(set GR64:$dst, (zext GR32:$src))]>; -def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), - "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; - -// Any instruction that defines a 32-bit result leaves the high half of the -// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may -// be copying from a truncate. And x86's cmov doesn't do anything if the -// condition is false. But any other 32-bit operation will zero-extend -// up to 64 bits. 
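The zero-extension property that the def32 pattern below encodes can be seen in isolation; a minimal sketch, not LLVM code:

    // On x86-64, any instruction writing a 32-bit register clears bits
    // 63:32, so this zero-extension costs no extra instruction.
    #include <cstdint>

    uint64_t zext32to64(uint32_t x) {
      // Typically a single 32-bit move (e.g. "movl %edi, %eax"); the
      // SUBREG_TO_REG pattern models exactly this free zero-extension.
      return x;
    }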
-def def32 : PatLeaf<(i32 GR32:$src), [{ - return N->getOpcode() != ISD::TRUNCATE && - N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && - N->getOpcode() != ISD::CopyFromReg && - N->getOpcode() != X86ISD::CMOV; -}]>; - -// In the case of a 32-bit def that is known to implicitly zero-extend, -// we can use a SUBREG_TO_REG. -def : Pat<(i64 (zext def32:$src)), - (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; - -let neverHasSideEffects = 1 in { - let Defs = [RAX], Uses = [EAX] in - def CDQE : RI<0x98, RawFrm, (outs), (ins), - "{cltq|cdqe}", []>; // RAX = signext(EAX) - - let Defs = [RAX,RDX], Uses = [RAX] in - def CQO : RI<0x99, RawFrm, (outs), (ins), - "{cqto|cqo}", []>; // RDX:RAX = signext(RAX) -} - -//===----------------------------------------------------------------------===// -// Arithmetic Instructions... -// - -let Defs = [EFLAGS] in { - -def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src), - "add{q}\t{$src, %rax|%rax, $src}", []>; - -let Constraints = "$src1 = $dst" in { -let isConvertibleToThreeAddress = 1 in { -let isCommutable = 1 in -// Register-Register Addition -def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86add_flag GR64:$src1, GR64:$src2))]>; - -// These are alternate spellings for use by the disassembler, we mark them as -// code gen only to ensure they aren't matched by the assembler. -let isCodeGenOnly = 1 in { - def ADD64rr_alt : RI<0x03, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", []>; -} - -// Register-Integer Addition -def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst), - (ins GR64:$src1, i64i8imm:$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86add_flag GR64:$src1, i64immSExt8:$src2))]>; -def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst), - (ins GR64:$src1, i64i32imm:$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86add_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isConvertibleToThreeAddress - -// Register-Memory Addition -def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86add_flag GR64:$src1, (load addr:$src2)))]>; - -} // Constraints = "$src1 = $dst" - -// Memory-Register Addition -def ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(store (add (load addr:$dst), GR64:$src2), addr:$dst), - (implicit EFLAGS)]>; -def ADD64mi8 : RIi8<0x83, MRM0m, (outs), (ins i64mem:$dst, i64i8imm :$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(store (add (load addr:$dst), i64immSExt8:$src2), addr:$dst), - (implicit EFLAGS)]>; -def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2), - "add{q}\t{$src2, $dst|$dst, $src2}", - [(store (add (load addr:$dst), i64immSExt32:$src2), addr:$dst), - (implicit EFLAGS)]>; - -let Uses = [EFLAGS] in { - -def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src), - "adc{q}\t{$src, %rax|%rax, $src}", []>; - -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in -def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>; - -let isCodeGenOnly = 1 in { -def ADC64rr_REV : RI<0x13, MRMSrcReg , (outs GR32:$dst), - (ins GR64:$src1, GR64:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", []>; -} - -def 
ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (adde GR64:$src1, (load addr:$src2)))]>; - -def ADC64ri8 : RIi8<0x83, MRM2r, (outs GR64:$dst), - (ins GR64:$src1, i64i8imm:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (adde GR64:$src1, i64immSExt8:$src2))]>; -def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst), - (ins GR64:$src1, i64i32imm:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>; -} // Constraints = "$src1 = $dst" - -def ADC64mr : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), GR64:$src2), addr:$dst)]>; -def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), i64immSExt8:$src2), - addr:$dst)]>; -def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2), - "adc{q}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), i64immSExt32:$src2), - addr:$dst)]>; -} // Uses = [EFLAGS] - -let Constraints = "$src1 = $dst" in { -// Register-Register Subtraction -def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86sub_flag GR64:$src1, GR64:$src2))]>; - -let isCodeGenOnly = 1 in { -def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", []>; -} - -// Register-Memory Subtraction -def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86sub_flag GR64:$src1, (load addr:$src2)))]>; - -// Register-Integer Subtraction -def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst), - (ins GR64:$src1, i64i8imm:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86sub_flag GR64:$src1, i64immSExt8:$src2))]>; -def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), - (ins GR64:$src1, i64i32imm:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>; -} // Constraints = "$src1 = $dst" - -def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src), - "sub{q}\t{$src, %rax|%rax, $src}", []>; - -// Memory-Register Subtraction -def SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), GR64:$src2), addr:$dst), - (implicit EFLAGS)]>; - -// Memory-Integer Subtraction -def SUB64mi8 : RIi8<0x83, MRM5m, (outs), (ins i64mem:$dst, i64i8imm :$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)]>; -def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2), - "sub{q}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)]>; - -let Uses = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { -def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>; - -let isCodeGenOnly = 1 in { -def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", []>; -} - -def SBB64rm : RI<0x1B, 
MRMSrcMem, (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sube GR64:$src1, (load addr:$src2)))]>; - -def SBB64ri8 : RIi8<0x83, MRM3r, (outs GR64:$dst), - (ins GR64:$src1, i64i8imm:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sube GR64:$src1, i64immSExt8:$src2))]>; -def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), - (ins GR64:$src1, i64i32imm:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>; -} // Constraints = "$src1 = $dst" - -def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src), - "sbb{q}\t{$src, %rax|%rax, $src}", []>; - -def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>; -def SBB64mi8 : RIi8<0x83, MRM3m, (outs), (ins i64mem:$dst, i64i8imm :$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(store (sube (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>; -def SBB64mi32 : RIi32<0x81, MRM3m, (outs), (ins i64mem:$dst, i64i32imm:$src2), - "sbb{q}\t{$src2, $dst|$dst, $src2}", - [(store (sube (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>; -} // Uses = [EFLAGS] -} // Defs = [EFLAGS] - -// Unsigned multiplication -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in { -def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), - "mul{q}\t$src", []>; // RAX,RDX = RAX*GR64 -let mayLoad = 1 in -def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), - "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] - -// Signed multiplication -def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), - "imul{q}\t$src", []>; // RAX,RDX = RAX*GR64 -let mayLoad = 1 in -def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] -} - -let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in -// Register-Register Signed Integer Multiplication -def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "imul{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, GR64:$src2))]>, TB; - -// Register-Memory Signed Integer Multiplication -def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "imul{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB; -} // Constraints = "$src1 = $dst" - -// Suprisingly enough, these are not two address instructions! 
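A minimal sketch (not LLVM code) of the three-operand immediate form that makes the IMUL64rri8/rri32 defs below non-two-address: imul with an immediate writes a destination independent of its source register, e.g. "imulq $100, %rdi, %rax" puts rdi * 100 into rax.

    #include <cstdint>

    uint64_t timesHundred(uint64_t y) {
      return y * 100;  // eligible for the three-operand imul form
    }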
- -// Register-Integer Signed Integer Multiplication -def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 - (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), - "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>; -def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32 - (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), - "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>; - -// Memory-Integer Signed Integer Multiplication -def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 - (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2), - "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, EFLAGS, - (X86smul_flag (load addr:$src1), - i64immSExt8:$src2))]>; -def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32 - (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2), - "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, EFLAGS, - (X86smul_flag (load addr:$src1), - i64immSExt32:$src2))]>; -} // Defs = [EFLAGS] - -// Unsigned division / remainder -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in { -// RDX:RAX/r64 = RAX,RDX -def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), - "div{q}\t$src", []>; -// Signed division / remainder -// RDX:RAX/r64 = RAX,RDX -def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), - "idiv{q}\t$src", []>; -let mayLoad = 1 in { -// RDX:RAX/[mem64] = RAX,RDX -def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), - "div{q}\t$src", []>; -// RDX:RAX/[mem64] = RAX,RDX -def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), - "idiv{q}\t$src", []>; -} -} - -// Unary instructions -let Defs = [EFLAGS], CodeSize = 2 in { -let Constraints = "$src = $dst" in -def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst", - [(set GR64:$dst, (ineg GR64:$src)), - (implicit EFLAGS)]>; -def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", - [(store (ineg (loadi64 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>; - -let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in -def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>; -def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", - [(store (add (loadi64 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>; - -let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in -def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>; -def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", - [(store (add (loadi64 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>; - -// In 64-bit mode, single byte INC and DEC cannot be encoded. -let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in { -// Can transform into LEA. 
-def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), - "inc{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>, - OpSize, Requires<[In64BitMode]>; -def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src), - "inc{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>, - Requires<[In64BitMode]>; -def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src), - "dec{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>, - OpSize, Requires<[In64BitMode]>; -def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), - "dec{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>, - Requires<[In64BitMode]>; -} // Constraints = "$src = $dst", isConvertibleToThreeAddress - -// These are duplicates of their 32-bit counterparts. Only needed so X86 knows -// how to unfold them. -def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", - [(store (add (loadi16 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In64BitMode]>; -def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", - [(store (add (loadi32 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In64BitMode]>; -def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", - [(store (add (loadi16 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In64BitMode]>; -def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", - [(store (add (loadi32 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In64BitMode]>; -} // Defs = [EFLAGS], CodeSize - - -let Defs = [EFLAGS] in { -// Shift instructions -let Constraints = "$src1 = $dst" in { -let Uses = [CL] in -def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), - "shl{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (shl GR64:$src1, CL))]>; -let isConvertibleToThreeAddress = 1 in // Can transform into LEA. -def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), - "shl{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>; -// NOTE: We don't include patterns for shifts of a register by one, because -// 'add reg,reg' is cheaper. 
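A minimal sketch (not LLVM code) of why the NOTE above can leave SHL64r1 patternless: a left shift by one is just x + x, so the add/lea selection covers that case.

    #include <cstdint>

    uint64_t doubled(uint64_t x) {
      return x << 1;  // same value as x + x; typically emitted as add or lea
    }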
-def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), - "shl{q}\t$dst", []>; -} // Constraints = "$src1 = $dst" - -let Uses = [CL] in -def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst), - "shl{q}\t{%cl, $dst|$dst, %CL}", - [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>; -def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src), - "shl{q}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; -def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst), - "shl{q}\t$dst", - [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; - -let Constraints = "$src1 = $dst" in { -let Uses = [CL] in -def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1), - "shr{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (srl GR64:$src1, CL))]>; -def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), - "shr{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>; -def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), - "shr{q}\t$dst", - [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>; -} // Constraints = "$src1 = $dst" - -let Uses = [CL] in -def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst), - "shr{q}\t{%cl, $dst|$dst, %CL}", - [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>; -def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src), - "shr{q}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; -def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst), - "shr{q}\t$dst", - [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; - -let Constraints = "$src1 = $dst" in { -let Uses = [CL] in -def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1), - "sar{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (sra GR64:$src1, CL))]>; -def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), - "sar{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>; -def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), - "sar{q}\t$dst", - [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>; -} // Constraints = "$src = $dst" - -let Uses = [CL] in -def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), - "sar{q}\t{%cl, $dst|$dst, %CL}", - [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>; -def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src), - "sar{q}\t{$src, $dst|$dst, $src}", - [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; -def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), - "sar{q}\t$dst", - [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; - -// Rotate instructions - -let Constraints = "$src = $dst" in { -def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src), - "rcl{q}\t{1, $dst|$dst, 1}", []>; -def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; - -def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src), - "rcr{q}\t{1, $dst|$dst, 1}", []>; -def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; - -let Uses = [CL] in { -def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; -} -} // Constraints = "$src = $dst" - -def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), - "rcl{q}\t{1, $dst|$dst, 1}", 
[]>; -def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), - "rcr{q}\t{1, $dst|$dst, 1}", []>; -def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; - -let Uses = [CL] in { -def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; -} - -let Constraints = "$src1 = $dst" in { -let Uses = [CL] in -def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1), - "rol{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (rotl GR64:$src1, CL))]>; -def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), - "rol{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>; -def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), - "rol{q}\t$dst", - [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>; -} // Constraints = "$src1 = $dst" - -let Uses = [CL] in -def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), - "rol{q}\t{%cl, $dst|$dst, %CL}", - [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>; -def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src), - "rol{q}\t{$src, $dst|$dst, $src}", - [(store (rotl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; -def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), - "rol{q}\t$dst", - [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; - -let Constraints = "$src1 = $dst" in { -let Uses = [CL] in -def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1), - "ror{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (rotr GR64:$src1, CL))]>; -def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), - "ror{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>; -def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), - "ror{q}\t$dst", - [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; -} // Constraints = "$src1 = $dst" - -let Uses = [CL] in -def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), - "ror{q}\t{%cl, $dst|$dst, %CL}", - [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>; -def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src), - "ror{q}\t{$src, $dst|$dst, $src}", - [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; -def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), - "ror{q}\t$dst", - [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; - -// Double shift instructions (generalizations of rotate) -let Constraints = "$src1 = $dst" in { -let Uses = [CL] in { -def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", - [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, - TB; -def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", - [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, - TB; -} - -let isCommutable = 1 in { // FIXME: Update X86InstrInfo::commuteInstruction -def SHLD64rri8 : RIi8<0xA4, MRMDestReg, - (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2, i8imm:$src3), - "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, - (i8 imm:$src3)))]>, - TB; -def SHRD64rri8 : RIi8<0xAC, MRMDestReg, - (outs GR64:$dst), - (ins 
GR64:$src1, GR64:$src2, i8imm:$src3), - "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, - (i8 imm:$src3)))]>, - TB; -} // isCommutable -} // Constraints = "$src1 = $dst" - -let Uses = [CL] in { -def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", - [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL), - addr:$dst)]>, TB; -def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", - [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL), - addr:$dst)]>, TB; -} -def SHLD64mri8 : RIi8<0xA4, MRMDestMem, - (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), - "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shld (loadi64 addr:$dst), GR64:$src2, - (i8 imm:$src3)), addr:$dst)]>, - TB; -def SHRD64mri8 : RIi8<0xAC, MRMDestMem, - (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), - "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, - (i8 imm:$src3)), addr:$dst)]>, - TB; -} // Defs = [EFLAGS] - -//===----------------------------------------------------------------------===// -// Logical Instructions... -// - -let Constraints = "$src = $dst" , AddedComplexity = 15 in -def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst", - [(set GR64:$dst, (not GR64:$src))]>; -def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", - [(store (not (loadi64 addr:$dst)), addr:$dst)]>; - -let Defs = [EFLAGS] in { -def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src), - "and{q}\t{$src, %rax|%rax, $src}", []>; - -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in -def AND64rr : RI<0x21, MRMDestReg, - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "and{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86and_flag GR64:$src1, GR64:$src2))]>; -let isCodeGenOnly = 1 in { -def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "and{q}\t{$src2, $dst|$dst, $src2}", []>; -} -def AND64rm : RI<0x23, MRMSrcMem, - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "and{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86and_flag GR64:$src1, (load addr:$src2)))]>; -def AND64ri8 : RIi8<0x83, MRM4r, - (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), - "and{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86and_flag GR64:$src1, i64immSExt8:$src2))]>; -def AND64ri32 : RIi32<0x81, MRM4r, - (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), - "and{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86and_flag GR64:$src1, i64immSExt32:$src2))]>; -} // Constraints = "$src1 = $dst" - -def AND64mr : RI<0x21, MRMDestMem, - (outs), (ins i64mem:$dst, GR64:$src), - "and{q}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), GR64:$src), addr:$dst), - (implicit EFLAGS)]>; -def AND64mi8 : RIi8<0x83, MRM4m, - (outs), (ins i64mem:$dst, i64i8imm :$src), - "and{q}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), i64immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>; -def AND64mi32 : RIi32<0x81, MRM4m, - (outs), (ins i64mem:$dst, i64i32imm:$src), - "and{q}\t{$src, $dst|$dst, $src}", - [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), - (implicit EFLAGS)]>; - -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in -def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), 
- "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86or_flag GR64:$src1, GR64:$src2))]>; -let isCodeGenOnly = 1 in { -def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "or{q}\t{$src2, $dst|$dst, $src2}", []>; -} -def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86or_flag GR64:$src1, (load addr:$src2)))]>; -def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst), - (ins GR64:$src1, i64i8imm:$src2), - "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86or_flag GR64:$src1, i64immSExt8:$src2))]>; -def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), - (ins GR64:$src1, i64i32imm:$src2), - "or{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86or_flag GR64:$src1, i64immSExt32:$src2))]>; -} // Constraints = "$src1 = $dst" - -def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "or{q}\t{$src, $dst|$dst, $src}", - [(store (or (load addr:$dst), GR64:$src), addr:$dst), - (implicit EFLAGS)]>; -def OR64mi8 : RIi8<0x83, MRM1m, (outs), (ins i64mem:$dst, i64i8imm:$src), - "or{q}\t{$src, $dst|$dst, $src}", - [(store (or (load addr:$dst), i64immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>; -def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src), - "or{q}\t{$src, $dst|$dst, $src}", - [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), - (implicit EFLAGS)]>; - -def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src), - "or{q}\t{$src, %rax|%rax, $src}", []>; - -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in -def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "xor{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86xor_flag GR64:$src1, GR64:$src2))]>; -let isCodeGenOnly = 1 in { -def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "xor{q}\t{$src2, $dst|$dst, $src2}", []>; -} -def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst), - (ins GR64:$src1, i64mem:$src2), - "xor{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86xor_flag GR64:$src1, (load addr:$src2)))]>; -def XOR64ri8 : RIi8<0x83, MRM6r, (outs GR64:$dst), - (ins GR64:$src1, i64i8imm:$src2), - "xor{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86xor_flag GR64:$src1, i64immSExt8:$src2))]>; -def XOR64ri32 : RIi32<0x81, MRM6r, - (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), - "xor{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, EFLAGS, - (X86xor_flag GR64:$src1, i64immSExt32:$src2))]>; -} // Constraints = "$src1 = $dst" - -def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "xor{q}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), GR64:$src), addr:$dst), - (implicit EFLAGS)]>; -def XOR64mi8 : RIi8<0x83, MRM6m, (outs), (ins i64mem:$dst, i64i8imm :$src), - "xor{q}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), i64immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>; -def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src), - "xor{q}\t{$src, $dst|$dst, $src}", - [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), - (implicit EFLAGS)]>; - -def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i64i32imm:$src), - "xor{q}\t{$src, %rax|%rax, $src}", []>; - -} // Defs = [EFLAGS] - -//===----------------------------------------------------------------------===// -// Comparison Instructions... 
-// - -// Integer comparison -let Defs = [EFLAGS] in { -def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i64i32imm:$src), - "test{q}\t{$src, %rax|%rax, $src}", []>; -let isCommutable = 1 in -def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), - "test{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and GR64:$src1, GR64:$src2), 0))]>; -def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2), - "test{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and GR64:$src1, (loadi64 addr:$src2)), - 0))]>; -def TEST64ri32 : RIi32<0xF7, MRM0r, (outs), - (ins GR64:$src1, i64i32imm:$src2), - "test{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and GR64:$src1, i64immSExt32:$src2), - 0))]>; -def TEST64mi32 : RIi32<0xF7, MRM0m, (outs), - (ins i64mem:$src1, i64i32imm:$src2), - "test{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (and (loadi64 addr:$src1), - i64immSExt32:$src2), 0))]>; - - -def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i64i32imm:$src), - "cmp{q}\t{$src, %rax|%rax, $src}", []>; -def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR64:$src1, GR64:$src2))]>; - -// These are alternate spellings for use by the disassembler, we mark them as -// code gen only to ensure they aren't matched by the assembler. -let isCodeGenOnly = 1 in { - def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", []>; -} - -def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi64 addr:$src1), GR64:$src2))]>; -def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR64:$src1, (loadi64 addr:$src2)))]>; -def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt8:$src2))]>; -def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt32:$src2))]>; -def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi64 addr:$src1), - i64immSExt8:$src2))]>; -def CMP64mi32 : RIi32<0x81, MRM7m, (outs), - (ins i64mem:$src1, i64i32imm:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp (loadi64 addr:$src1), - i64immSExt32:$src2))]>; -} // Defs = [EFLAGS] - -// Bit tests. -// TODO: BTC, BTR, and BTS -let Defs = [EFLAGS] in { -def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), - "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB; - -// Unlike with the register+register form, the memory+register form of the -// bt instruction does not ignore the high bits of the index. From ISel's -// perspective, this is pretty bizarre. Disable these instructions for now. 
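
A minimal C++ sketch of the distinction the comment above draws, with illustrative names: the register form masks the bit index to the operand width, while the memory form treats the index as a bit offset from the base address and can therefore touch bytes outside the loaded word.

    #include <cstdint>

    // Register-form bt semantics: the index is taken modulo 64, so it can
    // be modeled as a shift-and-mask of the value itself.
    bool bt64_reg(uint64_t value, uint64_t index) {
        return (value >> (index & 63)) & 1;
    }
    // The memory form instead examines the byte near base + (index >> 3),
    // which a plain loadi64-based pattern cannot express; hence the
    // disabled pattern below.
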
-def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "bt{q}\t{$src2, $src1|$src1, $src2}", -// [(X86bt (loadi64 addr:$src1), GR64:$src2), -// (implicit EFLAGS)] - [] - >, TB; - -def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), - "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB; -// Note that these instructions don't need FastBTMem because that -// only applies when the other operand is in a register. When it's -// an immediate, bt is still fast. -def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt (loadi64 addr:$src1), - i64immSExt8:$src2))]>, TB; - -def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; - -def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; - -def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; -} // Defs = [EFLAGS] - -// Conditional moves -let Uses = [EFLAGS], Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { -def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovb{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_B, EFLAGS))]>, TB; -def CMOVAE64rr: RI<0x43, MRMSrcReg, // if >=u, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovae{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_AE, EFLAGS))]>, TB; -def CMOVE64rr : RI<0x44, MRMSrcReg, // if ==, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmove{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_E, EFLAGS))]>, TB; -def CMOVNE64rr: RI<0x45, MRMSrcReg, // if !=, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovne{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_NE, EFLAGS))]>, TB; -def CMOVBE64rr: RI<0x46, MRMSrcReg, // if <=u, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovbe{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_BE, EFLAGS))]>, TB; 
-def CMOVA64rr : RI<0x47, MRMSrcReg, // if >u, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmova{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_A, EFLAGS))]>, TB; -def CMOVL64rr : RI<0x4C, MRMSrcReg, // if <s, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovl{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_L, EFLAGS))]>, TB; -def CMOVGE64rr: RI<0x4D, MRMSrcReg, // if >=s, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovge{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_GE, EFLAGS))]>, TB; -def CMOVLE64rr: RI<0x4E, MRMSrcReg, // if <=s, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovle{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_LE, EFLAGS))]>, TB; -def CMOVG64rr : RI<0x4F, MRMSrcReg, // if >s, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovg{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_G, EFLAGS))]>, TB; -def CMOVS64rr : RI<0x48, MRMSrcReg, // if signed, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovs{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_S, EFLAGS))]>, TB; -def CMOVNS64rr: RI<0x49, MRMSrcReg, // if !signed, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovns{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_NS, EFLAGS))]>, TB; -def CMOVP64rr : RI<0x4A, MRMSrcReg, // if parity, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovp{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_P, EFLAGS))]>, TB; -def CMOVNP64rr : RI<0x4B, MRMSrcReg, // if !parity, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovnp{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_NP, EFLAGS))]>, TB; -def CMOVO64rr : RI<0x40, MRMSrcReg, // if overflow, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovo{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_O, EFLAGS))]>, TB; -def CMOVNO64rr : RI<0x41, MRMSrcReg, // if !overflow, GR64 = GR64 - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "cmovno{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, GR64:$src2, - X86_COND_NO, EFLAGS))]>, TB; -} // isCommutable = 1 - -def CMOVB64rm : RI<0x42, MRMSrcMem, // if <u, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovb{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_B, EFLAGS))]>, TB; -def CMOVAE64rm: RI<0x43, MRMSrcMem, // if >=u, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovae{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_AE, EFLAGS))]>, TB; -def CMOVE64rm : RI<0x44, MRMSrcMem, // if ==, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmove{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), - X86_COND_E, EFLAGS))]>, TB; -def CMOVNE64rm: RI<0x45, MRMSrcMem, // if !=, GR64 = [mem64] - (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "cmovne{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (X86cmov 
GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_NE, EFLAGS))]>, TB;
-def CMOVBE64rm: RI<0x46, MRMSrcMem,   // if <=u, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovbe{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_BE, EFLAGS))]>, TB;
-def CMOVA64rm : RI<0x47, MRMSrcMem,   // if >u, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmova{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_A, EFLAGS))]>, TB;
-def CMOVL64rm : RI<0x4C, MRMSrcMem,   // if <s, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovl{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_L, EFLAGS))]>, TB;
-def CMOVGE64rm: RI<0x4D, MRMSrcMem,   // if >=s, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovge{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_GE, EFLAGS))]>, TB;
-def CMOVLE64rm: RI<0x4E, MRMSrcMem,   // if <=s, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovle{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_LE, EFLAGS))]>, TB;
-def CMOVG64rm : RI<0x4F, MRMSrcMem,   // if >s, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovg{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_G, EFLAGS))]>, TB;
-def CMOVS64rm : RI<0x48, MRMSrcMem,   // if signed, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovs{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_S, EFLAGS))]>, TB;
-def CMOVNS64rm: RI<0x49, MRMSrcMem,   // if !signed, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovns{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_NS, EFLAGS))]>, TB;
-def CMOVP64rm : RI<0x4A, MRMSrcMem,   // if parity, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovp{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_P, EFLAGS))]>, TB;
-def CMOVNP64rm : RI<0x4B, MRMSrcMem,  // if !parity, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovnp{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_NP, EFLAGS))]>, TB;
-def CMOVO64rm : RI<0x40, MRMSrcMem,   // if overflow, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovo{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_O, EFLAGS))]>, TB;
-def CMOVNO64rm : RI<0x41, MRMSrcMem,  // if !overflow, GR64 = [mem64]
-                   (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
-                   "cmovno{q}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
-                                     X86_COND_NO, EFLAGS))]>, TB;
-} // Constraints = "$src1 = $dst"
-
-// Use sbb to materialize carry flag into a GPR.
-// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
-// However, Pat<> can't replicate the destination reg into the inputs of the
-// result.
-// FIXME: Change this to have encoding Pseudo when X86MCCodeEmitter replaces
-// X86CodeEmitter.
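
The sbb idiom referenced above, sketched as portable C++ with an illustrative name: subtracting a register from itself with borrow leaves 0 when the carry flag is clear and all ones when it is set, which is what SETB_C64r below materializes.

    #include <cstdint>

    // Equivalent of `sbb %reg, %reg`: broadcast the carry of an addition
    // across a 64-bit register (0 or 0xFFFFFFFFFFFFFFFF).
    uint64_t setb_c64(uint64_t a, uint64_t b) {
        uint64_t carry = (a + b) < a;  // 1 iff the addition wrapped
        return 0 - carry;              // 0 if no carry, ~0ULL if carry
    }
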
-let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in -def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; - -def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), - (SETB_C64r)>; - -//===----------------------------------------------------------------------===// -// Descriptor-table support instructions - -// LLDT is not interpreted specially in 64-bit mode because there is no sign -// extension. -def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins), - "sldt{q}\t$dst", []>, TB; -def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), - "sldt{q}\t$dst", []>, TB; - -//===----------------------------------------------------------------------===// -// Alias Instructions -//===----------------------------------------------------------------------===// - -// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a -// smaller encoding, but doing so at isel time interferes with rematerialization -// in the current register allocator. For now, this is rewritten when the -// instruction is lowered to an MCInst. -// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove -// when we have a better way to specify isel priority. -let Defs = [EFLAGS], - AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, 0)]>; - -// Materialize i64 constant where top 32-bits are zero. This could theoretically -// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however -// that would make it more difficult to rematerialize. -let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), - "", [(set GR64:$dst, i64immZExt32:$src)]>; - -//===----------------------------------------------------------------------===// -// Thread Local Storage Instructions -//===----------------------------------------------------------------------===// - -// ELF TLS Support -// All calls clobber the non-callee saved registers. RSP is marked as -// a use to prevent stack-pointer assignments that appear immediately -// before calls from potentially appearing dead. -let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, - FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [RSP] in -def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), - ".byte\t0x66; " - "leaq\t$sym(%rip), %rdi; " - ".word\t0x6666; " - "rex64; " - "call\t__tls_get_addr@PLT", - [(X86tlsaddr tls64addr:$sym)]>, - Requires<[In64BitMode]>; - -// Darwin TLS Support -// For x86_64, the address of the thunk is passed in %rdi, on return -// the address of the variable is in %rax. All other registers are preserved. 
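
A minimal source-level illustration, with an illustrative function name, of the construct both TLS lowerings implement: taking the address of a thread_local variable is what forces the __tls_get_addr call on ELF and the thunk call on Darwin.

    // Each thread observes a distinct copy of `counter`.
    thread_local int counter = 0;

    int *current_counter() {
        return &counter;  // per-thread address, via TLS_addr64 / TLSCall_64
    }
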
-let Defs = [RAX], - Uses = [RDI], - usesCustomInserter = 1 in -def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym), - "# TLSCall_64", - [(X86TLSCall addr:$sym)]>, - Requires<[In64BitMode]>; - -let AddedComplexity = 5, isCodeGenOnly = 1 in -def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "movq\t%gs:$src, $dst", - [(set GR64:$dst, (gsload addr:$src))]>, SegGS; - -let AddedComplexity = 5, isCodeGenOnly = 1 in -def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "movq\t%fs:$src, $dst", - [(set GR64:$dst, (fsload addr:$src))]>, SegFS; - -//===----------------------------------------------------------------------===// -// Atomic Instructions -//===----------------------------------------------------------------------===// - -// TODO: Get this to fold the constant into the instruction. -let hasSideEffects = 1, Defs = [ESP] in -def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero), - "lock\n\t" - "or{q}\t{$zero, (%rsp)|(%rsp), $zero}", - [(X86MemBarrierNoSSE GR64:$zero)]>, - Requires<[In64BitMode]>, LOCK; - -let Defs = [RAX, EFLAGS], Uses = [RAX] in { -def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), - "lock\n\t" - "cmpxchgq\t$swap,$ptr", - [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK; -} - -let Constraints = "$val = $dst" in { -let Defs = [EFLAGS] in -def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr), - "lock\n\t" - "xadd\t$val, $ptr", - [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, - TB, LOCK; - -def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), - (ins GR64:$val,i64mem:$ptr), - "xchg{q}\t{$val, $ptr|$ptr, $val}", - [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>; - -def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src), - "xchg{q}\t{$val, $src|$src, $val}", []>; -} - -def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; -let mayLoad = 1, mayStore = 1 in -def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; - -def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; -let mayLoad = 1, mayStore = 1 in -def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; - -let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in -def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), - "cmpxchg16b\t$dst", []>, TB; - -def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), - "xchg{q}\t{$src, %rax|%rax, $src}", []>; - -// Optimized codegen when the non-memory output is not used. -let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { -// FIXME: Use normal add / sub instructions and add lock prefix dynamically. 
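
LXADD64 above is the pattern behind 64-bit atomic fetch-and-add; a sketch of the corresponding source construct, with an illustrative name:

    #include <atomic>
    #include <cstdint>

    // fetch_add on x86-64 selects to `lock xadd`: memory gets old+1 and
    // the previous value lands in the register operand ($dst above).
    uint64_t bump(std::atomic<uint64_t> &counter) {
        return counter.fetch_add(1);
    }
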
-def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "lock\n\t" - "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs), - (ins i64mem:$dst, i64i8imm :$src2), - "lock\n\t" - "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs), - (ins i64mem:$dst, i64i32imm :$src2), - "lock\n\t" - "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - "lock\n\t" - "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs), - (ins i64mem:$dst, i64i8imm :$src2), - "lock\n\t" - "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs), - (ins i64mem:$dst, i64i32imm:$src2), - "lock\n\t" - "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), - "lock\n\t" - "inc{q}\t$dst", []>, LOCK; -def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), - "lock\n\t" - "dec{q}\t$dst", []>, LOCK; -} -// Atomic exchange, and, or, xor -let Constraints = "$val = $dst", Defs = [EFLAGS], - usesCustomInserter = 1 in { -def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMAND64 PSEUDO!", - [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>; -def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMOR64 PSEUDO!", - [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>; -def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMXOR64 PSEUDO!", - [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>; -def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMNAND64 PSEUDO!", - [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>; -def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val), - "#ATOMMIN64 PSEUDO!", - [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>; -def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMMAX64 PSEUDO!", - [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>; -def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMUMIN64 PSEUDO!", - [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>; -def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMUMAX64 PSEUDO!", - [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>; -} - -// Segmentation support instructions - -// i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo. 
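
The ATOM*64 pseudos above have no single x86 instruction; their custom inserters expand to a compare-exchange loop. A rough C++ equivalent of the atomic-min case, under that assumption and with an illustrative name:

    #include <atomic>
    #include <cstdint>

    // Sketch of the loop an atomic-min pseudo expands to: retry cmpxchg
    // until the minimum is installed, returning the previous value.
    int64_t atomic_min64(std::atomic<int64_t> &mem, int64_t val) {
        int64_t old = mem.load();
        while (!mem.compare_exchange_weak(old, old < val ? old : val)) {
            // compare_exchange_weak reloads `old` on failure.
        }
        return old;
    }
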
-def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; -def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), - "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; - -def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; -def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; - -def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB; - -def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), - "push{q}\t%fs", []>, TB; -def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), - "push{q}\t%gs", []>, TB; - -def POPFS64 : I<0xa1, RawFrm, (outs), (ins), - "pop{q}\t%fs", []>, TB; -def POPGS64 : I<0xa9, RawFrm, (outs), (ins), - "pop{q}\t%gs", []>, TB; - -def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), - "lss{q}\t{$src, $dst|$dst, $src}", []>, TB; -def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), - "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB; -def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), - "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB; - -// Specialized register support - -// no m form encodable; use SMSW16m -def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins), - "smsw{q}\t$dst", []>, TB; - -// String manipulation instructions - -def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>; - -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns -//===----------------------------------------------------------------------===// - -// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small -// code model mode, should use 'movabs'. FIXME: This is really a hack, the -// 'movabs' predicate should handle this sort of thing. -def : Pat<(i64 (X86Wrapper tconstpool :$dst)), - (MOV64ri tconstpool :$dst)>, Requires<[FarData]>; -def : Pat<(i64 (X86Wrapper tjumptable :$dst)), - (MOV64ri tjumptable :$dst)>, Requires<[FarData]>; -def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), - (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>; -def : Pat<(i64 (X86Wrapper texternalsym:$dst)), - (MOV64ri texternalsym:$dst)>, Requires<[FarData]>; -def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), - (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>; - -// In static codegen with small code model, we can get the address of a label -// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of -// the MOV64ri64i32 should accept these. -def : Pat<(i64 (X86Wrapper tconstpool :$dst)), - (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>; -def : Pat<(i64 (X86Wrapper tjumptable :$dst)), - (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>; -def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), - (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>; -def : Pat<(i64 (X86Wrapper texternalsym:$dst)), - (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>; -def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), - (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>; - -// In kernel code model, we can get the address of a label -// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of -// the MOV64ri32 should accept these. 
-def : Pat<(i64 (X86Wrapper tconstpool :$dst)), - (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>; -def : Pat<(i64 (X86Wrapper tjumptable :$dst)), - (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>; -def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), - (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>; -def : Pat<(i64 (X86Wrapper texternalsym:$dst)), - (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>; -def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), - (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>; - -// If we have small model and -static mode, it is safe to store global addresses -// directly as immediates. FIXME: This is really a hack, the 'imm' predicate -// for MOV64mi32 should handle this sort of thing. -def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tconstpool:$src)>, - Requires<[NearData, IsStatic]>; -def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tjumptable:$src)>, - Requires<[NearData, IsStatic]>; -def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tglobaladdr:$src)>, - Requires<[NearData, IsStatic]>; -def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst), - (MOV64mi32 addr:$dst, texternalsym:$src)>, - Requires<[NearData, IsStatic]>; -def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tblockaddress:$src)>, - Requires<[NearData, IsStatic]>; - -// Calls -// Direct PC relative function call for small code model. 32-bit displacement -// sign extended to 64-bit. -def : Pat<(X86call (i64 tglobaladdr:$dst)), - (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>; -def : Pat<(X86call (i64 texternalsym:$dst)), - (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>; - -def : Pat<(X86call (i64 tglobaladdr:$dst)), - (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>; -def : Pat<(X86call (i64 texternalsym:$dst)), - (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>; - -// tailcall stuff -def : Pat<(X86tcret GR64_TC:$dst, imm:$off), - (TCRETURNri64 GR64_TC:$dst, imm:$off)>, - Requires<[In64BitMode]>; - -def : Pat<(X86tcret (load addr:$dst), imm:$off), - (TCRETURNmi64 addr:$dst, imm:$off)>, - Requires<[In64BitMode]>; - -def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off), - (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, - Requires<[In64BitMode]>; - -def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), - (TCRETURNdi64 texternalsym:$dst, imm:$off)>, - Requires<[In64BitMode]>; - -// tls has some funny stuff here... -// This corresponds to movabs $foo@tpoff, %rax -def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), - (MOV64ri tglobaltlsaddr :$dst)>; -// This corresponds to add $foo@tpoff, %rax -def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), - (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; -// This corresponds to mov foo@tpoff(%rbx), %eax -def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), - (MOV64rm tglobaltlsaddr :$dst)>; - -// Comparisons. - -// TEST R,R is smaller than CMP R,0 -def : Pat<(X86cmp GR64:$src1, 0), - (TEST64rr GR64:$src1, GR64:$src1)>; - -// Conditional moves with folded loads with operands swapped and conditions -// inverted. 
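
Why the rewrites below are sound, sketched in C++ with illustrative names: selecting between a loaded value and a register on a condition is the same as selecting on the inverted condition with the operands exchanged, which lets the load fold into the opposite cmov form.

    #include <cstdint>

    // cmovb form: dst = below ? mem : reg.
    uint64_t select_below(bool below, uint64_t mem, uint64_t reg) {
        return below ? mem : reg;
    }
    // Identical result via the inverted condition (cmovae) with swapped
    // operands, matching the patterns below.
    uint64_t select_below_swapped(bool below, uint64_t mem, uint64_t reg) {
        return !below ? reg : mem;
    }
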
-def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_B, EFLAGS), - (CMOVAE64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_AE, EFLAGS), - (CMOVB64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_E, EFLAGS), - (CMOVNE64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NE, EFLAGS), - (CMOVE64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_BE, EFLAGS), - (CMOVA64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_A, EFLAGS), - (CMOVBE64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_L, EFLAGS), - (CMOVGE64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_GE, EFLAGS), - (CMOVL64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_LE, EFLAGS), - (CMOVG64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_G, EFLAGS), - (CMOVLE64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_P, EFLAGS), - (CMOVNP64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NP, EFLAGS), - (CMOVP64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_S, EFLAGS), - (CMOVNS64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NS, EFLAGS), - (CMOVS64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_O, EFLAGS), - (CMOVNO64rm GR64:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NO, EFLAGS), - (CMOVO64rm GR64:$src2, addr:$src1)>; - -// zextload bool -> zextload byte -def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; - -// extload -// When extloading from 16-bit and smaller memory locations into 64-bit -// registers, use zero-extending loads so that the entire 64-bit register is -// defined, avoiding partial-register updates. -def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; -def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>; -def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>; -// For other extloads, use subregs, since the high contents of the register are -// defined after an extload. -def : Pat<(extloadi64i32 addr:$src), - (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), - sub_32bit)>; - -// anyext. Define these to do an explicit zero-extend to -// avoid partial-register updates. -def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>; -def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>; -def : Pat<(i64 (anyext GR32:$src)), - (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; - -//===----------------------------------------------------------------------===// -// Some peepholes -//===----------------------------------------------------------------------===// - -// Odd encoding trick: -128 fits into an 8-bit immediate field while -// +128 doesn't, so in this special case use a sub instead of an add. -def : Pat<(add GR64:$src1, 128), - (SUB64ri8 GR64:$src1, -128)>; -def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst), - (SUB64mi8 addr:$dst, -128)>; - -// The same trick applies for 32-bit immediate fields in 64-bit -// instructions. 
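
The arithmetic behind both peepholes, as a C++ sketch with an illustrative name: in two's complement, adding +128 equals subtracting -128, and only -128 fits a sign-extended 8-bit immediate.

    #include <cstdint>

    // x + 128 written the way the pattern encodes it: subtract the
    // sign-extended 8-bit immediate -128. Both wrap identically mod 2^64.
    uint64_t add128(uint64_t x) {
        return x - static_cast<uint64_t>(int64_t{-128});
    }
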
-def : Pat<(add GR64:$src1, 0x0000000080000000),
-          (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
-def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
-          (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
-
-// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it
-// has an immediate with at least 32 bits of leading zeros, to avoid needing to
-// materialize that immediate in a register first.
-def : Pat<(and GR64:$src, i64immZExt32:$imm),
-          (SUBREG_TO_REG
-            (i64 0),
-            (AND32ri
-              (EXTRACT_SUBREG GR64:$src, sub_32bit),
-              (i32 (GetLo32XForm imm:$imm))),
-            sub_32bit)>;
-
-// r & (2^32-1) ==> movz
-def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
-          (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
-// r & (2^16-1) ==> movz
-def : Pat<(and GR64:$src, 0xffff),
-          (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR64:$src, 0xff),
-          (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR32:$src1, 0xff),
-          (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
-      Requires<[In64BitMode]>;
-// r & (2^8-1) ==> movz
-def : Pat<(and GR16:$src1, 0xff),
-          (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
-      Requires<[In64BitMode]>;
-
-// sext_inreg patterns
-def : Pat<(sext_inreg GR64:$src, i32),
-          (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
-def : Pat<(sext_inreg GR64:$src, i16),
-          (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
-def : Pat<(sext_inreg GR64:$src, i8),
-          (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
-def : Pat<(sext_inreg GR32:$src, i8),
-          (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
-      Requires<[In64BitMode]>;
-def : Pat<(sext_inreg GR16:$src, i8),
-          (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
-      Requires<[In64BitMode]>;
-
-// trunc patterns
-def : Pat<(i32 (trunc GR64:$src)),
-          (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
-def : Pat<(i16 (trunc GR64:$src)),
-          (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
-def : Pat<(i8 (trunc GR64:$src)),
-          (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
-def : Pat<(i8 (trunc GR32:$src)),
-          (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
-      Requires<[In64BitMode]>;
-def : Pat<(i8 (trunc GR16:$src)),
-          (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
-      Requires<[In64BitMode]>;
-
-// h-register tricks.
-// For now, be conservative on x86-64 and use an h-register extract only if the
-// value is immediately zero-extended or stored, which are somewhat common
-// cases. This uses a bunch of code to prevent a register requiring a REX prefix
-// from being allocated in the same instruction as the h register, as there's
-// currently no way to describe this requirement to the register allocator.
-
-// h-register extract and zero-extend.
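
Behind the `r & (2^n-1) ==> movz` rewrites above, a C++ sketch with illustrative names: masking to a power-of-two width is exactly a zero-extending move, and on x86-64 a 32-bit mov already clears bits 63:32.

    #include <cstdint>

    // Both functions compute the same value; the second form is what the
    // MOVZX64rr32 pattern selects instead of a 64-bit and + big immediate.
    uint64_t low32_mask(uint64_t x) { return x & 0x00000000FFFFFFFFULL; }
    uint64_t low32_movz(uint64_t x) { return static_cast<uint32_t>(x); }
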
-def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)), - (SUBREG_TO_REG - (i64 0), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), - sub_8bit_hi)), - sub_32bit)>; -def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - sub_8bit_hi))>, - Requires<[In64BitMode]>; -def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), - (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, - GR32_ABCD)), - sub_8bit_hi))>, - Requires<[In64BitMode]>; -def : Pat<(srl GR16:$src, (i8 8)), - (EXTRACT_SUBREG - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi)), - sub_16bit)>, - Requires<[In64BitMode]>; -def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi))>, - Requires<[In64BitMode]>; -def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi))>, - Requires<[In64BitMode]>; -def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), - (SUBREG_TO_REG - (i64 0), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi)), - sub_32bit)>; -def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))), - (SUBREG_TO_REG - (i64 0), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi)), - sub_32bit)>; - -// h-register extract and store. -def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), - (MOV8mr_NOREX - addr:$dst, - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), - sub_8bit_hi))>; -def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), - (MOV8mr_NOREX - addr:$dst, - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - sub_8bit_hi))>, - Requires<[In64BitMode]>; -def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), - (MOV8mr_NOREX - addr:$dst, - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi))>, - Requires<[In64BitMode]>; - -// (shl x, 1) ==> (add x, x) -def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; - -// (shl x (and y, 63)) ==> (shl x, y) -def : Pat<(shl GR64:$src1, (and CL, 63)), - (SHL64rCL GR64:$src1)>; -def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), - (SHL64mCL addr:$dst)>; - -def : Pat<(srl GR64:$src1, (and CL, 63)), - (SHR64rCL GR64:$src1)>; -def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), - (SHR64mCL addr:$dst)>; - -def : Pat<(sra GR64:$src1, (and CL, 63)), - (SAR64rCL GR64:$src1)>; -def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), - (SAR64mCL addr:$dst)>; - -// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. -let AddedComplexity = 5 in { // Try this before the selecting to OR -def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2), - (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2), - (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; -def : Pat<(or_is_add GR64:$src1, GR64:$src2), - (ADD64rr GR64:$src1, GR64:$src2)>; -} // AddedComplexity - -// X86 specific add which produces a flag. 
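
The (and CL, 63) shift patterns above rely on the hardware masking 64-bit shift counts to their low 6 bits, so the explicit and can be dropped; a C++ sketch with an illustrative name:

    #include <cstdint>

    // The & 63 is free on x86-64: shlq/shrq/sarq only look at CL's low 6
    // bits, so this compiles to a single shift with no separate and.
    uint64_t shl_masked(uint64_t x, unsigned count) {
        return x << (count & 63);
    }
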
-def : Pat<(addc GR64:$src1, GR64:$src2),
-          (ADD64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(addc GR64:$src1, (load addr:$src2)),
-          (ADD64rm GR64:$src1, addr:$src2)>;
-def : Pat<(addc GR64:$src1, i64immSExt8:$src2),
-          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(addc GR64:$src1, i64immSExt32:$src2),
-          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-def : Pat<(subc GR64:$src1, GR64:$src2),
-          (SUB64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(subc GR64:$src1, (load addr:$src2)),
-          (SUB64rm GR64:$src1, addr:$src2)>;
-def : Pat<(subc GR64:$src1, i64immSExt8:$src2),
-          (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(subc GR64:$src1, i64immSExt32:$src2),
-          (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-//===----------------------------------------------------------------------===//
-// EFLAGS-defining Patterns
-//===----------------------------------------------------------------------===//
-
-// addition
-def : Pat<(add GR64:$src1, GR64:$src2),
-          (ADD64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt8:$src2),
-          (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt32:$src2),
-          (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
-          (ADD64rm GR64:$src1, addr:$src2)>;
-
-// subtraction
-def : Pat<(sub GR64:$src1, GR64:$src2),
-          (SUB64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
-          (SUB64rm GR64:$src1, addr:$src2)>;
-def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
-          (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
-          (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Multiply
-def : Pat<(mul GR64:$src1, GR64:$src2),
-          (IMUL64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
-          (IMUL64rm GR64:$src1, addr:$src2)>;
-def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
-          (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
-          (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
-          (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
-def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
-          (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
-
-// inc/dec
-def : Pat<(add GR16:$src, 1),  (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, 1),  (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR64:$src, 1),  (INC64r GR64:$src)>;
-def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
-
-// or
-def : Pat<(or GR64:$src1, GR64:$src2),
-          (OR64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(or GR64:$src1, i64immSExt8:$src2),
-          (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(or GR64:$src1, i64immSExt32:$src2),
-          (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
-          (OR64rm GR64:$src1, addr:$src2)>;
-
-// xor
-def : Pat<(xor GR64:$src1, GR64:$src2),
-          (XOR64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
-          (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
-          (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
-          (XOR64rm GR64:$src1, addr:$src2)>;
-
-// and
-def : Pat<(and GR64:$src1, GR64:$src2),
-          (AND64rr GR64:$src1, GR64:$src2)>;
-def : Pat<(and GR64:$src1, i64immSExt8:$src2),
-          (AND64ri8 GR64:$src1,
i64immSExt8:$src2)>; -def : Pat<(and GR64:$src1, i64immSExt32:$src2), - (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; -def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), - (AND64rm GR64:$src1, addr:$src2)>; - -//===----------------------------------------------------------------------===// -// X86-64 SSE Instructions -//===----------------------------------------------------------------------===// - -// Move instructions... - -def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2i64 (scalar_to_vector GR64:$src)))]>; -def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), - (iPTR 0)))]>; - -def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (bitconvert GR64:$src))]>; -def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), - "movq\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>; - -def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (bitconvert FR64:$src))]>; -def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), - "movq\t{$src, $dst|$dst, $src}", - [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; - diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td new file mode 100644 index 0000000..f0ea068 --- /dev/null +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -0,0 +1,1125 @@ +//===- X86InstrArithmetic.td - Integer Arithmetic Instrs ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the integer arithmetic instructions in the X86 +// architecture. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LEA - Load Effective Address + +let neverHasSideEffects = 1 in +def LEA16r : I<0x8D, MRMSrcMem, + (outs GR16:$dst), (ins i32mem:$src), + "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize; +let isReMaterializable = 1 in +def LEA32r : I<0x8D, MRMSrcMem, + (outs GR32:$dst), (ins i32mem:$src), + "lea{l}\t{$src|$dst}, {$dst|$src}", + [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>; + +def LEA64_32r : I<0x8D, MRMSrcMem, + (outs GR32:$dst), (ins lea64_32mem:$src), + "lea{l}\t{$src|$dst}, {$dst|$src}", + [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>; + +let isReMaterializable = 1 in +def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "lea{q}\t{$src|$dst}, {$dst|$src}", + [(set GR64:$dst, lea64addr:$src)]>; + + + +//===----------------------------------------------------------------------===// +// Fixed-Register Multiplication and Division Instructions. +// + +// Extra precision multiplication + +// AL is really implied by AX, but the registers in Defs must match the +// SDNode results (i8, i32). +let Defs = [AL,EFLAGS,AX], Uses = [AL] in +def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", + // FIXME: Used for 8-bit mul, ignore result upper 8 bits. 
+ // This probably ought to be moved to a def : Pat<> if the + // syntax can be accepted. + [(set AL, (mul AL, GR8:$src)), + (implicit EFLAGS)]>; // AL,AH = AL*GR8 + +let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in +def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), + "mul{w}\t$src", + []>, OpSize; // AX,DX = AX*GR16 + +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in +def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), + "mul{l}\t$src", // EAX,EDX = EAX*GR32 + [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>; +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in +def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), + "mul{q}\t$src", // RAX,RDX = RAX*GR64 + [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>; + +let Defs = [AL,EFLAGS,AX], Uses = [AL] in +def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), + "mul{b}\t$src", + // FIXME: Used for 8-bit mul, ignore result upper 8 bits. + // This probably ought to be moved to a def : Pat<> if the + // syntax can be accepted. + [(set AL, (mul AL, (loadi8 addr:$src))), + (implicit EFLAGS)]>; // AL,AH = AL*[mem8] + +let mayLoad = 1, neverHasSideEffects = 1 in { +let Defs = [AX,DX,EFLAGS], Uses = [AX] in +def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), + "mul{w}\t$src", + []>, OpSize; // AX,DX = AX*[mem16] + +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in +def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), + "mul{l}\t$src", + []>; // EAX,EDX = EAX*[mem32] +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in +def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), + "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] +} + +let neverHasSideEffects = 1 in { +let Defs = [AL,EFLAGS,AX], Uses = [AL] in +def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; + // AL,AH = AL*GR8 +let Defs = [AX,DX,EFLAGS], Uses = [AX] in +def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>, + OpSize; // AX,DX = AX*GR16 +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in +def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; + // EAX,EDX = EAX*GR32 +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in +def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>; + // RAX,RDX = RAX*GR64 + +let mayLoad = 1 in { +let Defs = [AL,EFLAGS,AX], Uses = [AL] in +def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), + "imul{b}\t$src", []>; // AL,AH = AL*[mem8] +let Defs = [AX,DX,EFLAGS], Uses = [AX] in +def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), + "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16] +let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in +def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), + "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32] +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in +def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), + "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] +} +} // neverHasSideEffects + + +let Defs = [EFLAGS] in { +let Constraints = "$src1 = $dst" in { + +let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y +// Register-Register Signed Integer Multiply +def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), + "imul{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, EFLAGS, + (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize; +def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), + "imul{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, EFLAGS, + 
(X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
+def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
+                                   (ins GR64:$src1, GR64:$src2),
+                  "imul{q}\t{$src2, $dst|$dst, $src2}",
+                  [(set GR64:$dst, EFLAGS,
+                        (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
+}
+
+// Register-Memory Signed Integer Multiply
+def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
+                                  (ins GR16:$src1, i16mem:$src2),
+                 "imul{w}\t{$src2, $dst|$dst, $src2}",
+                 [(set GR16:$dst, EFLAGS,
+                       (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
+                 TB, OpSize;
+def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
+                                  (ins GR32:$src1, i32mem:$src2),
+                 "imul{l}\t{$src2, $dst|$dst, $src2}",
+                 [(set GR32:$dst, EFLAGS,
+                       (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
+def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
+                                   (ins GR64:$src1, i64mem:$src2),
+                  "imul{q}\t{$src2, $dst|$dst, $src2}",
+                  [(set GR64:$dst, EFLAGS,
+                        (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
+} // Constraints = "$src1 = $dst"
+
+} // Defs = [EFLAGS]
+
+// Surprisingly enough, these are not two address instructions!
+let Defs = [EFLAGS] in {
+// Register-Integer Signed Integer Multiply
+def IMUL16rri  : Ii16<0x69, MRMSrcReg,                    // GR16 = GR16*I16
+                      (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR16:$dst, EFLAGS,
+                            (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
+def IMUL16rri8 : Ii8<0x6B, MRMSrcReg,                     // GR16 = GR16*I8
+                     (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+                     "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                     [(set GR16:$dst, EFLAGS,
+                           (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
+                 OpSize;
+def IMUL32rri  : Ii32<0x69, MRMSrcReg,                    // GR32 = GR32*I32
+                      (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR32:$dst, EFLAGS,
+                            (X86smul_flag GR32:$src1, imm:$src2))]>;
+def IMUL32rri8 : Ii8<0x6B, MRMSrcReg,                     // GR32 = GR32*I8
+                     (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+                     "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                     [(set GR32:$dst, EFLAGS,
+                           (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
+def IMUL64rri32 : RIi32<0x69, MRMSrcReg,                  // GR64 = GR64*I32
+                        (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+                        "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                        [(set GR64:$dst, EFLAGS,
+                              (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
+def IMUL64rri8 : RIi8<0x6B, MRMSrcReg,                    // GR64 = GR64*I8
+                      (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+                      "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR64:$dst, EFLAGS,
+                            (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
+
+
+// Memory-Integer Signed Integer Multiply
+def IMUL16rmi  : Ii16<0x69, MRMSrcMem,                    // GR16 = [mem16]*I16
+                      (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
+                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR16:$dst, EFLAGS,
+                            (X86smul_flag (load addr:$src1), imm:$src2))]>,
+                 OpSize;
+def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem,                     // GR16 = [mem16]*I8
+                     (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
+                     "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                     [(set GR16:$dst, EFLAGS,
+                           (X86smul_flag (load addr:$src1),
+                                         i16immSExt8:$src2))]>, OpSize;
+def IMUL32rmi  : Ii32<0x69, MRMSrcMem,                    // GR32 = [mem32]*I32
+                      (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
+                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR32:$dst, EFLAGS,
+                            (X86smul_flag (load addr:$src1), imm:$src2))]>;
+def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem,                     // GR32 = [mem32]*I8
+                     (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
+                     "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                      [(set GR32:$dst, EFLAGS,
+                            (X86smul_flag (load addr:$src1),
+                                          i32immSExt8:$src2))]>;
+def IMUL64rmi32 : RIi32<0x69, MRMSrcMem,                // GR64 = [mem64]*I32
+                        (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
+                        "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                        [(set GR64:$dst, EFLAGS,
+                              (X86smul_flag (load addr:$src1),
+                                            i64immSExt32:$src2))]>;
+def IMUL64rmi8  : RIi8<0x6B, MRMSrcMem,                 // GR64 = [mem64]*I8
+                       (outs GR64:$dst), (ins i64mem:$src1, i64i8imm:$src2),
+                       "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                       [(set GR64:$dst, EFLAGS,
+                             (X86smul_flag (load addr:$src1),
+                                           i64immSExt8:$src2))]>;
+} // Defs = [EFLAGS]
+
+
+// Unsigned division/remainder.
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8r  : I<0xF6, MRM6r, (outs), (ins GR8:$src),    // AX/r8 = AL,AH
+               "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src),   // DX:AX/r16 = AX,DX
+               "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src),   // EDX:EAX/r32 = EAX,EDX
+               "div{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
+                "div{q}\t$src", []>;
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def DIV8m  : I<0xF6, MRM6m, (outs), (ins i8mem:$src),  // AX/[mem8] = AL,AH
+               "div{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+               "div{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in       // EDX:EAX/[mem32] = EAX,EDX
+def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
+               "div{l}\t$src", []>;
+// RDX:RAX/[mem64] = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
+                "div{q}\t$src", []>;
+}
+
+// Signed division/remainder.
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8r  : I<0xF6, MRM7r, (outs), (ins GR8:$src),   // AX/r8 = AL,AH
+                "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16r : I<0xF7, MRM7r, (outs), (ins GR16:$src),  // DX:AX/r16 = AX,DX
+                "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def IDIV32r : I<0xF7, MRM7r, (outs), (ins GR32:$src),  // EDX:EAX/r32 = EAX,EDX
+                "idiv{l}\t$src", []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def IDIV64r : RI<0xF7, MRM7r, (outs), (ins GR64:$src),
+                 "idiv{q}\t$src", []>;
+
+let mayLoad = 1 in {
+let Defs = [AL,EFLAGS,AX], Uses = [AX] in
+def IDIV8m  : I<0xF6, MRM7m, (outs), (ins i8mem:$src),  // AX/[mem8] = AL,AH
+                "idiv{b}\t$src", []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+def IDIV16m : I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
+                "idiv{w}\t$src", []>, OpSize;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in        // EDX:EAX/[mem32] = EAX,EDX
+def IDIV32m : I<0xF7, MRM7m, (outs), (ins i32mem:$src),
+                "idiv{l}\t$src", []>;
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in        // RDX:RAX/[mem64] = RAX,RDX
+def IDIV64m : RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
+                 "idiv{q}\t$src", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Two address Instructions.
+// + +// unary instructions +let CodeSize = 2 in { +let Defs = [EFLAGS] in { +let Constraints = "$src1 = $dst" in { +def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), + "neg{b}\t$dst", + [(set GR8:$dst, (ineg GR8:$src1)), + (implicit EFLAGS)]>; +def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1), + "neg{w}\t$dst", + [(set GR16:$dst, (ineg GR16:$src1)), + (implicit EFLAGS)]>, OpSize; +def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), + "neg{l}\t$dst", + [(set GR32:$dst, (ineg GR32:$src1)), + (implicit EFLAGS)]>; +def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst", + [(set GR64:$dst, (ineg GR64:$src1)), + (implicit EFLAGS)]>; +} // Constraints = "$src1 = $dst" + +def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), + "neg{b}\t$dst", + [(store (ineg (loadi8 addr:$dst)), addr:$dst), + (implicit EFLAGS)]>; +def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), + "neg{w}\t$dst", + [(store (ineg (loadi16 addr:$dst)), addr:$dst), + (implicit EFLAGS)]>, OpSize; +def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), + "neg{l}\t$dst", + [(store (ineg (loadi32 addr:$dst)), addr:$dst), + (implicit EFLAGS)]>; +def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", + [(store (ineg (loadi64 addr:$dst)), addr:$dst), + (implicit EFLAGS)]>; +} // Defs = [EFLAGS] + + +// Note: NOT does not set EFLAGS! + +let Constraints = "$src1 = $dst" in { +// Match xor -1 to not. Favors these over a move imm + xor to save code size. +let AddedComplexity = 15 in { +def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1), + "not{b}\t$dst", + [(set GR8:$dst, (not GR8:$src1))]>; +def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1), + "not{w}\t$dst", + [(set GR16:$dst, (not GR16:$src1))]>, OpSize; +def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), + "not{l}\t$dst", + [(set GR32:$dst, (not GR32:$src1))]>; +def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst", + [(set GR64:$dst, (not GR64:$src1))]>; +} +} // Constraints = "$src1 = $dst" + +def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), + "not{b}\t$dst", + [(store (not (loadi8 addr:$dst)), addr:$dst)]>; +def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), + "not{w}\t$dst", + [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize; +def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), + "not{l}\t$dst", + [(store (not (loadi32 addr:$dst)), addr:$dst)]>; +def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", + [(store (not (loadi64 addr:$dst)), addr:$dst)]>; +} // CodeSize + +// TODO: inc/dec is slow for P4, but fast for Pentium-M. +let Defs = [EFLAGS] in { +let Constraints = "$src1 = $dst" in { +let CodeSize = 2 in +def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), + "inc{b}\t$dst", + [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>; + +let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. 
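+// (Illustrative sketch of the LEA transform: the two-address pass may, for
+// example, rewrite "incl %eax" as "leal 1(%eax), %ecx" when the tied source
+// register must stay live; the register choices here are hypothetical.)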
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), + "inc{w}\t$dst", + [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, + OpSize, Requires<[In32BitMode]>; +def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), + "inc{l}\t$dst", + [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, + Requires<[In32BitMode]>; +def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst", + [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>; +} // isConvertibleToThreeAddress = 1, CodeSize = 1 + + +// In 64-bit mode, single byte INC and DEC cannot be encoded. +let isConvertibleToThreeAddress = 1, CodeSize = 2 in { +// Can transform into LEA. +def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1), + "inc{w}\t$dst", + [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, + OpSize, Requires<[In64BitMode]>; +def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1), + "inc{l}\t$dst", + [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, + Requires<[In64BitMode]>; +def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1), + "dec{w}\t$dst", + [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, + OpSize, Requires<[In64BitMode]>; +def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), + "dec{l}\t$dst", + [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, + Requires<[In64BitMode]>; +} // isConvertibleToThreeAddress = 1, CodeSize = 2 + +} // Constraints = "$src1 = $dst" + +let CodeSize = 2 in { + def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", + [(store (add (loadi8 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>; + def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", + [(store (add (loadi16 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In32BitMode]>; + def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", + [(store (add (loadi32 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In32BitMode]>; + def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", + [(store (add (loadi64 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>; + +// These are duplicates of their 32-bit counterparts. Only needed so X86 knows +// how to unfold them. +// FIXME: What is this for?? +def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", + [(store (add (loadi16 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; +def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", + [(store (add (loadi32 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; +def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", + [(store (add (loadi16 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; +def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", + [(store (add (loadi32 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; +} // CodeSize = 2 + +let Constraints = "$src1 = $dst" in { +let CodeSize = 2 in +def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), + "dec{b}\t$dst", + [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>; +let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. 
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
+               "dec{w}\t$dst",
+               [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+             OpSize, Requires<[In32BitMode]>;
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
+               "dec{l}\t$dst",
+               [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+             Requires<[In32BitMode]>;
+def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
+                [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 1
+} // Constraints = "$src1 = $dst"
+
+
+let CodeSize = 2 in {
+  def DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
+                 [(store (add (loadi8 addr:$dst), -1), addr:$dst),
+                  (implicit EFLAGS)]>;
+  def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+                 [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+                  (implicit EFLAGS)]>,
+               OpSize, Requires<[In32BitMode]>;
+  def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+                 [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+                  (implicit EFLAGS)]>,
+               Requires<[In32BitMode]>;
+  def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+                  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+                   (implicit EFLAGS)]>;
+} // CodeSize = 2
+} // Defs = [EFLAGS]
+
+
+/// X86TypeInfo - This is a bunch of information that describes relevant X86
+/// information about value types.  For example, it can tell you the register
+/// class and preferred load to use.
+class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
+                  PatFrag loadnode, X86MemOperand memoperand, ImmType immkind,
+                  Operand immoperand, SDPatternOperator immoperator,
+                  Operand imm8operand, SDPatternOperator imm8operator,
+                  bit hasOddOpcode, bit hasOpSizePrefix, bit hasREX_WPrefix> {
+  /// VT - This is the value type itself.
+  ValueType VT = vt;
+
+  /// InstrSuffix - This is the suffix used on instructions with this type. For
+  /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
+  string InstrSuffix = instrsuffix;
+
+  /// RegClass - This is the register class associated with this type. For
+  /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
+  RegisterClass RegClass = regclass;
+
+  /// LoadNode - This is the load node associated with this type. For
+  /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
+  PatFrag LoadNode = loadnode;
+
+  /// MemOperand - This is the memory operand associated with this type. For
+  /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
+  X86MemOperand MemOperand = memoperand;
+
+  /// ImmEncoding - This is the encoding of an immediate of this type. For
+  /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
+  /// since the immediate field of i64 instructions is a 32-bit sign-extended
+  /// value.
+  ImmType ImmEncoding = immkind;
+
+  /// ImmOperand - This is the operand kind of an immediate of this type. For
+  /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
+  /// i64i32imm since the immediate field of i64 instructions is a 32-bit
+  /// sign-extended value.
+  Operand ImmOperand = immoperand;
+
+  /// ImmOperator - This is the operator that should be used to match an
+  /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
+  SDPatternOperator ImmOperator = immoperator;
+
+  /// Imm8Operand - This is the operand kind to use for an imm8 of this type.
+  /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
+  /// only used for instructions that have a sign-extended imm8 field form.
+  Operand Imm8Operand = imm8operand;
+
+  /// Imm8Operator - This is the operator that should be used to match an 8-bit
+  /// sign-extended immediate of this kind in a pattern (e.g. i16immSExt8).
+  SDPatternOperator Imm8Operator = imm8operator;
+
+  /// HasOddOpcode - This bit is true if the instruction should have an odd (as
+  /// opposed to even) opcode. Operations on i8 are usually even, operations on
+  /// other datatypes are odd.
+  bit HasOddOpcode = hasOddOpcode;
+
+  /// HasOpSizePrefix - This bit is set to true if the instruction should have
+  /// the 0x66 operand size prefix.  This is set for i16 types.
+  bit HasOpSizePrefix = hasOpSizePrefix;
+
+  /// HasREX_WPrefix - This bit is set to true if the instruction should have
+  /// the REX.W prefix (0x48).  This is set for i64 types.
+  bit HasREX_WPrefix = hasREX_WPrefix;
+}
+
+def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
+
+
+def Xi8  : X86TypeInfo<i8 , "b", GR8 , loadi8 , i8mem ,
+                       Imm8 , i8imm , imm, i8imm , invalid_node,
+                       0, 0, 0>;
+def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
+                       Imm16, i16imm, imm, i16i8imm, i16immSExt8,
+                       1, 1, 0>;
+def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
+                       Imm32, i32imm, imm, i32i8imm, i32immSExt8,
+                       1, 0, 0>;
+def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
+                       Imm32, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8,
+                       1, 0, 1>;
+
+/// ITy - This instruction base class takes the type info for the instruction.
+/// Using this, it:
+/// 1. Concatenates together the instruction mnemonic with the appropriate
+///    suffix letter, a tab, and the arguments.
+/// 2. Infers whether the instruction should have a 0x66 prefix byte.
+/// 3. Infers whether the instruction should have a REX.W prefix (0x48).
+/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
+///    or 1 (for i16,i32,i64 operations).
+class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
+          string mnemonic, string args, list<dag> pattern>
+  : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
+       opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
+      f, outs, ins,
+      !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
+
+  // Infer instruction prefixes from type info.
+  let hasOpSizePrefix = typeinfo.HasOpSizePrefix;
+  let hasREX_WPrefix  = typeinfo.HasREX_WPrefix;
+}
+
+// BinOpRR - Instructions like "add reg, reg, reg".
+class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+              dag outlist, list<dag> pattern, Format f = MRMDestReg>
+  : ITy<opcode, f, typeinfo, outlist,
+        (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+
+// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
+// just a regclass (no EFLAGS) as a result.
+class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+                SDNode opnode>
+  : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
+            [(set typeinfo.RegClass:$dst,
+                  (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+
+// BinOpRR_F - Instructions like "cmp reg, reg", where the pattern has
+// just EFLAGS as a result.
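+// (Illustrative expansion, using the CMP defm later in this file:
+// BinOpRR_F<0x38, "cmp", Xi32, X86cmp> yields an instruction whose only
+// pattern result is (set EFLAGS, (X86cmp GR32:$src1, GR32:$src2)).)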
+class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDPatternOperator opnode, Format f = MRMDestReg> + : BinOpRR<opcode, mnemonic, typeinfo, (outs), + [(set EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))], + f>; + +// BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has +// both a regclass and EFLAGS as a result. +class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>; + +// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has +// both a regclass and EFLAGS as a result, and has EFLAGS as input. +class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2, + EFLAGS))]>; + +// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding). +class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> + : ITy<opcode, MRMSrcReg, typeinfo, + (outs typeinfo.RegClass:$dst), + (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), + mnemonic, "{$src2, $dst|$dst, $src2}", []> { + // The disassembler should know about this, but not the asmparser. + let isCodeGenOnly = 1; +} + +// BinOpRM - Instructions like "add reg, reg, [mem]". +class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + dag outlist, list<dag> pattern> + : ITy<opcode, MRMSrcMem, typeinfo, outlist, + (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), + mnemonic, "{$src2, $src1|$src1, $src2}", pattern>; + +// BinOpRM_R - Instructions like "add reg, reg, [mem]". +class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, + (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>; + +// BinOpRM_F - Instructions like "cmp reg, [mem]". +class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDPatternOperator opnode> + : BinOpRM<opcode, mnemonic, typeinfo, (outs), + [(set EFLAGS, + (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>; + +// BinOpRM_RF - Instructions like "add reg, reg, [mem]". +class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>; + +// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]". +class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2), + EFLAGS))]>; + +// BinOpRI - Instructions like "add reg, reg, imm". +class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + Format f, dag outlist, list<dag> pattern> + : ITy<opcode, f, typeinfo, outlist, + (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2), + mnemonic, "{$src2, $src1|$src1, $src2}", pattern> { + let ImmT = typeinfo.ImmEncoding; +} + +// BinOpRI_R - Instructions like "add reg, reg, imm". 
+class BinOpRI_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, + (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>; + +// BinOpRI_F - Instructions like "cmp reg, imm". +class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDPatternOperator opnode, Format f> + : BinOpRI<opcode, mnemonic, typeinfo, f, (outs), + [(set EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>; + +// BinOpRI_RF - Instructions like "add reg, reg, imm". +class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>; + +// BinOpRI_RFF - Instructions like "adc reg, reg, imm". +class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2, + EFLAGS))]>; + +// BinOpRI8 - Instructions like "add reg, reg, imm8". +class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + Format f, dag outlist, list<dag> pattern> + : ITy<opcode, f, typeinfo, outlist, + (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2), + mnemonic, "{$src2, $src1|$src1, $src2}", pattern> { + let ImmT = Imm8; // Always 8-bit immediate. +} + +// BinOpRI8_R - Instructions like "add reg, reg, imm8". +class BinOpRI8_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, + (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>; + +// BinOpRI8_F - Instructions like "cmp reg, imm8". +class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs), + [(set EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>; + +// BinOpRI8_RF - Instructions like "add reg, reg, imm8". +class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>; + +// BinOpRI8_RFF - Instructions like "adc reg, reg, imm8". +class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), + [(set typeinfo.RegClass:$dst, EFLAGS, + (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2, + EFLAGS))]>; + +// BinOpMR - Instructions like "add [mem], reg". +class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + list<dag> pattern> + : ITy<opcode, MRMDestMem, typeinfo, + (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src), + mnemonic, "{$src, $dst|$dst, $src}", pattern>; + +// BinOpMR_RMW - Instructions like "add [mem], reg". 
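+// (RMW = read/modify/write: the pattern below loads from addr:$dst, applies
+// opnode against the register source, stores the result back to the same
+// address, and marks EFLAGS as implicitly defined.)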
+class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpMR<opcode, mnemonic, typeinfo, + [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst), + (implicit EFLAGS)]>; + +// BinOpMR_RMW_FF - Instructions like "adc [mem], reg". +class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpMR<opcode, mnemonic, typeinfo, + [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS), + addr:$dst), + (implicit EFLAGS)]>; + +// BinOpMR_F - Instructions like "cmp [mem], reg". +class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + SDNode opnode> + : BinOpMR<opcode, mnemonic, typeinfo, + [(set EFLAGS, (opnode (load addr:$dst), typeinfo.RegClass:$src))]>; + +// BinOpMI - Instructions like "add [mem], imm". +class BinOpMI<string mnemonic, X86TypeInfo typeinfo, + Format f, list<dag> pattern, bits<8> opcode = 0x80> + : ITy<opcode, f, typeinfo, + (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src), + mnemonic, "{$src, $dst|$dst, $src}", pattern> { + let ImmT = typeinfo.ImmEncoding; +} + +// BinOpMI_RMW - Instructions like "add [mem], imm". +class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpMI<mnemonic, typeinfo, f, + [(store (opnode (typeinfo.VT (load addr:$dst)), + typeinfo.ImmOperator:$src), addr:$dst), + (implicit EFLAGS)]>; + +// BinOpMI_RMW_FF - Instructions like "adc [mem], imm". +class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpMI<mnemonic, typeinfo, f, + [(store (opnode (typeinfo.VT (load addr:$dst)), + typeinfo.ImmOperator:$src, EFLAGS), addr:$dst), + (implicit EFLAGS)]>; + +// BinOpMI_F - Instructions like "cmp [mem], imm". +class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo, + SDPatternOperator opnode, Format f, bits<8> opcode = 0x80> + : BinOpMI<mnemonic, typeinfo, f, + [(set EFLAGS, (opnode (typeinfo.VT (load addr:$dst)), + typeinfo.ImmOperator:$src))], + opcode>; + +// BinOpMI8 - Instructions like "add [mem], imm8". +class BinOpMI8<string mnemonic, X86TypeInfo typeinfo, + Format f, list<dag> pattern> + : ITy<0x82, f, typeinfo, + (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src), + mnemonic, "{$src, $dst|$dst, $src}", pattern> { + let ImmT = Imm8; // Always 8-bit immediate. +} + +// BinOpMI8_RMW - Instructions like "add [mem], imm8". +class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpMI8<mnemonic, typeinfo, f, + [(store (opnode (load addr:$dst), + typeinfo.Imm8Operator:$src), addr:$dst), + (implicit EFLAGS)]>; + +// BinOpMI8_RMW_FF - Instructions like "adc [mem], imm8". +class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpMI8<mnemonic, typeinfo, f, + [(store (opnode (load addr:$dst), + typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst), + (implicit EFLAGS)]>; + +// BinOpMI8_F - Instructions like "cmp [mem], imm8". +class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo, + SDNode opnode, Format f> + : BinOpMI8<mnemonic, typeinfo, f, + [(set EFLAGS, (opnode (load addr:$dst), + typeinfo.Imm8Operator:$src))]>; + +// BinOpAI - Instructions like "add %eax, %eax, imm". 
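+// (Illustrative expansion: BinOpAI<0x04, "add", Xi32, EAX>, as instantiated
+// by the ADD defm below, is the short accumulator form "add{l} $imm, %eax";
+// it carries no pattern and both Uses and Defs EAX.)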
+class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, + Register areg> + : ITy<opcode, RawFrm, typeinfo, + (outs), (ins typeinfo.ImmOperand:$src), + mnemonic, !strconcat("{$src, %", areg.AsmName, "|%", + areg.AsmName, ", $src}"), []> { + let ImmT = typeinfo.ImmEncoding; + let Uses = [areg]; + let Defs = [areg]; +} + +/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is +/// defined with "(set GPR:$dst, EFLAGS, (...". +/// +/// It would be nice to get rid of the second and third argument here, but +/// tblgen can't handle dependent type references aggressively enough: PR8330 +multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, + string mnemonic, Format RegMRM, Format MemMRM, + SDNode opnodeflag, SDNode opnode, + bit CommutableRR, bit ConvertibleToThreeAddress> { + let Defs = [EFLAGS] in { + let Constraints = "$src1 = $dst" in { + let isCommutable = CommutableRR, + isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + def #NAME#8rr : BinOpRR_RF<BaseOpc, mnemonic, Xi8 , opnodeflag>; + def #NAME#16rr : BinOpRR_RF<BaseOpc, mnemonic, Xi16, opnodeflag>; + def #NAME#32rr : BinOpRR_RF<BaseOpc, mnemonic, Xi32, opnodeflag>; + def #NAME#64rr : BinOpRR_RF<BaseOpc, mnemonic, Xi64, opnodeflag>; + } // isCommutable + + def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>; + def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>; + def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>; + def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>; + + def #NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>; + def #NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>; + def #NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>; + def #NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>; + + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + // NOTE: These are order specific, we want the ri8 forms to be listed + // first so that they are slightly preferred to the ri forms. + def #NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>; + def #NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>; + def #NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>; + + def #NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>; + def #NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>; + def #NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>; + def #NAME#64ri32: BinOpRI_RF<0x80, mnemonic, Xi64, opnodeflag, RegMRM>; + } + } // Constraints = "$src1 = $dst" + + def #NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>; + def #NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>; + def #NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>; + def #NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>; + + // NOTE: These are order specific, we want the mi8 forms to be listed + // first so that they are slightly preferred to the mi forms. 
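+// (For example, "and{l} $8, (%ecx)" fits the mi8 form: opcode 0x83 with a
+// sign-extended imm8 is three bytes shorter than the 0x81 imm32 form.)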
+ def #NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>; + def #NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>; + def #NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>; + + def #NAME#8mi : BinOpMI_RMW<mnemonic, Xi8 , opnode, MemMRM>; + def #NAME#16mi : BinOpMI_RMW<mnemonic, Xi16, opnode, MemMRM>; + def #NAME#32mi : BinOpMI_RMW<mnemonic, Xi32, opnode, MemMRM>; + def #NAME#64mi32 : BinOpMI_RMW<mnemonic, Xi64, opnode, MemMRM>; + + def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>; + def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>; + def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>; + def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>; + } +} + +/// ArithBinOp_RFF - This is an arithmetic binary operator where the pattern is +/// defined with "(set GPR:$dst, EFLAGS, (node LHS, RHS, EFLAGS))" like ADC and +/// SBB. +/// +/// It would be nice to get rid of the second and third argument here, but +/// tblgen can't handle dependent type references aggressively enough: PR8330 +multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, + string mnemonic, Format RegMRM, Format MemMRM, + SDNode opnode, bit CommutableRR, + bit ConvertibleToThreeAddress> { + let Defs = [EFLAGS] in { + let Constraints = "$src1 = $dst" in { + let isCommutable = CommutableRR, + isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + def #NAME#8rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi8 , opnode>; + def #NAME#16rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi16, opnode>; + def #NAME#32rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi32, opnode>; + def #NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>; + } // isCommutable + + def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>; + def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>; + def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>; + def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>; + + def #NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>; + def #NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>; + def #NAME#32rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi32, opnode>; + def #NAME#64rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi64, opnode>; + + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + // NOTE: These are order specific, we want the ri8 forms to be listed + // first so that they are slightly preferred to the ri forms. + def #NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>; + def #NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>; + def #NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>; + + def #NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def #NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>; + def #NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>; + def #NAME#64ri32: BinOpRI_RFF<0x80, mnemonic, Xi64, opnode, RegMRM>; + } + } // Constraints = "$src1 = $dst" + + def #NAME#8mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi8 , opnode>; + def #NAME#16mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi16, opnode>; + def #NAME#32mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi32, opnode>; + def #NAME#64mr : BinOpMR_RMW_FF<BaseOpc, mnemonic, Xi64, opnode>; + + // NOTE: These are order specific, we want the mi8 forms to be listed + // first so that they are slightly preferred to the mi forms. 
+ def #NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>; + def #NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>; + def #NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>; + + def #NAME#8mi : BinOpMI_RMW_FF<mnemonic, Xi8 , opnode, MemMRM>; + def #NAME#16mi : BinOpMI_RMW_FF<mnemonic, Xi16, opnode, MemMRM>; + def #NAME#32mi : BinOpMI_RMW_FF<mnemonic, Xi32, opnode, MemMRM>; + def #NAME#64mi32 : BinOpMI_RMW_FF<mnemonic, Xi64, opnode, MemMRM>; + + def #NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>; + def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>; + def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>; + def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>; + } +} + +/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is +/// defined with "(set EFLAGS, (...". It would be really nice to find a way +/// to factor this with the other ArithBinOp_*. +/// +multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, + string mnemonic, Format RegMRM, Format MemMRM, + SDNode opnode, + bit CommutableRR, bit ConvertibleToThreeAddress> { + let Defs = [EFLAGS] in { + let isCommutable = CommutableRR, + isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + def #NAME#8rr : BinOpRR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def #NAME#16rr : BinOpRR_F<BaseOpc, mnemonic, Xi16, opnode>; + def #NAME#32rr : BinOpRR_F<BaseOpc, mnemonic, Xi32, opnode>; + def #NAME#64rr : BinOpRR_F<BaseOpc, mnemonic, Xi64, opnode>; + } // isCommutable + + def #NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>; + def #NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>; + def #NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>; + def #NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>; + + def #NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>; + def #NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>; + def #NAME#32rm : BinOpRM_F<BaseOpc2, mnemonic, Xi32, opnode>; + def #NAME#64rm : BinOpRM_F<BaseOpc2, mnemonic, Xi64, opnode>; + + let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + // NOTE: These are order specific, we want the ri8 forms to be listed + // first so that they are slightly preferred to the ri forms. + def #NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>; + def #NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>; + def #NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>; + + def #NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>; + def #NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>; + def #NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>; + def #NAME#64ri32: BinOpRI_F<0x80, mnemonic, Xi64, opnode, RegMRM>; + } + + def #NAME#8mr : BinOpMR_F<BaseOpc, mnemonic, Xi8 , opnode>; + def #NAME#16mr : BinOpMR_F<BaseOpc, mnemonic, Xi16, opnode>; + def #NAME#32mr : BinOpMR_F<BaseOpc, mnemonic, Xi32, opnode>; + def #NAME#64mr : BinOpMR_F<BaseOpc, mnemonic, Xi64, opnode>; + + // NOTE: These are order specific, we want the mi8 forms to be listed + // first so that they are slightly preferred to the mi forms. 
+  def #NAME#16mi8  : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
+  def #NAME#32mi8  : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
+  def #NAME#64mi8  : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
+
+  def #NAME#8mi    : BinOpMI_F<mnemonic, Xi8 , opnode, MemMRM>;
+  def #NAME#16mi   : BinOpMI_F<mnemonic, Xi16, opnode, MemMRM>;
+  def #NAME#32mi   : BinOpMI_F<mnemonic, Xi32, opnode, MemMRM>;
+  def #NAME#64mi32 : BinOpMI_F<mnemonic, Xi64, opnode, MemMRM>;
+
+  def #NAME#8i8   : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL>;
+  def #NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX>;
+  def #NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX>;
+  def #NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX>;
+  }
+}
+
+
+defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m,
+                         X86and_flag, and, 1, 0>;
+defm OR  : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m,
+                         X86or_flag, or, 1, 0>;
+defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m,
+                         X86xor_flag, xor, 1, 0>;
+defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
+                         X86add_flag, add, 1, 1>;
+defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
+                         X86sub_flag, sub, 0, 0>;
+
+// Arithmetic.
+let Uses = [EFLAGS] in {
+  defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag,
+                            1, 0>;
+  defm SBB : ArithBinOp_RFF<0x18, 0x1A, 0x1C, "sbb", MRM3r, MRM3m, X86sbb_flag,
+                            0, 0>;
+}
+
+defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>;
+
+
+//===----------------------------------------------------------------------===//
+// Semantically, test instructions are similar to AND, except they don't
+// generate a result.  From an encoding perspective, they are very different:
+// they don't have all the usual imm8 and REV forms, and are encoded into a
+// different space.
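+// (Illustrative: "testl %eax, %eax" is the usual idiom for comparing a
+// register against zero without producing a result; the fragment below lets
+// isel recognize it as (X86cmp (and_su x, y), 0).)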
+def X86testpat : PatFrag<(ops node:$lhs, node:$rhs), + (X86cmp (and_su node:$lhs, node:$rhs), 0)>; + +let Defs = [EFLAGS] in { + let isCommutable = 1 in { + def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>; + def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>; + def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>; + def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>; + } // isCommutable + + def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>; + def TEST16rm : BinOpRM_F<0x84, "test", Xi16, X86testpat>; + def TEST32rm : BinOpRM_F<0x84, "test", Xi32, X86testpat>; + def TEST64rm : BinOpRM_F<0x84, "test", Xi64, X86testpat>; + + def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>; + def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>; + def TEST32ri : BinOpRI_F<0xF6, "test", Xi32, X86testpat, MRM0r>; + def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>; + + def TEST8mi : BinOpMI_F<"test", Xi8 , X86testpat, MRM0m, 0xF6>; + def TEST16mi : BinOpMI_F<"test", Xi16, X86testpat, MRM0m, 0xF6>; + def TEST32mi : BinOpMI_F<"test", Xi32, X86testpat, MRM0m, 0xF6>; + def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>; + + def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL>; + def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX>; + def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX>; + def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX>; +} + diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 2a6a71d..1ea8071 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -56,6 +56,31 @@ struct X86AddressMode { : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) { Base.Reg = 0; } + + + void getFullAddress(SmallVectorImpl<MachineOperand> &MO) { + assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8); + + if (BaseType == X86AddressMode::RegBase) + MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false, + false, false, false, 0, false)); + else { + assert(BaseType == X86AddressMode::FrameIndexBase); + MO.push_back(MachineOperand::CreateFI(Base.FrameIndex)); + } + + MO.push_back(MachineOperand::CreateImm(Scale)); + MO.push_back(MachineOperand::CreateReg(IndexReg, false, false, + false, false, false, 0, false)); + + if (GV) + MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags)); + else + MO.push_back(MachineOperand::CreateImm(Disp)); + + MO.push_back(MachineOperand::CreateReg(0, false, false, + false, false, false, 0, false)); + } }; /// addDirectMem - This function is used to add a direct memory reference to the @@ -101,10 +126,11 @@ addFullAddress(const MachineInstrBuilder &MIB, if (AM.BaseType == X86AddressMode::RegBase) MIB.addReg(AM.Base.Reg); - else if (AM.BaseType == X86AddressMode::FrameIndexBase) + else { + assert(AM.BaseType == X86AddressMode::FrameIndexBase); MIB.addFrameIndex(AM.Base.FrameIndex); - else - assert (0); + } + MIB.addImm(AM.Scale).addReg(AM.IndexReg); if (AM.GV) MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); @@ -131,9 +157,8 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { if (TID.mayStore()) Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), - Flags, Offset, - MFI.getObjectSize(FI), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset), + Flags, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); return addOffset(MIB.addFrameIndex(FI), Offset) 
.addMemOperand(MMO);
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
new file mode 100644
index 0000000..3a43b22
--- /dev/null
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -0,0 +1,104 @@
+//===- X86InstrCMovSetCC.td - Conditional Move and SetCC ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the X86 conditional move and set on condition
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+
+// CMOV instructions.
+multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
+  let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
+      isCommutable = 1 in {
+    def #NAME#16rr
+      : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+          !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+          [(set GR16:$dst,
+                (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,
+        TB, OpSize;
+    def #NAME#32rr
+      : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+          !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+          [(set GR32:$dst,
+                (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>, TB;
+    def #NAME#64rr
+      : RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+           !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+           [(set GR64:$dst,
+                 (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
+  }
+
+  let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
+    def #NAME#16rm
+      : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+          !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
+          [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+                                    CondNode, EFLAGS))]>, TB, OpSize;
+    def #NAME#32rm
+      : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+          !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
+          [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+                                    CondNode, EFLAGS))]>, TB;
+    def #NAME#64rm
+      : RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
+           !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
+           [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
+                                     CondNode, EFLAGS))]>, TB;
+  } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
+} // end multiclass
+
+
+// Conditional Moves.
+defm CMOVO  : CMOV<0x40, "cmovo" , X86_COND_O>;
+defm CMOVNO : CMOV<0x41, "cmovno", X86_COND_NO>;
+defm CMOVB  : CMOV<0x42, "cmovb" , X86_COND_B>;
+defm CMOVAE : CMOV<0x43, "cmovae", X86_COND_AE>;
+defm CMOVE  : CMOV<0x44, "cmove" , X86_COND_E>;
+defm CMOVNE : CMOV<0x45, "cmovne", X86_COND_NE>;
+defm CMOVBE : CMOV<0x46, "cmovbe", X86_COND_BE>;
+defm CMOVA  : CMOV<0x47, "cmova" , X86_COND_A>;
+defm CMOVS  : CMOV<0x48, "cmovs" , X86_COND_S>;
+defm CMOVNS : CMOV<0x49, "cmovns", X86_COND_NS>;
+defm CMOVP  : CMOV<0x4A, "cmovp" , X86_COND_P>;
+defm CMOVNP : CMOV<0x4B, "cmovnp", X86_COND_NP>;
+defm CMOVL  : CMOV<0x4C, "cmovl" , X86_COND_L>;
+defm CMOVGE : CMOV<0x4D, "cmovge", X86_COND_GE>;
+defm CMOVLE : CMOV<0x4E, "cmovle", X86_COND_LE>;
+defm CMOVG  : CMOV<0x4F, "cmovg" , X86_COND_G>;
+
+
+// SetCC instructions.
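+// (Illustrative expansion: "defm SETE : SETCC<0x94, "sete", X86_COND_E>"
+// below produces SETEr, which materializes the flag into a GR8, and SETEm,
+// which stores the i8 result directly to memory.)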
+multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
+  let Uses = [EFLAGS] in {
+    def r  : I<opc, MRM0r, (outs GR8:$dst), (ins),
+               !strconcat(Mnemonic, "\t$dst"),
+               [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>, TB;
+    def m  : I<opc, MRM0m, (outs), (ins i8mem:$dst),
+               !strconcat(Mnemonic, "\t$dst"),
+               [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>, TB;
+  } // Uses = [EFLAGS]
+}
+
+defm SETO  : SETCC<0x90, "seto",  X86_COND_O>;   // is overflow bit set
+defm SETNO : SETCC<0x91, "setno", X86_COND_NO>;  // is overflow bit not set
+defm SETB  : SETCC<0x92, "setb",  X86_COND_B>;   // unsigned less than
+defm SETAE : SETCC<0x93, "setae", X86_COND_AE>;  // unsigned greater or equal
+defm SETE  : SETCC<0x94, "sete",  X86_COND_E>;   // equal to
+defm SETNE : SETCC<0x95, "setne", X86_COND_NE>;  // not equal to
+defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>;  // unsigned less than or equal
+defm SETA  : SETCC<0x97, "seta",  X86_COND_A>;   // unsigned greater than
+defm SETS  : SETCC<0x98, "sets",  X86_COND_S>;   // is signed bit set
+defm SETNS : SETCC<0x99, "setns", X86_COND_NS>;  // is not signed
+defm SETP  : SETCC<0x9A, "setp",  X86_COND_P>;   // is parity bit set
+defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>;  // is parity bit not set
+defm SETL  : SETCC<0x9C, "setl",  X86_COND_L>;   // signed less than
+defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>;  // signed greater or equal
+defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>;  // signed less than or equal
+defm SETG  : SETCC<0x9F, "setg",  X86_COND_G>;   // signed greater than
+
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
new file mode 100644
index 0000000..4c915d9
--- /dev/null
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -0,0 +1,1626 @@
+//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various pseudo instructions used by the compiler,
+// as well as Pat patterns used during instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pattern Matching Support
+
+def GetLo32XForm : SDNodeXForm<imm, [{
+  // Transformation function: get the low 32 bits.
+  return getI32Imm((unsigned)N->getZExtValue());
+}]>;
+
+def GetLo8XForm : SDNodeXForm<imm, [{
+  // Transformation function: get the low 8 bits.
+  return getI8Imm((uint8_t)N->getZExtValue());
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Random Pseudo Instructions.
+
+// PIC base construction.  This expands to code that looks like this:
+//     call  $next_inst
+//     popl %destreg
+let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
+  def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
+                      "", []>;
+
+
+// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
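+// (Illustrative lowering: an "ADJCALLSTACKDOWN32 16" typically becomes
+// "subl $16, %esp" when call frame pseudos are eliminated, which is why
+// EFLAGS appears in Defs below.)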
+let Defs = [ESP, EFLAGS], Uses = [ESP] in {
+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+                           "#ADJCALLSTACKDOWN",
+                           [(X86callseq_start timm:$amt)]>,
+                         Requires<[In32BitMode]>;
+def ADJCALLSTACKUP32   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+                           "#ADJCALLSTACKUP",
+                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+                         Requires<[In32BitMode]>;
+}
+
+// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
+// a stack adjustment and the codegen must know that they may modify the stack
+// pointer before prolog-epilog rewriting occurs.
+// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+// sub / add which can clobber EFLAGS.
+let Defs = [RSP, EFLAGS], Uses = [RSP] in {
+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+                           "#ADJCALLSTACKDOWN",
+                           [(X86callseq_start timm:$amt)]>,
+                         Requires<[In64BitMode]>;
+def ADJCALLSTACKUP64   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+                           "#ADJCALLSTACKUP",
+                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+                         Requires<[In64BitMode]>;
+}
+
+
+
+// x86-64 va_start lowering magic.
+let usesCustomInserter = 1 in {
+def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
+                              (outs),
+                              (ins GR8:$al,
+                                   i64imm:$regsavefi, i64imm:$offset,
+                                   variable_ops),
+                              "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
+                              [(X86vastart_save_xmm_regs GR8:$al,
+                                                         imm:$regsavefi,
+                                                         imm:$offset)]>;
+
+// The VAARG_64 pseudo-instruction takes the address of the va_list,
+// and places the address of the next argument into a register.
+let Defs = [EFLAGS] in
+def VAARG_64 : I<0, Pseudo,
+                 (outs GR64:$dst),
+                 (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
+                 "#VAARG_64 $dst, $ap, $size, $mode, $align",
+                 [(set GR64:$dst,
+                    (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
+                  (implicit EFLAGS)]>;
+
+// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
+// targets.  These calls are needed to probe the stack when allocating more
+// than 4k bytes in one go. Touching the stack at 4K increments is necessary
+// to ensure that the guard pages used by the OS virtual memory manager are
+// allocated in the correct sequence.
+// The main point of having a separate instruction is the extra unmodelled
+// effects it has (compared to an ordinary call), such as the stack pointer
+// change.
+
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+  def WIN_ALLOCA : I<0, Pseudo, (outs), (ins),
+                     "# dynamic stack allocation",
+                     [(X86WinAlloca)]>;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// EH Pseudo Instructions
+//
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+    hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
+                    "ret\t#eh_return, addr: $addr",
+                    [(X86ehret GR32:$addr)]>;
+
+}
+
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+    hasCtrlDep = 1, isCodeGenOnly = 1 in {
+def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
+                    "ret\t#eh_return, addr: $addr",
+                    [(X86ehret GR64:$addr)]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map movr0 to xor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+// FIXME: Set encoding to pseudo.
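+// (Illustrative encoding note: MOV32r0 below is emitted as "xorl %reg, %reg",
+// a 2-byte encoding versus 5 bytes for "movl $0, %reg", which is also why it
+// must list EFLAGS in Defs.)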
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
+    isCodeGenOnly = 1 in {
+def MOV8r0  : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
+                [(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and sometimes avoids a partial-register update, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+                "",
+                [(set GR16:$dst, 0)]>, OpSize;
+
+// FIXME: Set encoding to pseudo.
+def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
+                [(set GR32:$dst, 0)]>;
+}
+
+// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
+// smaller encoding, but doing so at isel time interferes with
+// rematerialization in the current register allocator. For now, this is
+// rewritten when the instruction is lowered to an MCInst.
+// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove
+// when we have a better way to specify isel priority.
+let Defs = [EFLAGS], isCodeGenOnly = 1,
+    AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
+                [(set GR64:$dst, 0)]>;
+
+// Materialize an i64 constant whose top 32 bits are zero. This could
+// theoretically use MOV32ri with a SUBREG_TO_REG to represent the
+// zero-extension; however, that would make it more difficult to rematerialize.
+let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
+    isCodeGenOnly = 1 in
+def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
+                        "", [(set GR64:$dst, i64immZExt32:$src)]>;
+
+// Use sbb to materialize carry bit.
+let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
+// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
+// However, Pat<> can't replicate the destination reg into the inputs of the
+// result.
+// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
+// X86CodeEmitter.
+def SETB_C8r  : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
+                  [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
+                  [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
+                OpSize;
+def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
+                  [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
+                   [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+} // isCodeGenOnly
+
+
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C32r)>;
+def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C64r)>;
+
+def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C16r)>;
+def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C32r)>;
+def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C64r)>;
+
+// We canonicalize 'setb' to "(and (sbb reg,reg), 1)" in the hope that the and
+// will be eliminated and that the sbb can be extended up to a wider type.  When
+// this happens, it is great.  However, if we are left with an 8-bit sbb and an
+// and, we might as well just match it as a setb.
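+// (Concretely: an (and (X86setcc_c X86_COND_B, EFLAGS), 1) that survives to
+// instruction selection is folded back into a plain SETBr by the pattern
+// below.)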
+def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1), + (SETBr)>; + +//===----------------------------------------------------------------------===// +// String Pseudo Instructions +// +let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { +def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", + [(X86rep_movs i8)]>, REP; +def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", + [(X86rep_movs i16)]>, REP, OpSize; +def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", + [(X86rep_movs i32)]>, REP; +} + +let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in +def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", + [(X86rep_movs i64)]>, REP; + + +// FIXME: Should use "(X86rep_stos AL)" as the pattern. +let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in +def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", + [(X86rep_stos i8)]>, REP; +let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in +def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", + [(X86rep_stos i16)]>, REP, OpSize; +let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in +def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", + [(X86rep_stos i32)]>, REP; + +let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in +def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", + [(X86rep_stos i64)]>, REP; + + +//===----------------------------------------------------------------------===// +// Thread Local Storage Instructions +// + +// ELF TLS Support +// All calls clobber the non-callee saved registers. ESP is marked as +// a use to prevent stack-pointer assignments that appear immediately +// before calls from potentially appearing dead. +let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [ESP] in +def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), + "# TLS_addr32", + [(X86tlsaddr tls32addr:$sym)]>, + Requires<[In32BitMode]>; + +// All calls clobber the non-callee saved registers. RSP is marked as +// a use to prevent stack-pointer assignments that appear immediately +// before calls from potentially appearing dead. +let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [RSP] in +def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), + "# TLS_addr64", + [(X86tlsaddr tls64addr:$sym)]>, + Requires<[In64BitMode]>; + +// Darwin TLS Support +// For i386, the address of the thunk is passed on the stack, on return the +// address of the variable is in %eax. %ecx is trashed during the function +// call. All other registers are preserved. +let Defs = [EAX, ECX, EFLAGS], + Uses = [ESP], + usesCustomInserter = 1 in +def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym), + "# TLSCall_32", + [(X86TLSCall addr:$sym)]>, + Requires<[In32BitMode]>; + +// For x86_64, the address of the thunk is passed in %rdi, on return +// the address of the variable is in %rax. All other registers are preserved. 
+let Defs = [RAX, EFLAGS],
+    Uses = [RSP, RDI],
+    usesCustomInserter = 1 in
+def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+                   "# TLSCall_64",
+                   [(X86TLSCall addr:$sym)]>,
+                 Requires<[In64BitMode]>;
+
+
+//===----------------------------------------------------------------------===//
+// Conditional Move Pseudo Instructions
+
+let Constraints = "$src1 = $dst" in {
+
+// Conditional moves
+let Uses = [EFLAGS] in {
+
+// X86 doesn't have 8-bit conditional moves. Use a custom inserter to
+// emit control flow. An alternative to this is to mark i8 SELECT as Promote;
+// however, that requires promoting the operands and can induce additional
+// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
+// clobber EFLAGS, because if one of the operands is zero, the expansion
+// could involve an xor.
+let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
+def CMOV_GR8 : I<0, Pseudo,
+                 (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
+                 "#CMOV_GR8 PSEUDO!",
+                 [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
+                                          imm:$cond, EFLAGS))]>;
+
+let Predicates = [NoCMov] in {
+def CMOV_GR32 : I<0, Pseudo,
+                  (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
+                  "#CMOV_GR32* PSEUDO!",
+                  [(set GR32:$dst,
+                    (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_GR16 : I<0, Pseudo,
+                  (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
+                  "#CMOV_GR16* PSEUDO!",
+                  [(set GR16:$dst,
+                    (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_RFP32 : I<0, Pseudo,
+                   (outs RFP32:$dst),
+                   (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+                   "#CMOV_RFP32 PSEUDO!",
+                   [(set RFP32:$dst,
+                     (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+                              EFLAGS))]>;
+def CMOV_RFP64 : I<0, Pseudo,
+                   (outs RFP64:$dst),
+                   (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+                   "#CMOV_RFP64 PSEUDO!",
+                   [(set RFP64:$dst,
+                     (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+                              EFLAGS))]>;
+def CMOV_RFP80 : I<0, Pseudo,
+                   (outs RFP80:$dst),
+                   (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+                   "#CMOV_RFP80 PSEUDO!",
+                   [(set RFP80:$dst,
+                     (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+                              EFLAGS))]>;
+} // Predicates = [NoCMov]
+} // usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
+} // Uses = [EFLAGS]
+
+} // Constraints = "$src1 = $dst"
+
+
+//===----------------------------------------------------------------------===//
+// Atomic Instruction Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// Atomic exchange, and, or, xor
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+                  usesCustomInserter = 1 in {
+
+def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+               "#ATOMAND8 PSEUDO!",
+               [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
+def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+               "#ATOMOR8 PSEUDO!",
+               [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
+def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+               "#ATOMXOR8 PSEUDO!",
+               [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
+def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+               "#ATOMNAND8 PSEUDO!",
+               [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
+
+def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+               "#ATOMAND16 PSEUDO!",
+               [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
+def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+               "#ATOMOR16 PSEUDO!",
+               [(set GR16:$dst, (atomic_load_or_16
addr:$ptr, GR16:$val))]>; +def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMXOR16 PSEUDO!", + [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>; +def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMNAND16 PSEUDO!", + [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>; +def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), + "#ATOMMIN16 PSEUDO!", + [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>; +def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMMAX16 PSEUDO!", + [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>; +def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMUMIN16 PSEUDO!", + [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>; +def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), + "#ATOMUMAX16 PSEUDO!", + [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>; + + +def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMAND32 PSEUDO!", + [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>; +def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMOR32 PSEUDO!", + [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>; +def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMXOR32 PSEUDO!", + [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>; +def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMNAND32 PSEUDO!", + [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>; +def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), + "#ATOMMIN32 PSEUDO!", + [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>; +def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMMAX32 PSEUDO!", + [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>; +def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMUMIN32 PSEUDO!", + [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>; +def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMUMAX32 PSEUDO!", + [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>; + + + +def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMAND64 PSEUDO!", + [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>; +def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMOR64 PSEUDO!", + [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>; +def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMXOR64 PSEUDO!", + [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>; +def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMNAND64 PSEUDO!", + [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>; +def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val), + "#ATOMMIN64 PSEUDO!", + [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>; +def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMMAX64 PSEUDO!", + [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>; +def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMUMIN64 PSEUDO!", + [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>; +def ATOMUMAX64: I<0, 
Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), + "#ATOMUMAX64 PSEUDO!", + [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>; +} + +let Constraints = "$val1 = $dst1, $val2 = $dst2", + Defs = [EFLAGS, EAX, EBX, ECX, EDX], + Uses = [EAX, EBX, ECX, EDX], + mayLoad = 1, mayStore = 1, + usesCustomInserter = 1 in { +def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMAND6432 PSEUDO!", []>; +def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMOR6432 PSEUDO!", []>; +def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMXOR6432 PSEUDO!", []>; +def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMNAND6432 PSEUDO!", []>; +def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMADD6432 PSEUDO!", []>; +def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMSUB6432 PSEUDO!", []>; +def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + "#ATOMSWAP6432 PSEUDO!", []>; +} + +//===----------------------------------------------------------------------===// +// Normal-Instructions-With-Lock-Prefix Pseudo Instructions +//===----------------------------------------------------------------------===// + +// FIXME: Use normal instructions and add lock prefix dynamically. + +// Memory barriers + +// TODO: Get this to fold the constant into the instruction. +let isCodeGenOnly = 1 in +def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), + "lock\n\t" + "or{l}\t{$zero, $dst|$dst, $zero}", + []>, Requires<[In32BitMode]>, LOCK; + +let hasSideEffects = 1 in +def Int_MemBarrier : I<0, Pseudo, (outs), (ins), + "#MEMBARRIER", + [(X86MemBarrier)]>, Requires<[HasSSE2]>; + +// TODO: Get this to fold the constant into the instruction. +let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in +def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero), + "lock\n\t" + "or{q}\t{$zero, (%rsp)|(%rsp), $zero}", + [(X86MemBarrierNoSSE GR64:$zero)]>, + Requires<[In64BitMode]>, LOCK; + + +// Optimized codegen when the non-memory output is not used. 
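+// For example (a sketch of the intent; actual selection depends on the
+// surrounding code):
+//
+//   #include <atomic>
+//   std::atomic<int> counter;
+//   void bump()     { counter.fetch_add(1); }        // result dead: lock add
+//   int  take_old() { return counter.fetch_add(1); } // result live: lock xadd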
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { +def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), + "lock\n\t" + "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "lock\n\t" + "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "lock\n\t" + "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "lock\n\t" + "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + +def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2), + "lock\n\t" + "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2), + "lock\n\t" + "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2), + "lock\n\t" + "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs), + (ins i64mem:$dst, i64i32imm :$src2), + "lock\n\t" + "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + +def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2), + "lock\n\t" + "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2), + "lock\n\t" + "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs), + (ins i64mem:$dst, i64i8imm :$src2), + "lock\n\t" + "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + +def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2), + "lock\n\t" + "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "lock\n\t" + "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "lock\n\t" + "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "lock\n\t" + "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + + +def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), + "lock\n\t" + "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), + "lock\n\t" + "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), + "lock\n\t" + "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs), + (ins i64mem:$dst, i64i32imm:$src2), + "lock\n\t" + "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + + +def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), + "lock\n\t" + "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2), + "lock\n\t" + "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs), + (ins i64mem:$dst, i64i8imm :$src2), + "lock\n\t" + "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + +def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), + "lock\n\t" + "inc{b}\t$dst", []>, LOCK; +def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), + "lock\n\t" + "inc{w}\t$dst", []>, OpSize, LOCK; +def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins 
i32mem:$dst), + "lock\n\t" + "inc{l}\t$dst", []>, LOCK; +def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), + "lock\n\t" + "inc{q}\t$dst", []>, LOCK; + +def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), + "lock\n\t" + "dec{b}\t$dst", []>, LOCK; +def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), + "lock\n\t" + "dec{w}\t$dst", []>, OpSize, LOCK; +def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), + "lock\n\t" + "dec{l}\t$dst", []>, LOCK; +def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), + "lock\n\t" + "dec{q}\t$dst", []>, LOCK; +} + +// Atomic compare and swap. +let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], + isCodeGenOnly = 1 in { +def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), + "lock\n\t" + "cmpxchg8b\t$ptr", + [(X86cas8 addr:$ptr)]>, TB, LOCK; +} +let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in { +def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap), + "lock\n\t" + "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}", + [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK; +} + +let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in { +def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap), + "lock\n\t" + "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}", + [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK; +} + +let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in { +def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), + "lock\n\t" + "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", + [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK; +} + +let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in { +def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), + "lock\n\t" + "cmpxchgq\t$swap,$ptr", + [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK; +} + +// Atomic exchange and add +let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in { +def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), + "lock\n\t" + "xadd{b}\t{$val, $ptr|$ptr, $val}", + [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>, + TB, LOCK; +def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr), + "lock\n\t" + "xadd{w}\t{$val, $ptr|$ptr, $val}", + [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>, + TB, OpSize, LOCK; +def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr), + "lock\n\t" + "xadd{l}\t{$val, $ptr|$ptr, $val}", + [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>, + TB, LOCK; +def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr), + "lock\n\t" + "xadd\t$val, $ptr", + [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, + TB, LOCK; +} + +//===----------------------------------------------------------------------===// +// Conditional Move Pseudo Instructions. +//===----------------------------------------------------------------------===// + + +// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after +// instruction selection into a branch sequence. 
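+// For example (a sketch): with SSE scalar floats and no usable cmov for the
+// value type,
+//
+//   float pick(bool c, float a, float b) { return c ? a : b; }
+//
+// keeps its select as a CMOV_FR32 pseudo through isel; the custom inserter
+// then expands it into a compare-and-branch diamond across two new blocks.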
+let Uses = [EFLAGS], usesCustomInserter = 1 in {
+  def CMOV_FR32 : I<0, Pseudo,
+                    (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond),
+                    "#CMOV_FR32 PSEUDO!",
+                    [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond,
+                                              EFLAGS))]>;
+  def CMOV_FR64 : I<0, Pseudo,
+                    (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond),
+                    "#CMOV_FR64 PSEUDO!",
+                    [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond,
+                                              EFLAGS))]>;
+  def CMOV_V4F32 : I<0, Pseudo,
+                    (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+                    "#CMOV_V4F32 PSEUDO!",
+                    [(set VR128:$dst,
+                      (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+                                      EFLAGS)))]>;
+  def CMOV_V2F64 : I<0, Pseudo,
+                    (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+                    "#CMOV_V2F64 PSEUDO!",
+                    [(set VR128:$dst,
+                      (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+                                      EFLAGS)))]>;
+  def CMOV_V2I64 : I<0, Pseudo,
+                    (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond),
+                    "#CMOV_V2I64 PSEUDO!",
+                    [(set VR128:$dst,
+                      (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond,
+                                      EFLAGS)))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DAG Pattern Matching Rules
+//===----------------------------------------------------------------------===//
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
+def : Pat<(i32 (X86Wrapper tconstpool  :$dst)), (MOV32ri tconstpool  :$dst)>;
+def : Pat<(i32 (X86Wrapper tjumptable  :$dst)), (MOV32ri tjumptable  :$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
+def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
+def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
+def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
+
+def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
+          (ADD32ri GR32:$src1, tconstpool:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
+          (ADD32ri GR32:$src1, tjumptable:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
+          (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
+          (ADD32ri GR32:$src1, texternalsym:$src2)>;
+def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
+          (ADD32ri GR32:$src1, tblockaddress:$src2)>;
+
+def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+          (MOV32mi addr:$dst, tglobaladdr:$src)>;
+def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
+          (MOV32mi addr:$dst, texternalsym:$src)>;
+def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
+          (MOV32mi addr:$dst, tblockaddress:$src)>;
+
+
+
+// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable, when not in the
+// small code model, should use 'movabs'. FIXME: This is really a hack; the
+// 'movabs' predicate should handle this sort of thing.
+def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
+          (MOV64ri tconstpool  :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
+          (MOV64ri tjumptable  :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+          (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+          (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+          (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
+
+// In static codegen with the small code model, we can get the address of a
+// label into a register with 'movl'. FIXME: This is a hack; the 'imm'
+// predicate of the MOV64ri64i32 should accept these.
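+// For example (a sketch): in the small, static code model,
+//
+//   extern int g;
+//   int *addr_of_g() { return &g; }
+//
+// can materialize &g with a 32-bit immediate ("movl $g, %eax", implicitly
+// zero-extended), whereas far data needs the 64-bit 'movabs' form above.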
+def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
+          (MOV64ri64i32 tconstpool  :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
+          (MOV64ri64i32 tjumptable  :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+          (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+          (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+          (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
+
+// In the kernel code model, we can get the address of a label
+// into a register with 'movq'. FIXME: This is a hack; the 'imm' predicate of
+// the MOV64ri32 should accept these.
+def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
+          (MOV64ri32 tconstpool  :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
+          (MOV64ri32 tjumptable  :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
+          (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
+          (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
+def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
+          (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
+
+// With the small code model and the static relocation model, it is safe to
+// store global addresses directly as immediates. FIXME: This is really a hack;
+// the 'imm' predicate for MOV64mi32 should handle this sort of thing.
+def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tconstpool:$src)>,
+          Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tjumptable:$src)>,
+          Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+          Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, texternalsym:$src)>,
+          Requires<[NearData, IsStatic]>;
+def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tblockaddress:$src)>,
+          Requires<[NearData, IsStatic]>;
+
+
+
+// Calls
+
+// TLS has a few special cases here.
+// This corresponds to movabs $foo@tpoff, %rax
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+          (MOV64ri tglobaltlsaddr :$dst)>;
+// This corresponds to add $foo@tpoff, %rax
+def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
+          (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
+// This corresponds to mov foo@tpoff(%rbx), %eax
+def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
+          (MOV64rm tglobaltlsaddr :$dst)>;
+
+
+// Direct PC relative function call for small code model. 32-bit displacement
+// sign extended to 64-bit.
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+          (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+          (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
+
+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+          (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+          (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
+
+// Tail call stuff.
+def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
+          (TCRETURNri GR32_TC:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
+
+// FIXME: This is disabled for 32-bit PIC mode because the global base
+// register, which is part of the address mode, may be assigned a
+// callee-saved register.
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+          (TCRETURNmi addr:$dst, imm:$off)>,
+          Requires<[In32BitMode, IsNotPIC]>;
+
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
+          (TCRETURNdi tglobaladdr:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
+          (TCRETURNdi texternalsym:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+          (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+          (TCRETURNmi64 addr:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
+          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
+          (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
+
+// Normal calls, with various flavors of addresses.
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+          (CALLpcrel32 tglobaladdr:$dst)>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+          (CALLpcrel32 texternalsym:$dst)>;
+def : Pat<(X86call (i32 imm:$dst)),
+          (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
+
+// Comparisons.
+
+// TEST R,R is smaller than CMP R,0
+def : Pat<(X86cmp GR8:$src1, 0),
+          (TEST8rr GR8:$src1, GR8:$src1)>;
+def : Pat<(X86cmp GR16:$src1, 0),
+          (TEST16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86cmp GR32:$src1, 0),
+          (TEST32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(X86cmp GR64:$src1, 0),
+          (TEST64rr GR64:$src1, GR64:$src1)>;
+
+// Conditional moves with folded loads with operands swapped and conditions
+// inverted.
+multiclass CMOVmr<PatLeaf InvertedCond, Instruction Inst16, Instruction Inst32,
+                  Instruction Inst64> {
+  def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS),
+            (Inst16 GR16:$src2, addr:$src1)>;
+  def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS),
+            (Inst32 GR32:$src2, addr:$src1)>;
+  def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS),
+            (Inst64 GR64:$src2, addr:$src1)>;
+}
+
+defm : CMOVmr<X86_COND_B , CMOVAE16rm, CMOVAE32rm, CMOVAE64rm>;
+defm : CMOVmr<X86_COND_AE, CMOVB16rm , CMOVB32rm , CMOVB64rm>;
+defm : CMOVmr<X86_COND_E , CMOVNE16rm, CMOVNE32rm, CMOVNE64rm>;
+defm : CMOVmr<X86_COND_NE, CMOVE16rm , CMOVE32rm , CMOVE64rm>;
+defm : CMOVmr<X86_COND_BE, CMOVA16rm , CMOVA32rm , CMOVA64rm>;
+defm : CMOVmr<X86_COND_A , CMOVBE16rm, CMOVBE32rm, CMOVBE64rm>;
+defm : CMOVmr<X86_COND_L , CMOVGE16rm, CMOVGE32rm, CMOVGE64rm>;
+defm : CMOVmr<X86_COND_GE, CMOVL16rm , CMOVL32rm , CMOVL64rm>;
+defm : CMOVmr<X86_COND_LE, CMOVG16rm , CMOVG32rm , CMOVG64rm>;
+defm : CMOVmr<X86_COND_G , CMOVLE16rm, CMOVLE32rm, CMOVLE64rm>;
+defm : CMOVmr<X86_COND_P , CMOVNP16rm, CMOVNP32rm, CMOVNP64rm>;
+defm : CMOVmr<X86_COND_NP, CMOVP16rm , CMOVP32rm , CMOVP64rm>;
+defm : CMOVmr<X86_COND_S , CMOVNS16rm, CMOVNS32rm, CMOVNS64rm>;
+defm : CMOVmr<X86_COND_NS, CMOVS16rm , CMOVS32rm , CMOVS64rm>;
+defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
+defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
+
+// zextload bool -> zextload byte
+def : Pat<(zextloadi8i1  addr:$src), (MOV8rm     addr:$src)>;
+def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
+def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
+
+// extload bool -> extload byte
+// When extloading from 16-bit and smaller memory locations into 64-bit
+// registers, use zero-extending loads so that the entire 64-bit register is
+// defined, avoiding partial-register updates.
+
+def : Pat<(extloadi8i1 addr:$src),   (MOV8rm      addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src),  (MOVZX16rm8  addr:$src)>;
+def : Pat<(extloadi32i1 addr:$src),  (MOVZX32rm8  addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src),  (MOVZX16rm8  addr:$src)>;
+def : Pat<(extloadi32i8 addr:$src),  (MOVZX32rm8  addr:$src)>;
+def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
+
+def : Pat<(extloadi64i1 addr:$src),  (MOVZX64rm8  addr:$src)>;
+def : Pat<(extloadi64i8 addr:$src),  (MOVZX64rm8  addr:$src)>;
+def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
+// For other extloads, use subregs, since the high contents of the register are
+// defined after an extload.
+def : Pat<(extloadi64i32 addr:$src),
+          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
+                         sub_32bit)>;
+
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8  GR8 :$src)>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8  GR8 :$src)>;
+
+// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.
+def : Pat<(i32 (anyext GR16:$src)),
+          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
+
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8  GR8  :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
+def : Pat<(i64 (anyext GR32:$src)),
+          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+
+
+// Any x86-64 instruction that writes a 32-bit result implicitly zeroes the
+// high half of the destination register, but not every 32-bit DAG node maps
+// to such an instruction: Truncate can be lowered to EXTRACT_SUBREG,
+// CopyFromReg may be copying from a truncate, and x86's cmov doesn't write
+// its destination if the condition is false. Any other 32-bit operation
+// will zero-extend up to 64 bits.
+def def32 : PatLeaf<(i32 GR32:$src), [{
+  return N->getOpcode() != ISD::TRUNCATE &&
+         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+         N->getOpcode() != ISD::CopyFromReg &&
+         N->getOpcode() != X86ISD::CMOV;
+}]>;
+
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)),
+          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
+
+//===----------------------------------------------------------------------===//
+// Pattern match OR as ADD
+//===----------------------------------------------------------------------===//
+
+// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
+// 3-addressified into an LEA instruction to avoid copies. However, we also
+// want to finally emit these instructions as an or at the end of the code
+// generator to make the generated code easier to read. To do this, we select
+// into "disjoint bits" pseudo ops.
+
+// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+
+  unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+  APInt Mask = APInt::getAllOnesValue(BitWidth);
+  APInt KnownZero0, KnownOne0;
+  CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+  APInt KnownZero1, KnownOne1;
+  CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+  return (~KnownZero0 & ~KnownZero1) == 0;
+}]>;
+
+
+// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
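+// For example (a sketch): in
+//
+//   #include <cstdint>
+//   uint64_t pack(uint64_t hi, uint64_t lo) {
+//     return (hi << 32) | (lo & 0xffffffff);   // operand bits are disjoint
+//   }
+//
+// the left operand has its low 32 bits known zero and the right operand has
+// its high 32 bits known zero, so the 'or' qualifies for ADD64rr_DB.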
+let AddedComplexity = 5 in { // Try these before selecting to an OR.
+
+let isConvertibleToThreeAddress = 1,
+    Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
+let isCommutable = 1 in {
+def ADD16rr_DB  : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+                    "", // orw/addw REG, REG
+                    [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
+def ADD32rr_DB  : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+                    "", // orl/addl REG, REG
+                    [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
+def ADD64rr_DB  : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+                    "", // orq/addq REG, REG
+                    [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
+} // isCommutable
+
+// NOTE: These are order-specific; we want the ri8 forms to be listed
+// first so that they are slightly preferred to the ri forms.
+
+def ADD16ri8_DB : I<0, Pseudo,
+                    (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+                    "", // orw/addw REG, imm8
+                    [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
+def ADD16ri_DB  : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+                    "", // orw/addw REG, imm
+                    [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
+
+def ADD32ri8_DB : I<0, Pseudo,
+                    (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+                    "", // orl/addl REG, imm8
+                    [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
+def ADD32ri_DB  : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+                    "", // orl/addl REG, imm
+                    [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
+
+
+def ADD64ri8_DB : I<0, Pseudo,
+                    (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
+                    "", // orq/addq REG, imm8
+                    [(set GR64:$dst, (or_is_add GR64:$src1,
+                                                i64immSExt8:$src2))]>;
+def ADD64ri32_DB : I<0, Pseudo,
+                     (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
+                     "", // orq/addq REG, imm
+                     [(set GR64:$dst, (or_is_add GR64:$src1,
+                                                 i64immSExt32:$src2))]>;
+}
+} // AddedComplexity
+
+
+//===----------------------------------------------------------------------===//
+// Some peepholes
+//===----------------------------------------------------------------------===//
+
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
+def : Pat<(add GR16:$src1, 128),
+          (SUB16ri8 GR16:$src1, -128)>;
+def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
+          (SUB16mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR32:$src1, 128),
+          (SUB32ri8 GR32:$src1, -128)>;
+def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
+          (SUB32mi8 addr:$dst, -128)>;
+
+def : Pat<(add GR64:$src1, 128),
+          (SUB64ri8 GR64:$src1, -128)>;
+def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
+          (SUB64mi8 addr:$dst, -128)>;
+
+// The same trick applies for 32-bit immediate fields in 64-bit
+// instructions.
+def : Pat<(add GR64:$src1, 0x0000000080000000),
+          (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
+def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
+          (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
+
+// To avoid needing to materialize an immediate in a register, use a 32-bit and
+// with implicit zero-extension instead of a 64-bit and if the immediate has at
+// least 32 bits of leading zeros. If in addition the last 32 bits can be
+// represented with a sign extension of an 8-bit constant, use that.
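+// For example (a sketch):
+//
+//   #include <cstdint>
+//   uint64_t low7(uint64_t x) { return x & 0x7f; }
+//
+// can be selected as a 32-bit "and" of the low subregister (AND32ri8, an
+// 8-bit immediate) whose result implicitly zero-extends to all 64 bits.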
+ +def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm), + (SUBREG_TO_REG + (i64 0), + (AND32ri8 + (EXTRACT_SUBREG GR64:$src, sub_32bit), + (i32 (GetLo8XForm imm:$imm))), + sub_32bit)>; + +def : Pat<(and GR64:$src, i64immZExt32:$imm), + (SUBREG_TO_REG + (i64 0), + (AND32ri + (EXTRACT_SUBREG GR64:$src, sub_32bit), + (i32 (GetLo32XForm imm:$imm))), + sub_32bit)>; + + +// r & (2^16-1) ==> movz +def : Pat<(and GR32:$src1, 0xffff), + (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>; +// r & (2^8-1) ==> movz +def : Pat<(and GR32:$src1, 0xff), + (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1, + GR32_ABCD)), + sub_8bit))>, + Requires<[In32BitMode]>; +// r & (2^8-1) ==> movz +def : Pat<(and GR16:$src1, 0xff), + (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1, + GR16_ABCD)), + sub_8bit))>, + Requires<[In32BitMode]>; + +// r & (2^32-1) ==> movz +def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), + (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; +// r & (2^16-1) ==> movz +def : Pat<(and GR64:$src, 0xffff), + (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>; +// r & (2^8-1) ==> movz +def : Pat<(and GR64:$src, 0xff), + (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>; +// r & (2^8-1) ==> movz +def : Pat<(and GR32:$src1, 0xff), + (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>, + Requires<[In64BitMode]>; +// r & (2^8-1) ==> movz +def : Pat<(and GR16:$src1, 0xff), + (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>, + Requires<[In64BitMode]>; + + +// sext_inreg patterns +def : Pat<(sext_inreg GR32:$src, i16), + (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>; +def : Pat<(sext_inreg GR32:$src, i8), + (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + sub_8bit))>, + Requires<[In32BitMode]>; +def : Pat<(sext_inreg GR16:$src, i8), + (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, + GR16_ABCD)), + sub_8bit))>, + Requires<[In32BitMode]>; + +def : Pat<(sext_inreg GR64:$src, i32), + (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; +def : Pat<(sext_inreg GR64:$src, i16), + (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>; +def : Pat<(sext_inreg GR64:$src, i8), + (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>; +def : Pat<(sext_inreg GR32:$src, i8), + (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>, + Requires<[In64BitMode]>; +def : Pat<(sext_inreg GR16:$src, i8), + (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>, + Requires<[In64BitMode]>; + + +// trunc patterns +def : Pat<(i16 (trunc GR32:$src)), + (EXTRACT_SUBREG GR32:$src, sub_16bit)>; +def : Pat<(i8 (trunc GR32:$src)), + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), + sub_8bit)>, + Requires<[In32BitMode]>; +def : Pat<(i8 (trunc GR16:$src)), + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit)>, + Requires<[In32BitMode]>; +def : Pat<(i32 (trunc GR64:$src)), + (EXTRACT_SUBREG GR64:$src, sub_32bit)>; +def : Pat<(i16 (trunc GR64:$src)), + (EXTRACT_SUBREG GR64:$src, sub_16bit)>; +def : Pat<(i8 (trunc GR64:$src)), + (EXTRACT_SUBREG GR64:$src, sub_8bit)>; +def : Pat<(i8 (trunc GR32:$src)), + (EXTRACT_SUBREG GR32:$src, sub_8bit)>, + Requires<[In64BitMode]>; +def : Pat<(i8 (trunc GR16:$src)), + (EXTRACT_SUBREG GR16:$src, sub_8bit)>, + Requires<[In64BitMode]>; + +// h-register tricks +def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi)>, + Requires<[In32BitMode]>; +def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), + 
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), + sub_8bit_hi)>, + Requires<[In32BitMode]>; +def : Pat<(srl GR16:$src, (i8 8)), + (EXTRACT_SUBREG + (MOVZX32rr8 + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi)), + sub_16bit)>, + Requires<[In32BitMode]>; +def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), + (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, + GR16_ABCD)), + sub_8bit_hi))>, + Requires<[In32BitMode]>; +def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), + (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, + GR16_ABCD)), + sub_8bit_hi))>, + Requires<[In32BitMode]>; +def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), + (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + sub_8bit_hi))>, + Requires<[In32BitMode]>; +def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), + (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + sub_8bit_hi))>, + Requires<[In32BitMode]>; + +// h-register tricks. +// For now, be conservative on x86-64 and use an h-register extract only if the +// value is immediately zero-extended or stored, which are somewhat common +// cases. This uses a bunch of code to prevent a register requiring a REX prefix +// from being allocated in the same instruction as the h register, as there's +// currently no way to describe this requirement to the register allocator. + +// h-register extract and zero-extend. +def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)), + (SUBREG_TO_REG + (i64 0), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), + sub_8bit_hi)), + sub_32bit)>; +def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), + sub_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), + (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + sub_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(srl GR16:$src, (i8 8)), + (EXTRACT_SUBREG + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi)), + sub_16bit)>, + Requires<[In64BitMode]>; +def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), + (SUBREG_TO_REG + (i64 0), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi)), + sub_32bit)>; +def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))), + (SUBREG_TO_REG + (i64 0), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi)), + sub_32bit)>; + +// h-register extract and store. 
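+// For example (a sketch):
+//
+//   void second_byte(unsigned x, unsigned char *p) { *p = x >> 8; }
+//
+// can store straight out of the h-register ("movb %ah, (mem)" via
+// MOV8mr_NOREX) once x sits in an ABCD register, with no shift needed.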
+def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), + (MOV8mr_NOREX + addr:$dst, + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), + sub_8bit_hi))>; +def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), + (MOV8mr_NOREX + addr:$dst, + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), + sub_8bit_hi))>, + Requires<[In64BitMode]>; +def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), + (MOV8mr_NOREX + addr:$dst, + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + sub_8bit_hi))>, + Requires<[In64BitMode]>; + + +// (shl x, 1) ==> (add x, x) +def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; +def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; +def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; +def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; + +// (shl x (and y, 31)) ==> (shl x, y) +def : Pat<(shl GR8:$src1, (and CL, 31)), + (SHL8rCL GR8:$src1)>; +def : Pat<(shl GR16:$src1, (and CL, 31)), + (SHL16rCL GR16:$src1)>; +def : Pat<(shl GR32:$src1, (and CL, 31)), + (SHL32rCL GR32:$src1)>; +def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), + (SHL8mCL addr:$dst)>; +def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), + (SHL16mCL addr:$dst)>; +def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), + (SHL32mCL addr:$dst)>; + +def : Pat<(srl GR8:$src1, (and CL, 31)), + (SHR8rCL GR8:$src1)>; +def : Pat<(srl GR16:$src1, (and CL, 31)), + (SHR16rCL GR16:$src1)>; +def : Pat<(srl GR32:$src1, (and CL, 31)), + (SHR32rCL GR32:$src1)>; +def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), + (SHR8mCL addr:$dst)>; +def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), + (SHR16mCL addr:$dst)>; +def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), + (SHR32mCL addr:$dst)>; + +def : Pat<(sra GR8:$src1, (and CL, 31)), + (SAR8rCL GR8:$src1)>; +def : Pat<(sra GR16:$src1, (and CL, 31)), + (SAR16rCL GR16:$src1)>; +def : Pat<(sra GR32:$src1, (and CL, 31)), + (SAR32rCL GR32:$src1)>; +def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst), + (SAR8mCL addr:$dst)>; +def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), + (SAR16mCL addr:$dst)>; +def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), + (SAR32mCL addr:$dst)>; + +// (shl x (and y, 63)) ==> (shl x, y) +def : Pat<(shl GR64:$src1, (and CL, 63)), + (SHL64rCL GR64:$src1)>; +def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), + (SHL64mCL addr:$dst)>; + +def : Pat<(srl GR64:$src1, (and CL, 63)), + (SHR64rCL GR64:$src1)>; +def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), + (SHR64mCL addr:$dst)>; + +def : Pat<(sra GR64:$src1, (and CL, 63)), + (SAR64rCL GR64:$src1)>; +def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), + (SAR64mCL addr:$dst)>; + + +// (anyext (setcc_carry)) -> (setcc_carry) +def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C16r)>; +def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C32r)>; +def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C32r)>; + + + + +//===----------------------------------------------------------------------===// +// EFLAGS-defining Patterns +//===----------------------------------------------------------------------===// + +// add reg, reg +def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 
:$src1, GR8 :$src2)>; +def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>; +def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>; + +// add reg, mem +def : Pat<(add GR8:$src1, (loadi8 addr:$src2)), + (ADD8rm GR8:$src1, addr:$src2)>; +def : Pat<(add GR16:$src1, (loadi16 addr:$src2)), + (ADD16rm GR16:$src1, addr:$src2)>; +def : Pat<(add GR32:$src1, (loadi32 addr:$src2)), + (ADD32rm GR32:$src1, addr:$src2)>; + +// add reg, imm +def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>; +def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>; +def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>; +def : Pat<(add GR16:$src1, i16immSExt8:$src2), + (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(add GR32:$src1, i32immSExt8:$src2), + (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; + +// sub reg, reg +def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>; +def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>; +def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>; + +// sub reg, mem +def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), + (SUB8rm GR8:$src1, addr:$src2)>; +def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), + (SUB16rm GR16:$src1, addr:$src2)>; +def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), + (SUB32rm GR32:$src1, addr:$src2)>; + +// sub reg, imm +def : Pat<(sub GR8:$src1, imm:$src2), + (SUB8ri GR8:$src1, imm:$src2)>; +def : Pat<(sub GR16:$src1, imm:$src2), + (SUB16ri GR16:$src1, imm:$src2)>; +def : Pat<(sub GR32:$src1, imm:$src2), + (SUB32ri GR32:$src1, imm:$src2)>; +def : Pat<(sub GR16:$src1, i16immSExt8:$src2), + (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(sub GR32:$src1, i32immSExt8:$src2), + (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; + +// mul reg, reg +def : Pat<(mul GR16:$src1, GR16:$src2), + (IMUL16rr GR16:$src1, GR16:$src2)>; +def : Pat<(mul GR32:$src1, GR32:$src2), + (IMUL32rr GR32:$src1, GR32:$src2)>; + +// mul reg, mem +def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), + (IMUL16rm GR16:$src1, addr:$src2)>; +def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), + (IMUL32rm GR32:$src1, addr:$src2)>; + +// mul reg, imm +def : Pat<(mul GR16:$src1, imm:$src2), + (IMUL16rri GR16:$src1, imm:$src2)>; +def : Pat<(mul GR32:$src1, imm:$src2), + (IMUL32rri GR32:$src1, imm:$src2)>; +def : Pat<(mul GR16:$src1, i16immSExt8:$src2), + (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(mul GR32:$src1, i32immSExt8:$src2), + (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>; + +// reg = mul mem, imm +def : Pat<(mul (loadi16 addr:$src1), imm:$src2), + (IMUL16rmi addr:$src1, imm:$src2)>; +def : Pat<(mul (loadi32 addr:$src1), imm:$src2), + (IMUL32rmi addr:$src1, imm:$src2)>; +def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2), + (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>; +def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2), + (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>; + +// Optimize multiply by 2 with EFLAGS result. +let AddedComplexity = 2 in { +def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>; +def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>; +} + +// Patterns for nodes that do not produce flags, for instructions that do. 
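+// For example (a sketch): the plain add in
+//
+//   #include <cstdint>
+//   uint64_t sum(uint64_t a, uint64_t b) { return a + b; }
+//
+// is an ISD::ADD with no flag consumer, yet it still selects to ADD64rr;
+// the EFLAGS def of the instruction is simply left dead.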
+ +// addition +def : Pat<(add GR64:$src1, GR64:$src2), + (ADD64rr GR64:$src1, GR64:$src2)>; +def : Pat<(add GR64:$src1, i64immSExt8:$src2), + (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(add GR64:$src1, i64immSExt32:$src2), + (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; +def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), + (ADD64rm GR64:$src1, addr:$src2)>; + +// subtraction +def : Pat<(sub GR64:$src1, GR64:$src2), + (SUB64rr GR64:$src1, GR64:$src2)>; +def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), + (SUB64rm GR64:$src1, addr:$src2)>; +def : Pat<(sub GR64:$src1, i64immSExt8:$src2), + (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(sub GR64:$src1, i64immSExt32:$src2), + (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// Multiply +def : Pat<(mul GR64:$src1, GR64:$src2), + (IMUL64rr GR64:$src1, GR64:$src2)>; +def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), + (IMUL64rm GR64:$src1, addr:$src2)>; +def : Pat<(mul GR64:$src1, i64immSExt8:$src2), + (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(mul GR64:$src1, i64immSExt32:$src2), + (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>; +def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2), + (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>; +def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), + (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; + +// Increment reg. +def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>; +def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>; +def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>; +def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; + +// Decrement reg. +def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>; +def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>; +def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>; +def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; + +// or reg/reg. 
+def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; +def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>; +def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>; +def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>; + +// or reg/mem +def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), + (OR8rm GR8:$src1, addr:$src2)>; +def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), + (OR16rm GR16:$src1, addr:$src2)>; +def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), + (OR32rm GR32:$src1, addr:$src2)>; +def : Pat<(or GR64:$src1, (loadi64 addr:$src2)), + (OR64rm GR64:$src1, addr:$src2)>; + +// or reg/imm +def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>; +def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>; +def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>; +def : Pat<(or GR16:$src1, i16immSExt8:$src2), + (OR16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(or GR32:$src1, i32immSExt8:$src2), + (OR32ri8 GR32:$src1, i32immSExt8:$src2)>; +def : Pat<(or GR64:$src1, i64immSExt8:$src2), + (OR64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(or GR64:$src1, i64immSExt32:$src2), + (OR64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// xor reg/reg +def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>; +def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>; +def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>; +def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>; + +// xor reg/mem +def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), + (XOR8rm GR8:$src1, addr:$src2)>; +def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), + (XOR16rm GR16:$src1, addr:$src2)>; +def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), + (XOR32rm GR32:$src1, addr:$src2)>; +def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)), + (XOR64rm GR64:$src1, addr:$src2)>; + +// xor reg/imm +def : Pat<(xor GR8:$src1, imm:$src2), + (XOR8ri GR8:$src1, imm:$src2)>; +def : Pat<(xor GR16:$src1, imm:$src2), + (XOR16ri GR16:$src1, imm:$src2)>; +def : Pat<(xor GR32:$src1, imm:$src2), + (XOR32ri GR32:$src1, imm:$src2)>; +def : Pat<(xor GR16:$src1, i16immSExt8:$src2), + (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(xor GR32:$src1, i32immSExt8:$src2), + (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; +def : Pat<(xor GR64:$src1, i64immSExt8:$src2), + (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(xor GR64:$src1, i64immSExt32:$src2), + (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// and reg/reg +def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>; +def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>; +def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>; +def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>; + +// and reg/mem +def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), + (AND8rm GR8:$src1, addr:$src2)>; +def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), + (AND16rm GR16:$src1, addr:$src2)>; +def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), + (AND32rm GR32:$src1, addr:$src2)>; +def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), + (AND64rm GR64:$src1, addr:$src2)>; + +// and reg/imm +def : Pat<(and GR8:$src1, imm:$src2), + (AND8ri GR8:$src1, imm:$src2)>; +def : Pat<(and GR16:$src1, imm:$src2), + (AND16ri GR16:$src1, imm:$src2)>; +def : Pat<(and GR32:$src1, imm:$src2), + (AND32ri GR32:$src1, imm:$src2)>; +def : Pat<(and GR16:$src1, i16immSExt8:$src2), + (AND16ri8 
GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(and GR32:$src1, i32immSExt8:$src2), + (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; +def : Pat<(and GR64:$src1, i64immSExt8:$src2), + (AND64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(and GR64:$src1, i64immSExt32:$src2), + (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td new file mode 100644 index 0000000..77f4725 --- /dev/null +++ b/lib/Target/X86/X86InstrControl.td @@ -0,0 +1,294 @@ +//===- X86InstrControl.td - Control Flow Instructions ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 jump, return, call, and related instructions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Control Flow Instructions. +// + +// Return instructions. +let isTerminator = 1, isReturn = 1, isBarrier = 1, + hasCtrlDep = 1, FPForm = SpecialFP in { + def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), + "ret", + [(X86retflag 0)]>; + def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), + "ret\t$amt", + [(X86retflag timm:$amt)]>; + def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), + "retw\t$amt", + []>, OpSize; + def LRETL : I <0xCB, RawFrm, (outs), (ins), + "lretl", []>; + def LRETQ : RI <0xCB, RawFrm, (outs), (ins), + "lretq", []>; + def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), + "lret\t$amt", []>; + def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), + "lretw\t$amt", []>, OpSize; +} + +// Unconditional branches. +let isBarrier = 1, isBranch = 1, isTerminator = 1 in { + def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), + "jmp\t$dst", [(br bb:$dst)]>; + def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), + "jmp\t$dst", []>; + def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst), + "jmp{q}\t$dst", []>; +} + +// Conditional Branches. +let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { + multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { + def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>; + def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm, + [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB; + } +} + +defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>; +defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>; +defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>; +defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>; +defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>; +defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>; +defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>; +defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>; +defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>; +defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>; +defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>; +defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>; +defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>; +defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>; +defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; +defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; + +// jcx/jecx/jrcx instructions. 
+let isAsmParserOnly = 1, isBranch = 1, isTerminator = 1 in {
+  // These are the 32-bit versions of this instruction for the asmparser. In
+  // 32-bit mode, the form with the address-size prefix is jcxz and the
+  // unprefixed form is jecxz.
+  let Uses = [CX] in
+    def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+                        "jcxz\t$dst", []>, AdSize, Requires<[In32BitMode]>;
+  let Uses = [ECX] in
+    def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+                            "jecxz\t$dst", []>, Requires<[In32BitMode]>;
+
+  // J*CXZ instructions: the 64-bit versions for the asmparser. In 64-bit
+  // mode, the form with the address-size prefix is jecxz and the unprefixed
+  // form is jrcxz.
+  let Uses = [ECX] in
+    def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+                            "jecxz\t$dst", []>, AdSize, Requires<[In64BitMode]>;
+  let Uses = [RCX] in
+    def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
+                         "jrcxz\t$dst", []>, Requires<[In64BitMode]>;
+}
+
+// Indirect branches
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+  def JMP32r     : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
+                     [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
+  def JMP32m     : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
+                     [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
+
+  def JMP64r     : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
+                     [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
+  def JMP64m     : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
+                     [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
+
+  def FARJMP16i  : Iseg16<0xEA, RawFrmImm16, (outs),
+                          (ins i16imm:$off, i16imm:$seg),
+                          "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+  def FARJMP32i  : Iseg32<0xEA, RawFrmImm16, (outs),
+                          (ins i32imm:$off, i16imm:$seg),
+                          "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
+  def FARJMP64   : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
+                      "ljmp{q}\t{*}$dst", []>;
+
+  def FARJMP16m  : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
+                     "ljmp{w}\t{*}$dst", []>, OpSize;
+  def FARJMP32m  : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
+                     "ljmp{l}\t{*}$dst", []>;
+}
+
+
+// Loop instructions
+
+def LOOP   : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE  : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+
+//===----------------------------------------------------------------------===//
+//  Call Instructions...
+//
+let isCall = 1 in
+  // All calls clobber the non-callee saved registers. ESP is marked as
+  // a use to prevent stack-pointer assignments that appear immediately
+  // before calls from potentially appearing dead. Uses for argument
+  // registers are added manually.
+  let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+      Uses = [ESP] in {
+    def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
+                                (outs), (ins i32imm_pcrel:$dst,variable_ops),
+                                "call{l}\t$dst", []>, Requires<[In32BitMode]>;
+    def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
+                    "call{l}\t{*}$dst", [(X86call GR32:$dst)]>,
+                  Requires<[In32BitMode]>;
+    def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
+                    "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
+                  Requires<[In32BitMode]>;
+
+    def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
+                            (ins i16imm:$off, i16imm:$seg),
+                            "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+    def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
+                            (ins i32imm:$off, i16imm:$seg),
+                            "lcall{l}\t{$seg, $off|$off, $seg}", []>;
+
+    def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
+                       "lcall{w}\t{*}$dst", []>, OpSize;
+    def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
+                       "lcall{l}\t{*}$dst", []>;
+
+    // callw for 16-bit code for the assembler.
+    let isAsmParserOnly = 1 in
+      def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
+                                  (outs), (ins i16imm_pcrel:$dst, variable_ops),
+                                  "callw\t$dst", []>, OpSize;
+  }
+
+
+// Tail call stuff.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+    isCodeGenOnly = 1 in
+  let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+      Uses = [ESP] in {
+  def TCRETURNdi : PseudoI<(outs),
+                           (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>;
+  def TCRETURNri : PseudoI<(outs),
+                           (ins GR32_TC:$dst, i32imm:$offset, variable_ops), []>;
+  let mayLoad = 1 in
+  def TCRETURNmi : PseudoI<(outs),
+                           (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+  // FIXME: These should be pseudo instructions that are lowered when going to
+  // mcinst.
+  def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
+                           (ins i32imm_pcrel:$dst, variable_ops),
+                           "jmp\t$dst  # TAILCALL",
+                           []>;
+  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
+                   "", []>;  // FIXME: Remove encoding when JIT is dead.
+  let mayLoad = 1 in
+  def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
+                   "jmp{l}\t{*}$dst  # TAILCALL", []>;
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Call Instructions...
+//
+let isCall = 1 in
+  // All calls clobber the non-callee saved registers. RSP is marked as
+  // a use to prevent stack-pointer assignments that appear immediately
+  // before calls from potentially appearing dead. Uses for argument
+  // registers are added manually.
+  let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+      Uses = [RSP] in {
+
+    // NOTE: this pattern doesn't match "X86call imm", because we do not know
+    // that the offset between an arbitrary immediate and the call will fit in
+    // the 32-bit pcrel field that we have.
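The NOTE above is why CALL64pcrel32 carries no pattern for calling an arbitrary immediate: a direct call encodes its target as a signed 32-bit pc-relative displacement, and an arbitrary 64-bit immediate may lie outside that range. A minimal standalone fit check (hypothetical helper, not an LLVM API):

    #include <cstdint>
    #include <limits>

    // True if Target is reachable from the instruction following the call
    // via a signed 32-bit pc-relative displacement.
    bool fitsInPCRel32(int64_t Target, int64_t AddrOfNextInst) {
      int64_t Disp = Target - AddrOfNextInst;
      return Disp >= std::numeric_limits<int32_t>::min() &&
             Disp <= std::numeric_limits<int32_t>::max();
    }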
+    def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+                                  (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+                                  "call{q}\t$dst", []>,
+                        Requires<[In64BitMode, NotWin64]>;
+    def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+                    "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
+                  Requires<[In64BitMode, NotWin64]>;
+    def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+                    "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
+                  Requires<[In64BitMode, NotWin64]>;
+
+    def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
+                       "lcall{q}\t{*}$dst", []>;
+  }
+
+  // FIXME: We need to teach codegen about a single list of call-clobbered
+  // registers.
+let isCall = 1, isCodeGenOnly = 1 in
+  // All calls clobber the non-callee saved registers. RSP is marked as
+  // a use to prevent stack-pointer assignments that appear immediately
+  // before calls from potentially appearing dead. Uses for argument
+  // registers are added manually.
+  let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+      Uses = [RSP] in {
+    def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+                                     (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+                                     "call{q}\t$dst", []>,
+                           Requires<[IsWin64]>;
+    def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+                       "call{q}\t{*}$dst",
+                       [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
+    def WINCALL64m : I<0xFF, MRM2m, (outs),
+                       (ins i64mem:$dst,variable_ops),
+                       "call{q}\t{*}$dst",
+                       [(X86call (loadi64 addr:$dst))]>,
+                     Requires<[IsWin64]>;
+  }
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+    isCodeGenOnly = 1 in
+  // AMD64 cc clobbers RSI, RDI, XMM6-XMM15.
+  let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+      Uses = [RSP],
+      usesCustomInserter = 1 in {
+  def TCRETURNdi64 : PseudoI<(outs),
+                             (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+                             []>;
+  def TCRETURNri64 : PseudoI<(outs),
+                             (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>;
+  let mayLoad = 1 in
+  def TCRETURNmi64 : PseudoI<(outs),
+                             (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>;
+
+  def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
+                             (ins i64i32imm_pcrel:$dst, variable_ops),
+                             "jmp\t$dst  # TAILCALL", []>;
+  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
+                     "jmp{q}\t{*}$dst  # TAILCALL", []>;
+
+  let mayLoad = 1 in
+  def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
+                     "jmp{q}\t{*}$dst  # TAILCALL", []>;
+}
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
new file mode 100644
index 0000000..867c0f8
--- /dev/null
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -0,0 +1,172 @@
+//===- X86InstrExtension.td - Sign and Zero Extensions -----*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the sign and zero extension operations.
+//
+//===----------------------------------------------------------------------===//
+
+let neverHasSideEffects = 1 in {
+  let Defs = [AX], Uses = [AL] in
+  def CBW : I<0x98, RawFrm, (outs), (ins),
+              "{cbtw|cbw}", []>, OpSize;   // AX = signext(AL)
+  let Defs = [EAX], Uses = [AX] in
+  def CWDE : I<0x98, RawFrm, (outs), (ins),
+               "{cwtl|cwde}", []>;   // EAX = signext(AX)
+
+  let Defs = [AX,DX], Uses = [AX] in
+  def CWD : I<0x99, RawFrm, (outs), (ins),
+              "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
+  let Defs = [EAX,EDX], Uses = [EAX] in
+  def CDQ : I<0x99, RawFrm, (outs), (ins),
+              "{cltd|cdq}", []>;   // EDX:EAX = signext(EAX)
+
+
+  let Defs = [RAX], Uses = [EAX] in
+  def CDQE : RI<0x98, RawFrm, (outs), (ins),
+                "{cltq|cdqe}", []>;     // RAX = signext(EAX)
+
+  let Defs = [RAX,RDX], Uses = [RAX] in
+  def CQO : RI<0x99, RawFrm, (outs), (ins),
+               "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
+}
+
+
+// Sign/Zero extenders
+// Use movsbl instead of movsbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update.  Actual movsbw included for the disassembler.
+def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+                    "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+                    "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+
+// FIXME: Use a pat pattern or define a syntax here.
+let isCodeGenOnly=1 in {
+def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
+                   "", [(set GR16:$dst, (sext GR8:$src))]>, TB;
+def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
+                   "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
+}
+def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
+                   "movs{bl|x}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (sext GR8:$src))]>, TB;
+def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
+                   "movs{bl|x}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
+def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
+                   "movs{wl|x}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (sext GR16:$src))]>, TB;
+def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+                   "movs{wl|x}\t{$src, $dst|$dst, $src}",
+                   [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+
+// Use movzbl instead of movzbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update.  Actual movzbw included for the disassembler.
+def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
+                    "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
+                    "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+// FIXME: Use a pat pattern or define a syntax here.
+let isCodeGenOnly=1 in { +def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src), + "", [(set GR16:$dst, (zext GR8:$src))]>, TB; +def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src), + "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB; +} +def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), + "movz{bl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (zext GR8:$src))]>, TB; +def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), + "movz{bl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB; +def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), + "movz{wl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (zext GR16:$src))]>, TB; +def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), + "movz{wl|x}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB; + +// These are the same as the regular MOVZX32rr8 and MOVZX32rm8 +// except that they use GR32_NOREX for the output operand register class +// instead of GR32. This allows them to operate on h registers on x86-64. +def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, + (outs GR32_NOREX:$dst), (ins GR8:$src), + "movz{bl|x}\t{$src, $dst|$dst, $src}", + []>, TB; +let mayLoad = 1 in +def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, + (outs GR32_NOREX:$dst), (ins i8mem:$src), + "movz{bl|x}\t{$src, $dst|$dst, $src}", + []>, TB; + +// MOVSX64rr8 always has a REX prefix and it has an 8-bit register +// operand, which makes it a rare instruction with an 8-bit register +// operand that can never access an h register. If support for h registers +// were generalized, this would require a special register class. +def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), + "movs{bq|x}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sext GR8:$src))]>, TB; +def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), + "movs{bq|x}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB; +def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), + "movs{wq|x}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sext GR16:$src))]>, TB; +def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "movs{wq|x}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB; +def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), + "movs{lq|xd}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sext GR32:$src))]>; +def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), + "movs{lq|xd}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (sextloadi64i32 addr:$src))]>; + +// movzbq and movzwq encodings for the disassembler +def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), + "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; +def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), + "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB; +def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), + "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; +def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB; + +// FIXME: These should be Pat patterns. +let isCodeGenOnly = 1 in { + +// Use movzbl instead of movzbq when the destination is a register; it's +// equivalent due to implicit zero-extending, and it has a smaller encoding. 
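The "implicit zero-extending" that the comment above relies on is the x86-64 rule that any write to a 32-bit register clears the upper 32 bits of the containing 64-bit register, so a movzbl into a GR32 also yields a correctly zero-extended GR64. A toy C++ model of the rule (illustrative only):

    #include <cstdint>

    // Writing a 32-bit value: the upper half of the 64-bit register becomes
    // zero rather than being preserved.
    uint64_t writeLow32(uint32_t Val) {
      return static_cast<uint64_t>(Val); // bits 63..32 are all zero
    }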
+def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+                   "", [(set GR64:$dst, (zext GR8:$src))]>, TB;
+def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+                   "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+// Use movzwl instead of movzwq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+                   "", [(set GR64:$dst, (zext GR16:$src))]>, TB;
+def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+                   "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
+
+// There's no movzlq instruction, but movl can be used for this purpose, using
+// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
+// extension on x86-64 is to use a SUBREG_TO_REG to utilize implicit
+// zero-extension; however, this isn't possible when the 32-bit value is
+// defined by a truncate or is copied from something where the high bits aren't
+// necessarily all zero. In such cases, we fall back to these explicit zext
+// instructions.
+def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
+                    "", [(set GR64:$dst, (zext GR32:$src))]>;
+def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
+                    "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
+
+
+}
+
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 9c9bcc7..b506f5e 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -32,21 +32,24 @@ def SDTX86FpToIMem  : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
 def SDTX86CwdStore  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
 def X86fld          : SDNode<"X86ISD::FLD", SDTX86Fld,
-                             [SDNPHasChain, SDNPMayLoad]>;
+                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def X86fst          : SDNode<"X86ISD::FST", SDTX86Fst,
-                             [SDNPHasChain, SDNPInFlag, SDNPMayStore]>;
+                             [SDNPHasChain, SDNPInGlue, SDNPMayStore,
+                              SDNPMemOperand]>;
 def X86fild         : SDNode<"X86ISD::FILD", SDTX86Fild,
-                             [SDNPHasChain, SDNPMayLoad]>;
+                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def X86fildflag     : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
-                             [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+                             [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
+                              SDNPMemOperand]>;
 def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
-                             [SDNPHasChain, SDNPMayStore]>;
+                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
-                             [SDNPHasChain, SDNPMayStore]>;
+                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
-                             [SDNPHasChain, SDNPMayStore]>;
+                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
-                             [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>;
+                             [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+                              SDNPMemOperand]>;
 
 //===----------------------------------------------------------------------===//
 // FPStack pattern fragments
@@ -70,41 +73,23 @@ def fpimmneg1 : PatLeaf<(fpimm), [{
 // Some 'special' instructions
 let usesCustomInserter = 1 in {  // Expanded after instruction selection.
- def FP32_TO_INT16_IN_MEM : I<0, Pseudo, - (outs), (ins i16mem:$dst, RFP32:$src), - "##FP32_TO_INT16_IN_MEM PSEUDO!", + def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src), [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>; - def FP32_TO_INT32_IN_MEM : I<0, Pseudo, - (outs), (ins i32mem:$dst, RFP32:$src), - "##FP32_TO_INT32_IN_MEM PSEUDO!", + def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src), [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>; - def FP32_TO_INT64_IN_MEM : I<0, Pseudo, - (outs), (ins i64mem:$dst, RFP32:$src), - "##FP32_TO_INT64_IN_MEM PSEUDO!", + def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src), [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>; - def FP64_TO_INT16_IN_MEM : I<0, Pseudo, - (outs), (ins i16mem:$dst, RFP64:$src), - "##FP64_TO_INT16_IN_MEM PSEUDO!", + def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src), [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>; - def FP64_TO_INT32_IN_MEM : I<0, Pseudo, - (outs), (ins i32mem:$dst, RFP64:$src), - "##FP64_TO_INT32_IN_MEM PSEUDO!", + def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src), [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>; - def FP64_TO_INT64_IN_MEM : I<0, Pseudo, - (outs), (ins i64mem:$dst, RFP64:$src), - "##FP64_TO_INT64_IN_MEM PSEUDO!", + def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src), [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>; - def FP80_TO_INT16_IN_MEM : I<0, Pseudo, - (outs), (ins i16mem:$dst, RFP80:$src), - "##FP80_TO_INT16_IN_MEM PSEUDO!", + def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src), [(X86fp_to_i16mem RFP80:$src, addr:$dst)]>; - def FP80_TO_INT32_IN_MEM : I<0, Pseudo, - (outs), (ins i32mem:$dst, RFP80:$src), - "##FP80_TO_INT32_IN_MEM PSEUDO!", + def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src), [(X86fp_to_i32mem RFP80:$src, addr:$dst)]>; - def FP80_TO_INT64_IN_MEM : I<0, Pseudo, - (outs), (ins i64mem:$dst, RFP80:$src), - "##FP80_TO_INT64_IN_MEM PSEUDO!", + def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src), [(X86fp_to_i64mem RFP80:$src, addr:$dst)]>; } @@ -212,11 +197,11 @@ def _Fp80m64: FpI_<(outs RFP80:$dst), [(set RFP80:$dst, (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>; def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src), - !strconcat("f", !strconcat(asmstring, "{s}\t$src"))> { + !strconcat("f", asmstring, "{s}\t$src")> { let mayLoad = 1; } def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src), - !strconcat("f", !strconcat(asmstring, "{l}\t$src"))> { + !strconcat("f", asmstring, "{l}\t$src")> { let mayLoad = 1; } // ST(0) = ST(0) + [memint] @@ -245,11 +230,11 @@ def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), [(set RFP80:$dst, (OpNode RFP80:$src1, (X86fild addr:$src2, i32)))]>; def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src), - !strconcat("fi", !strconcat(asmstring, "{s}\t$src"))> { + !strconcat("fi", asmstring, "{s}\t$src")> { let mayLoad = 1; } def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), - !strconcat("fi", !strconcat(asmstring, "{l}\t$src"))> { + !strconcat("fi", asmstring, "{l}\t$src")> { let mayLoad = 1; } } @@ -580,16 +565,16 @@ def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i) (outs), (ins RST:$reg), - "fucomi\t{$reg, %st(0)|%ST(0), $reg}">, DB; + "fucomi\t$reg">, DB; def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop (outs), (ins RST:$reg), - "fucomip\t{$reg, 
%st(0)|%ST(0), $reg}">, DF;
+                    "fucompi\t$reg">, DF;
 }
 
 def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
-                  "fcomi\t{$reg, %st(0)|%ST(0), $reg}">, DB;
+                  "fcomi\t$reg">, DB;
 def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg),
-                   "fcomip\t{$reg, %st(0)|%ST(0), $reg}">, DF;
+                   "fcompi\t$reg">, DF;
 
 // Floating point flag ops.
 let Defs = [AX] in
@@ -604,8 +589,8 @@ let mayLoad = 1 in
 def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control word = [mem16]
                   (outs), (ins i16mem:$dst), "fldcw\t$dst", []>;
 
-// Register free
-
+// FPU control instructions
+def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB;
 def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
                 "ffree\t$reg">, DD;
@@ -613,7 +598,8 @@ def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg),
 
 def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB;
 
-// Operandless floating-point instructions for the disassembler
+// Operandless floating-point instructions for the disassembler.
+def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
 
 def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", []>, D9;
 def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", []>, D9;
@@ -639,8 +625,12 @@ def FCOMPP  : I<0xD9, RawFrm, (outs), (ins), "fcompp", []>, DE;
 
 def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
                "fxsave\t$dst", []>, TB;
+def FXSAVE64 : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
+                 "fxsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
 def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
                 "fxrstor\t$src", []>, TB;
+def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+                  "fxrstorq\t$src", []>, TB, REX_W, Requires<[In64BitMode]>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 79187e9..344c14c 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -39,7 +39,8 @@ def MRM_E8 : Format<39>;
 def MRM_F0 : Format<40>;
 def MRM_F8 : Format<41>;
 def MRM_F9 : Format<42>;
-def RawFrmImm16 : Format<43>;
+def RawFrmImm8 : Format<43>;
+def RawFrmImm16 : Format<44>;
 
 // ImmType - This specifies the immediate type used by an instruction. This is
 // part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -108,6 +109,7 @@ class VEX_W { bit hasVEX_WPrefix = 1; }
 class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
 class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
 class VEX_L { bit hasVEX_L = 1; }
+class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
 
 class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
               string AsmStr, Domain d = GenericDomain>
@@ -123,6 +125,9 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   dag InOperandList = ins;
   string AsmString = AsmStr;
 
+  // If this is a pseudo instruction, mark it isCodeGenOnly.
+  let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
   //
   // Attributes specific to X86 instructions...
   //
@@ -130,17 +135,18 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   bit hasAdSizePrefix = 0;  // Does this inst have a 0x67 prefix?
 
   bits<4> Prefix = 0;       // Which prefix byte does this inst have?
-  bit hasREX_WPrefix  = 0;  // Does this inst requires the REX.W prefix?
+  bit hasREX_WPrefix  = 0;  // Does this inst require the REX.W prefix?
   FPFormat FPForm = NotFP;  // What flavor of FP instruction is this?
   bit hasLockPrefix = 0;    // Does this inst have a 0xF0 prefix?
   bits<2> SegOvrBits = 0;   // Segment override prefix.
   Domain ExeDomain = d;
-  bit hasVEXPrefix = 0;     // Does this inst requires a VEX prefix?
+  bit hasVEXPrefix = 0;     // Does this inst require a VEX prefix?
   bit hasVEX_WPrefix = 0;   // Does this inst set the VEX_W field?
-  bit hasVEX_4VPrefix = 0;  // Does this inst requires the VEX.VVVV field?
-  bit hasVEX_i8ImmReg = 0;  // Does this inst requires the last source register
+  bit hasVEX_4VPrefix = 0;  // Does this inst require the VEX.VVVV field?
+  bit hasVEX_i8ImmReg = 0;  // Does this inst require the last source register
                             // to be encoded in an immediate field?
-  bit hasVEX_L = 0;         // Does this inst uses large (256-bit) registers?
+  bit hasVEX_L = 0;         // Does this inst use large (256-bit) registers?
+  bit has3DNow0F0FOpcode = 0; // Wacky 3dNow! encoding?
 
   // TSFlags layout should be kept in sync with X86InstrInfo.h.
   let TSFlags{5-0}   = FormBits;
@@ -159,6 +165,12 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   let TSFlags{34}    = hasVEX_4VPrefix;
   let TSFlags{35}    = hasVEX_i8ImmReg;
   let TSFlags{36}    = hasVEX_L;
+  let TSFlags{37}    = has3DNow0F0FOpcode;
+}
+
+class PseudoI<dag oops, dag iops, list<dag> pattern>
+  : X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
+  let Pattern = pattern;
 }
 
 class I<bits<8> o, Format f, dag outs, dag ins, string asm,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 01149b6..5016c0f 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -15,51 +15,8 @@
 // MMX Pattern Fragments
 //===----------------------------------------------------------------------===//
 
-def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>;
-
-def bc_v8i8  : PatFrag<(ops node:$in), (v8i8  (bitconvert node:$in))>;
-def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
-def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>;
-def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>;
-
-//===----------------------------------------------------------------------===//
-// MMX Masks
-//===----------------------------------------------------------------------===//
-
-// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to
-// PSHUFW imm.
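The TSFlags assignments in the hunk above pack these per-instruction bits into a single word that X86InstrInfo.h decodes at emission time; FormBits occupies bits 5-0 and the newly added has3DNow0F0FOpcode lands in bit 37. A standalone sketch of the corresponding decoding (illustrative accessors, not the real ones in X86InstrInfo.h):

    #include <cstdint>

    // Field extraction mirroring the 'let TSFlags{...}' layout above.
    unsigned getFormBits(uint64_t TSFlags)     { return TSFlags & 0x3F; }      // bits 5-0
    bool     getHasVEX_L(uint64_t TSFlags)     { return (TSFlags >> 36) & 1; } // bit 36
    bool     getHas3DNow0F0F(uint64_t TSFlags) { return (TSFlags >> 37) & 1; } // bit 37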
-def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShuffleSHUFImmediate(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> -def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> -def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> -def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> -def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); -}], MMX_SHUFFLE_get_shuf_imm>; +def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>; +def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>; //===----------------------------------------------------------------------===// // SSE specific DAG Nodes. @@ -86,6 +43,21 @@ def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; +def X86pandn : SDNode<"X86ISD::PANDN", + SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86psignb : SDNode<"X86ISD::PSIGNB", + SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86psignw : SDNode<"X86ISD::PSIGNW", + SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86psignd : SDNode<"X86ISD::PSIGND", + SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86pblendv : SDNode<"X86ISD::PBLENDVB", + SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; def X86pextrw : SDNode<"X86ISD::PEXTRW", @@ -102,7 +74,7 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS", def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>; def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>; def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>; @@ -134,18 +106,12 @@ def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>; -def SDTShuff2OpLdI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>, - SDTCisInt<2>]>; - def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>; -def X86PShufhwLd : SDNode<"X86ISD::PSHUFHW_LD", SDTShuff2OpLdI>; -def X86PShuflwLd : SDNode<"X86ISD::PSHUFLW_LD", 
SDTShuff2OpLdI>; - def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>; def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>; @@ -187,9 +153,11 @@ def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; // the top elements. These are used for the SSE 'ss' and 'sd' instruction // forms. def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [], - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, + SDNPWantRoot]>; def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [], - [SDNPHasChain, SDNPMayLoad]>; + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, + SDNPWantRoot]>; def ssmem : Operand<v4f32> { let PrintMethod = "printf32mem"; @@ -273,6 +241,7 @@ def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; +def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>; def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; // 256-bit memop pattern fragments @@ -289,10 +258,7 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() >= 8; }]>; -def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>; -def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>; -def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>; -def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>; +def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>; // MOVNT Support // Like 'store', but requires the non-temporal bit to be set @@ -376,6 +342,18 @@ def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{ return getI8Imm(X86::getShufflePALIGNRImmediate(N)); }]>; +// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index +// to VEXTRACTF128 imm. +def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{ + return getI8Imm(X86::getExtractVEXTRACTF128Immediate(N)); +}]>; + +// INSERT_get_vinsertf128_imm xform function: convert insert_subvector index to +// VINSERTF128 imm. 
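Both xform functions above and the INSERT_get_vinsertf128_imm definition that follows turn a subvector index into the instruction's lane immediate: VEXTRACTF128 and VINSERTF128 address 128-bit lanes, so an element index divides by the number of elements per 128-bit lane. A hypothetical model (not LLVM's actual helper):

    // Element index -> VEXTRACTF128/VINSERTF128 lane immediate.
    unsigned f128LaneImm(unsigned EltIndex, unsigned EltSizeInBits) {
      return EltIndex / (128 / EltSizeInBits); // e.g. element 4 of v8f32 -> lane 1
    }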
+def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
+  return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
+}]>;
+
 def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
                        (vector_shuffle node:$lhs, node:$rhs), [{
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
@@ -466,3 +444,16 @@ def palign : PatFrag<(ops node:$lhs, node:$rhs),
                      (vector_shuffle node:$lhs, node:$rhs), [{
   return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
 }], SHUFFLE_get_palign_imm>;
+
+def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
+                                   (extract_subvector node:$bigvec,
+                                                      node:$index), [{
+  return X86::isVEXTRACTF128Index(N);
+}], EXTRACT_get_vextractf128_imm>;
+
+def vinsertf128_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
+                                      node:$index),
+                                 (insert_subvector node:$bigvec, node:$smallvec,
+                                                   node:$index), [{
+  return X86::isVINSERTF128Index(N);
+}], INSERT_get_vinsertf128_imm>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 5280940..ceb1b65 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -34,7 +34,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/MC/MCAsmInfo.h"
-
 #include <limits>
 
 using namespace llvm;
@@ -55,7 +54,11 @@ ReMatPICStubLoad("remat-pic-stub-load",
 X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
     TM(tm), RI(tm, *this) {
-  SmallVector<unsigned,16> AmbEntries;
+  enum {
+    TB_NOT_REVERSABLE = 1U << 31,
+    TB_FLAGS = TB_NOT_REVERSABLE
+  };
+
   static const unsigned OpTbl2Addr[][2] = {
     { X86::ADC32ri,     X86::ADC32mi },
     { X86::ADC32ri8,    X86::ADC32mi8 },
@@ -65,13 +68,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::ADC64rr,     X86::ADC64mr },
     { X86::ADD16ri,     X86::ADD16mi },
     { X86::ADD16ri8,    X86::ADD16mi8 },
+    { X86::ADD16ri_DB,  X86::ADD16mi  | TB_NOT_REVERSABLE },
+    { X86::ADD16ri8_DB, X86::ADD16mi8 | TB_NOT_REVERSABLE },
     { X86::ADD16rr,     X86::ADD16mr },
+    { X86::ADD16rr_DB,  X86::ADD16mr | TB_NOT_REVERSABLE },
     { X86::ADD32ri,     X86::ADD32mi },
     { X86::ADD32ri8,    X86::ADD32mi8 },
+    { X86::ADD32ri_DB,  X86::ADD32mi | TB_NOT_REVERSABLE },
+    { X86::ADD32ri8_DB, X86::ADD32mi8 | TB_NOT_REVERSABLE },
     { X86::ADD32rr,     X86::ADD32mr },
+    { X86::ADD32rr_DB,  X86::ADD32mr | TB_NOT_REVERSABLE },
     { X86::ADD64ri32,   X86::ADD64mi32 },
    { X86::ADD64ri8,    X86::ADD64mi8 },
+    { X86::ADD64ri32_DB,X86::ADD64mi32 | TB_NOT_REVERSABLE },
+    { X86::ADD64ri8_DB, X86::ADD64mi8 | TB_NOT_REVERSABLE },
     { X86::ADD64rr,     X86::ADD64mr },
+    { X86::ADD64rr_DB,  X86::ADD64mr | TB_NOT_REVERSABLE },
     { X86::ADD8ri,      X86::ADD8mi },
     { X86::ADD8rr,      X86::ADD8mr },
     { X86::AND16ri,     X86::AND16mi },
@@ -216,16 +228,21 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
     unsigned RegOp = OpTbl2Addr[i][0];
-    unsigned MemOp = OpTbl2Addr[i][1];
-    if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
-                                               std::make_pair(MemOp,0))).second)
-      assert(false && "Duplicated entries?");
+    unsigned MemOp = OpTbl2Addr[i][1] & ~TB_FLAGS;
+    assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
+    RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
+
+    // If this is not a reversable operation (because there is a many->one
+    // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
+      continue;
+
     // Index 0, folded load and store, no alignment requirement.
     unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
-    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                                std::make_pair(RegOp,
-                                                              AuxInfo))).second)
-      AmbEntries.push_back(MemOp);
+
+    assert(!MemOp2RegOpTable.count(MemOp) &&
+            "Duplicated entries in unfolding maps?");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
 
   // If the third value is 1, then it's folding either a load or a store.
@@ -252,8 +269,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::DIV64r,      X86::DIV64m, 1, 0 },
     { X86::DIV8r,       X86::DIV8m, 1, 0 },
     { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
-    { X86::FsMOVAPDrr,  X86::MOVSDmr, 0, 0 },
-    { X86::FsMOVAPSrr,  X86::MOVSSmr, 0, 0 },
+    { X86::FsMOVAPDrr,  X86::MOVSDmr | TB_NOT_REVERSABLE , 0, 0 },
+    { X86::FsMOVAPSrr,  X86::MOVSSmr | TB_NOT_REVERSABLE , 0, 0 },
     { X86::IDIV16r,     X86::IDIV16m, 1, 0 },
     { X86::IDIV32r,     X86::IDIV32m, 1, 0 },
     { X86::IDIV64r,     X86::IDIV64m, 1, 0 },
@@ -268,7 +285,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::MOV16rr,     X86::MOV16mr, 0, 0 },
     { X86::MOV32ri,     X86::MOV32mi, 0, 0 },
     { X86::MOV32rr,     X86::MOV32mr, 0, 0 },
-    { X86::MOV32rr_TC,  X86::MOV32mr_TC, 0, 0 },
     { X86::MOV64ri32,   X86::MOV64mi32, 0, 0 },
     { X86::MOV64rr,     X86::MOV64mr, 0, 0 },
     { X86::MOV8ri,      X86::MOV8mi, 0, 0 },
@@ -312,19 +328,22 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
-    unsigned RegOp = OpTbl0[i][0];
-    unsigned MemOp = OpTbl0[i][1];
-    unsigned Align = OpTbl0[i][3];
-    if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
-                                           std::make_pair(MemOp,Align))).second)
-      assert(false && "Duplicated entries?");
+    unsigned RegOp = OpTbl0[i][0];
+    unsigned MemOp = OpTbl0[i][1] & ~TB_FLAGS;
     unsigned FoldedLoad = OpTbl0[i][2];
+    unsigned Align = OpTbl0[i][3];
+    assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
+    RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this is not a reversable operation (because there is a many->one
+    // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
+      continue;
+
     // Index 0, folded load or store.
     unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
-    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
-      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                     std::make_pair(RegOp, AuxInfo))).second)
-        AmbEntries.push_back(MemOp);
+    assert(!MemOp2RegOpTable.count(MemOp) && "Duplicated entries?");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
 
   static const unsigned OpTbl1[][3] = {
@@ -342,8 +361,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::CVTTSD2SIrr,     X86::CVTTSD2SIrm, 0 },
     { X86::CVTTSS2SI64rr,   X86::CVTTSS2SI64rm, 0 },
     { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm, 0 },
-    { X86::FsMOVAPDrr,      X86::MOVSDrm, 0 },
-    { X86::FsMOVAPSrr,      X86::MOVSSrm, 0 },
+    { X86::FsMOVAPDrr,      X86::MOVSDrm | TB_NOT_REVERSABLE , 0 },
+    { X86::FsMOVAPSrr,      X86::MOVSSrm | TB_NOT_REVERSABLE , 0 },
     { X86::IMUL16rri,       X86::IMUL16rmi, 0 },
     { X86::IMUL16rri8,      X86::IMUL16rmi8, 0 },
     { X86::IMUL32rri,       X86::IMUL32rmi, 0 },
@@ -360,8 +379,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::Int_CVTPD2PSrr,  X86::Int_CVTPD2PSrm, 16 },
     { X86::Int_CVTPS2DQrr,  X86::Int_CVTPS2DQrm, 16 },
     { X86::Int_CVTPS2PDrr,  X86::Int_CVTPS2PDrm, 0 },
-    { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 },
-    { X86::Int_CVTSD2SIrr,  X86::Int_CVTSD2SIrm, 0 },
+    { X86::CVTSD2SI64rr,    X86::CVTSD2SI64rm, 0 },
+    { X86::CVTSD2SIrr,      X86::CVTSD2SIrm, 0 },
     { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm, 0 },
     { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
     { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm, 0 },
@@ -370,8 +389,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::Int_CVTSS2SDrr,  X86::Int_CVTSS2SDrm, 0 },
     { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
     { X86::Int_CVTSS2SIrr,  X86::Int_CVTSS2SIrm, 0 },
-    { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
-    { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
+    { X86::CVTTPD2DQrr,     X86::CVTTPD2DQrm, 16 },
+    { X86::CVTTPS2DQrr,     X86::CVTTPS2DQrm, 16 },
     { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
     { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
     { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
@@ -380,7 +399,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm, 0 },
     { X86::MOV16rr,         X86::MOV16rm, 0 },
     { X86::MOV32rr,         X86::MOV32rm, 0 },
-    { X86::MOV32rr_TC,      X86::MOV32rm_TC, 0 },
     { X86::MOV64rr,         X86::MOV64rm, 0 },
     { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm, 0 },
     { X86::MOV64toSDrr,     X86::MOV64toSDrm, 0 },
@@ -439,25 +457,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
 
   for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
     unsigned RegOp = OpTbl1[i][0];
-    unsigned MemOp = OpTbl1[i][1];
+    unsigned MemOp = OpTbl1[i][1] & ~TB_FLAGS;
     unsigned Align = OpTbl1[i][2];
-    if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
-                                           std::make_pair(MemOp,Align))).second)
-      assert(false && "Duplicated entries?");
+    assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
+    RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this is not a reversable operation (because there is a many->one
+    // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
+      continue;
+
     // Index 1, folded load
     unsigned AuxInfo = 1 | (1 << 4);
-    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
-      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                     std::make_pair(RegOp, AuxInfo))).second)
-        AmbEntries.push_back(MemOp);
+    assert(!MemOp2RegOpTable.count(MemOp) && "Duplicate entries");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
 
   static const unsigned OpTbl2[][3] = {
     { X86::ADC32rr,         X86::ADC32rm, 0 },
     { X86::ADC64rr,         X86::ADC64rm, 0 },
     { X86::ADD16rr,         X86::ADD16rm, 0 },
+    { X86::ADD16rr_DB,      X86::ADD16rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD32rr,         X86::ADD32rm, 0 },
+    { X86::ADD32rr_DB,      X86::ADD32rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD64rr,         X86::ADD64rm, 0 },
+    { X86::ADD64rr_DB,      X86::ADD64rm | TB_NOT_REVERSABLE, 0 },
     { X86::ADD8rr,          X86::ADD8rm, 0 },
     { X86::ADDPDrr,         X86::ADDPDrm, 16 },
     { X86::ADDPSrr,         X86::ADDPSrm, 16 },
@@ -652,20 +676,23 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
     unsigned RegOp = OpTbl2[i][0];
-    unsigned MemOp = OpTbl2[i][1];
+    unsigned MemOp = OpTbl2[i][1] & ~TB_FLAGS;
     unsigned Align = OpTbl2[i][2];
-    if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
-                                           std::make_pair(MemOp,Align))).second)
-      assert(false && "Duplicated entries?");
+
+    assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
+    RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
+
+    // If this is not a reversable operation (because there is a many->one
+    // mapping), don't insert the reverse of the operation into MemOp2RegOpTable.
+    if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
+      continue;
+
     // Index 2, folded load
     unsigned AuxInfo = 2 | (1 << 4);
-    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
-                                   std::make_pair(RegOp, AuxInfo))).second)
-      AmbEntries.push_back(MemOp);
+    assert(!MemOp2RegOpTable.count(MemOp) &&
+           "Duplicated entries in unfolding maps?");
+    MemOp2RegOpTable[MemOp] = std::make_pair(RegOp, AuxInfo);
   }
-
-  // Remove ambiguous entries.
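Across the four loops above, the MemOp2RegOpTable value packs an AuxInfo word the same way: the folded operand index in the low bits, "folded load" in bit 4, and "folded store" in bit 5, while TB_NOT_REVERSABLE rides in the high bit of each table's MemOp column. A standalone sketch of that encoding (field meanings taken from this hunk; helper names are illustrative):

    #include <cstdint>

    enum : uint32_t { kNotReversable = 1u << 31 }; // mirrors TB_NOT_REVERSABLE

    struct AuxInfoFields {
      unsigned Index;   // low bits: operand index of the folded memory operand
      bool FoldedLoad;  // bit 4
      bool FoldedStore; // bit 5
    };

    AuxInfoFields unpackAuxInfo(unsigned Aux) {
      return { Aux & 0xFu, ((Aux >> 4) & 1) != 0, ((Aux >> 5) & 1) != 0 };
    }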
-  assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
 }
 
 bool
@@ -745,9 +772,7 @@ static bool isFrameLoadOpcode(int Opcode) {
   case X86::MOV8rm:
   case X86::MOV16rm:
   case X86::MOV32rm:
-  case X86::MOV32rm_TC:
   case X86::MOV64rm:
-  case X86::MOV64rm_TC:
   case X86::LD_Fp64m:
   case X86::MOVSSrm:
   case X86::MOVSDrm:
@@ -768,9 +793,7 @@ static bool isFrameStoreOpcode(int Opcode) {
   case X86::MOV8mr:
   case X86::MOV16mr:
   case X86::MOV32mr:
-  case X86::MOV32mr_TC:
   case X86::MOV64mr:
-  case X86::MOV64mr_TC:
   case X86::ST_FpP64m:
   case X86::MOVSSmr:
   case X86::MOVSDmr:
@@ -785,7 +808,7 @@ return false;
 }
 
-unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                            int &FrameIndex) const {
   if (isFrameLoadOpcode(MI->getOpcode()))
     if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
@@ -793,7 +816,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
   return 0;
 }
 
-unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, 
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                  int &FrameIndex) const {
   if (isFrameLoadOpcode(MI->getOpcode())) {
     unsigned Reg;
@@ -923,10 +946,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
             isPICBase = true;
           }
           return isPICBase;
-        } 
+        }
         return false;
       }
- 
+
       case X86::LEA32r:
       case X86::LEA64r: {
         if (MI->getOperand(2).isImm() &&
@@ -1099,11 +1122,11 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
     ? X86::LEA64_32r : X86::LEA32r;
   MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
-  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
   unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-            
+
   // Build and insert into an implicit UNDEF value. This is OK because
-  // well be shifting and then extracting the lower 16-bits. 
+  // we'll be shifting and then extracting the lower 16-bits.
   // This has the potential to cause partial register stall. e.g.
   //   movw    (%rbp,%rcx,2), %dx
   //   leal    -65(%rdx), %esi
@@ -1137,9 +1160,12 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
     break;
   case X86::ADD16ri:
   case X86::ADD16ri8:
-    addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());    
+  case X86::ADD16ri_DB:
+  case X86::ADD16ri8_DB:
+    addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
     break;
-  case X86::ADD16rr: {
+  case X86::ADD16rr:
+  case X86::ADD16rr_DB: {
     unsigned Src2 = MI->getOperand(2).getReg();
     bool isKill2 = MI->getOperand(2).isKill();
    unsigned leaInReg2 = 0;
@@ -1149,9 +1175,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
       // just a single insert_subreg.
       addRegReg(MIB, leaInReg, true, leaInReg, false);
     } else {
-      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
       // Build and insert into an implicit UNDEF value. This is OK because
-      // well be shifting and then extracting the lower 16-bits. 
+      // we'll be shifting and then extracting the lower 16-bits.
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2); InsMI2 = BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY)) @@ -1218,7 +1244,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SHUFPSrri: { assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!"); if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0; - + unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); if (B != C) return 0; @@ -1236,6 +1262,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned ShAmt = MI->getOperand(2).getImm(); if (ShAmt == 0 || ShAmt >= 4) return 0; + // LEA can't handle RSP. + if (TargetRegisterInfo::isVirtualRegister(Src) && + !MF.getRegInfo().constrainRegClass(Src, &X86::GR64_NOSPRegClass)) + return 0; + NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) @@ -1250,6 +1281,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned ShAmt = MI->getOperand(2).getImm(); if (ShAmt == 0 || ShAmt >= 4) return 0; + // LEA can't handle ESP. + if (TargetRegisterInfo::isVirtualRegister(Src) && + !MF.getRegInfo().constrainRegClass(Src, &X86::GR32_NOSPRegClass)) + return 0; + unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) @@ -1288,6 +1324,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); + + // LEA can't handle RSP. + if (TargetRegisterInfo::isVirtualRegister(Src) && + !MF.getRegInfo().constrainRegClass(Src, + MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass : + X86::GR32_NOSPRegisterClass)) + return 0; + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), @@ -1310,6 +1354,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); + // LEA can't handle RSP. + if (TargetRegisterInfo::isVirtualRegister(Src) && + !MF.getRegInfo().constrainRegClass(Src, + MIOpc == X86::DEC64r ? X86::GR64_NOSPRegisterClass : + X86::GR32_NOSPRegisterClass)) + return 0; + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), @@ -1327,12 +1378,29 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, Src, isKill, -1); break; case X86::ADD64rr: - case X86::ADD32rr: { + case X86::ADD64rr_DB: + case X86::ADD32rr: + case X86::ADD32rr_DB: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r - : (is64Bit ? X86::LEA64_32r : X86::LEA32r); + unsigned Opc; + TargetRegisterClass *RC; + if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) { + Opc = X86::LEA64r; + RC = X86::GR64_NOSPRegisterClass; + } else { + Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; + RC = X86::GR32_NOSPRegisterClass; + } + + unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); + + // LEA can't handle RSP. 
+ if (TargetRegisterInfo::isVirtualRegister(Src2) && + !MF.getRegInfo().constrainRegClass(Src2, RC)) + return 0; + NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), @@ -1341,7 +1409,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, LV->replaceKillInstruction(Src2, MI, NewMI); break; } - case X86::ADD16rr: { + case X86::ADD16rr: + case X86::ADD16rr_DB: { if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); @@ -1357,6 +1426,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::ADD64ri32: case X86::ADD64ri8: + case X86::ADD64ri32_DB: + case X86::ADD64ri8_DB: assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) .addReg(Dest, RegState::Define | @@ -1364,7 +1435,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, Src, isKill, MI->getOperand(2).getImm()); break; case X86::ADD32ri: - case X86::ADD32ri8: { + case X86::ADD32ri8: + case X86::ADD32ri_DB: + case X86::ADD32ri8_DB: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) @@ -1375,6 +1448,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::ADD16ri: case X86::ADD16ri8: + case X86::ADD16ri_DB: + case X86::ADD16ri8_DB: if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); @@ -1396,7 +1471,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, LV->replaceKillInstruction(Dest, MI, NewMI); } - MFI->insert(MBBI, NewMI); // Insert the new inst + MFI->insert(MBBI, NewMI); // Insert the new inst return NewMI; } @@ -1617,7 +1692,7 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { const TargetInstrDesc &TID = MI->getDesc(); if (!TID.isTerminator()) return false; - + // Conditional branch is a special case. if (TID.isBranch() && !TID.isBarrier()) return true; @@ -1626,7 +1701,7 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { return !isPredicated(MI); } -bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, +bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, @@ -1787,7 +1862,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { I = MBB.end(); ++Count; } - + return Count; } @@ -1945,13 +2020,23 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, default: llvm_unreachable("Unknown regclass"); case X86::GR64RegClassID: + case X86::GR64_ABCDRegClassID: + case X86::GR64_NOREXRegClassID: + case X86::GR64_NOREX_NOSPRegClassID: case X86::GR64_NOSPRegClassID: + case X86::GR64_TCRegClassID: + case X86::GR64_TCW64RegClassID: return load ? X86::MOV64rm : X86::MOV64mr; case X86::GR32RegClassID: - case X86::GR32_NOSPRegClassID: + case X86::GR32_ABCDRegClassID: case X86::GR32_ADRegClassID: + case X86::GR32_NOREXRegClassID: + case X86::GR32_NOSPRegClassID: + case X86::GR32_TCRegClassID: return load ? 
X86::MOV32rm : X86::MOV32mr; case X86::GR16RegClassID: + case X86::GR16_ABCDRegClassID: + case X86::GR16_NOREXRegClassID: return load ? X86::MOV16rm : X86::MOV16mr; case X86::GR8RegClassID: // Copying to or from a physical H register on x86-64 requires a NOREX @@ -1961,32 +2046,14 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; else return load ? X86::MOV8rm : X86::MOV8mr; - case X86::GR64_ABCDRegClassID: - return load ? X86::MOV64rm : X86::MOV64mr; - case X86::GR32_ABCDRegClassID: - return load ? X86::MOV32rm : X86::MOV32mr; - case X86::GR16_ABCDRegClassID: - return load ? X86::MOV16rm : X86::MOV16mr; case X86::GR8_ABCD_LRegClassID: + case X86::GR8_NOREXRegClassID: return load ? X86::MOV8rm :X86::MOV8mr; case X86::GR8_ABCD_HRegClassID: if (TM.getSubtarget<X86Subtarget>().is64Bit()) return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; else return load ? X86::MOV8rm : X86::MOV8mr; - case X86::GR64_NOREXRegClassID: - case X86::GR64_NOREX_NOSPRegClassID: - return load ? X86::MOV64rm : X86::MOV64mr; - case X86::GR32_NOREXRegClassID: - return load ? X86::MOV32rm : X86::MOV32mr; - case X86::GR16_NOREXRegClassID: - return load ? X86::MOV16rm : X86::MOV16mr; - case X86::GR8_NOREXRegClassID: - return load ? X86::MOV8rm : X86::MOV8mr; - case X86::GR64_TCRegClassID: - return load ? X86::MOV64rm_TC : X86::MOV64mr_TC; - case X86::GR32_TCRegClassID: - return load ? X86::MOV32rm_TC : X86::MOV32mr_TC; case X86::RFP80RegClassID: return load ? X86::LD_Fp80m : X86::ST_FpP80m; case X86::RFP64RegClassID: @@ -2085,76 +2152,6 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, NewMIs.push_back(MIB); } -bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL = MBB.findDebugLoc(MI); - - bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); - bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); - unsigned SlotSize = is64Bit ? 8 : 4; - - MachineFunction &MF = *MBB.getParent(); - unsigned FPReg = RI.getFrameRegister(MF); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - unsigned CalleeFrameSize = 0; - - unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r; - for (unsigned i = CSI.size(); i != 0; --i) { - unsigned Reg = CSI[i-1].getReg(); - // Add the callee-saved register as live-in. It's killed at the spill. - MBB.addLiveIn(Reg); - if (Reg == FPReg) - // X86RegisterInfo::emitPrologue will handle spilling of frame register. - continue; - if (!X86::VR128RegClass.contains(Reg) && !isWin64) { - CalleeFrameSize += SlotSize; - BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); - } else { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), - RC, &RI); - } - } - - X86FI->setCalleeSavedFrameSize(CalleeFrameSize); - return true; -} - -bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL = MBB.findDebugLoc(MI); - - MachineFunction &MF = *MBB.getParent(); - unsigned FPReg = RI.getFrameRegister(MF); - bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); - bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); - unsigned Opc = is64Bit ? 
X86::POP64r : X86::POP32r; - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (Reg == FPReg) - // X86RegisterInfo::emitEpilogue will handle restoring of frame register. - continue; - if (!X86::VR128RegClass.contains(Reg) && !isWin64) { - BuildMI(MBB, MI, DL, get(Opc), Reg); - } else { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - RC, &RI); - } - } - return true; -} - MachineInstr* X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -2181,7 +2178,7 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, MIB.addOperand(MOs[i]); if (NumAddrOps < 4) // FrameIndex only addOffset(MIB, 0); - + // Loop over the rest of the ri operands, converting them over. unsigned NumOps = MI->getDesc().getNumOperands()-2; for (unsigned i = 0; i != NumOps; ++i) { @@ -2202,7 +2199,7 @@ static MachineInstr *FuseInst(MachineFunction &MF, MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), MI->getDebugLoc(), true); MachineInstrBuilder MIB(NewMI); - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (i == OpNo) { @@ -2238,7 +2235,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned i, const SmallVectorImpl<MachineOperand> &MOs, unsigned Size, unsigned Align) const { - const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; + const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0; bool isTwoAddrFold = false; unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && @@ -2251,7 +2248,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (isTwoAddr && NumOps >= 2 && i < 2 && MI->getOperand(0).isReg() && MI->getOperand(1).isReg() && - MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { + MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { OpcodeTablePtr = &RegOp2MemOpTable2Addr; isTwoAddrFold = true; } else if (i == 0) { // If operand 0 @@ -2265,19 +2262,19 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); if (NewMI) return NewMI; - + OpcodeTablePtr = &RegOp2MemOpTable0; } else if (i == 1) { OpcodeTablePtr = &RegOp2MemOpTable1; } else if (i == 2) { OpcodeTablePtr = &RegOp2MemOpTable2; } - + // If table selected... 
if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = - OpcodeTablePtr->find((unsigned*)MI->getOpcode()); + DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = + OpcodeTablePtr->find(MI->getOpcode()); if (I != OpcodeTablePtr->end()) { unsigned Opcode = I->second.first; unsigned MinAlign = I->second.second; @@ -2320,8 +2317,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return NewMI; } } - - // No fusion + + // No fusion if (PrintFailedFusing && !MI->isCopy()) dbgs() << "We failed to fuse operand " << i << " in " << *MI; return NULL; @@ -2332,7 +2329,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, int FrameIndex) const { - // Check switch flag + // Check switch flag if (NoFusing) return NULL; if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) @@ -2343,8 +2340,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::Int_CVTSS2SDrr: case X86::RCPSSr: case X86::RCPSSr_Int: - case X86::ROUNDSDr_Int: - case X86::ROUNDSSr_Int: + case X86::ROUNDSDr: + case X86::ROUNDSSr: case X86::RSQRTSSr: case X86::RSQRTSSr_Int: case X86::SQRTSSr: @@ -2384,7 +2381,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr *LoadMI) const { - // Check switch flag + // Check switch flag if (NoFusing) return NULL; if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) @@ -2395,8 +2392,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::Int_CVTSS2SDrr: case X86::RCPSSr: case X86::RCPSSr_Int: - case X86::ROUNDSDr_Int: - case X86::ROUNDSSr_Int: + case X86::ROUNDSDr: + case X86::ROUNDSSr: case X86::RSQRTSSr: case X86::RSQRTSSr_Int: case X86::SQRTSSr: @@ -2424,9 +2421,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = 16; break; case X86::FsFLD0SD: + case X86::VFsFLD0SD: Alignment = 8; break; case X86::FsFLD0SS: + case X86::VFsFLD0SS: Alignment = 4; break; default: @@ -2490,9 +2489,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineConstantPool &MCP = *MF.getConstantPool(); const Type *Ty; unsigned Opc = LoadMI->getOpcode(); - if (Opc == X86::FsFLD0SS) + if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS) Ty = Type::getFloatTy(MF.getFunction()->getContext()); - else if (Opc == X86::FsFLD0SD) + else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction()->getContext()); else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY) Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); @@ -2525,13 +2524,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops) const { - // Check switch flag + // Check switch flag if (NoFusing) return 0; if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { switch (MI->getOpcode()) { default: return false; - case X86::TEST8rr: + case X86::TEST8rr: case X86::TEST16rr: case X86::TEST32rr: case X86::TEST64rr: @@ -2551,16 +2550,15 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires // replacing the *two* registers with the memory location. 
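
The hunks around this point switch the load/store folding tables from unsigned* keys (the old cast-the-opcode-to-a-pointer trick) to plain unsigned keys. A minimal sketch of the lookup pattern this enables; the table type is taken from the surrounding code, everything else is illustrative:

  #include "llvm/ADT/DenseMap.h"
  using namespace llvm;

  // Sketch only: a fold table maps an opcode to (memory-form opcode,
  // minimum operand alignment). With unsigned keys no cast is needed.
  typedef DenseMap<unsigned, std::pair<unsigned, unsigned> > FoldTable;

  static unsigned lookupFoldedOpcode(const FoldTable &Tbl, unsigned Opcode,
                                     unsigned Align) {
    FoldTable::const_iterator I = Tbl.find(Opcode);
    if (I == Tbl.end())
      return 0;                      // no memory form known for this opcode
    if (Align < I->second.second)
      return 0;                      // operand is not sufficiently aligned
    return I->second.first;          // the fused load/store opcode
  }
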
- const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; - if (isTwoAddr && NumOps >= 2 && OpNum < 2) { + const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0; + if (isTwoAddr && NumOps >= 2 && OpNum < 2) { OpcodeTablePtr = &RegOp2MemOpTable2Addr; } else if (OpNum == 0) { // If operand 0 switch (Opc) { case X86::MOV8r0: case X86::MOV16r0: case X86::MOV32r0: - case X86::MOV64r0: - return true; + case X86::MOV64r0: return true; default: break; } OpcodeTablePtr = &RegOp2MemOpTable0; @@ -2569,22 +2567,17 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, } else if (OpNum == 2) { OpcodeTablePtr = &RegOp2MemOpTable2; } - - if (OpcodeTablePtr) { - // Find the Opcode to fuse - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = - OpcodeTablePtr->find((unsigned*)Opc); - if (I != OpcodeTablePtr->end()) - return true; - } + + if (OpcodeTablePtr && OpcodeTablePtr->count(Opc)) + return true; return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops); } bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl<MachineInstr*> &NewMIs) const { - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = - MemOp2RegOpTable.find((unsigned*)MI->getOpcode()); + DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = + MemOp2RegOpTable.find(MI->getOpcode()); if (I == MemOp2RegOpTable.end()) return false; unsigned Opc = I->second.first; @@ -2644,7 +2637,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the data processing instruction. MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true); MachineInstrBuilder MIB(DataMI); - + if (FoldedStore) MIB.addReg(Reg, RegState::Define); for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i) @@ -2712,8 +2705,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, if (!N->isMachineOpcode()) return false; - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = - MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode()); + DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = + MemOp2RegOpTable.find(N->getMachineOpcode()); if (I == MemOp2RegOpTable.end()) return false; unsigned Opc = I->second.first; @@ -2813,8 +2806,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex) const { - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = - MemOp2RegOpTable.find((unsigned*)Opc); + DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I = + MemOp2RegOpTable.find(Opc); if (I == MemOp2RegOpTable.end()) return 0; bool FoldedLoad = I->second.second & (1 << 4); @@ -2993,6 +2986,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: + case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11: + case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15: return true; } return false; @@ -3090,6 +3085,41 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } +bool X86InstrInfo:: +hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const 
MachineInstr *DefMI, unsigned DefIdx,
+                      const MachineInstr *UseMI, unsigned UseIdx) const {
+  switch (DefMI->getOpcode()) {
+  default: return false;
+  case X86::DIVSDrm:
+  case X86::DIVSDrm_Int:
+  case X86::DIVSDrr:
+  case X86::DIVSDrr_Int:
+  case X86::DIVSSrm:
+  case X86::DIVSSrm_Int:
+  case X86::DIVSSrr:
+  case X86::DIVSSrr_Int:
+  case X86::SQRTPDm:
+  case X86::SQRTPDm_Int:
+  case X86::SQRTPDr:
+  case X86::SQRTPDr_Int:
+  case X86::SQRTPSm:
+  case X86::SQRTPSm_Int:
+  case X86::SQRTPSr:
+  case X86::SQRTPSr_Int:
+  case X86::SQRTSDm:
+  case X86::SQRTSDm_Int:
+  case X86::SQRTSDr:
+  case X86::SQRTSDr_Int:
+  case X86::SQRTSSm:
+  case X86::SQRTSSm_Int:
+  case X86::SQRTSSr:
+  case X86::SQRTSSr_Int:
+    return true;
+  }
+}
+
 namespace {
   /// CGBR - Create Global Base Reg pass. This initializes the PIC
   /// global base register for x86-32.
@@ -3108,6 +3138,13 @@ namespace {
       if (TM->getRelocationModel() != Reloc::PIC_)
         return false;
 
+      X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+      unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
+
+      // If we didn't need a GlobalBaseReg, don't insert code.
+      if (GlobalBaseReg == 0)
+        return false;
+
       // Insert the set of GlobalBaseReg into the first MBB of the function
       MachineBasicBlock &FirstMBB = MF.front();
       MachineBasicBlock::iterator MBBI = FirstMBB.begin();
@@ -3119,16 +3156,15 @@ namespace {
       if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
         PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
       else
-        PC = TII->getGlobalBaseReg(&MF);
-
+        PC = GlobalBaseReg;
+
       // Operand of MovePCtoStack is completely ignored by asm printer. It's
       // only used in JIT code emission as displacement to pc.
       BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
-
+
       // If we're using vanilla 'GOT' PIC style, we should use relative addressing
       // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
       if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
-        unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF);
         // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
         BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
           .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index f336206..1d44207 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -174,7 +174,7 @@ namespace X86II {
     /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
     /// reference is actually to the "FOO$stub" symbol. This is used for calls
-    /// and jumps to external functions on Tiger and before.
+    /// and jumps to external functions on Tiger and earlier.
     MO_DARWIN_STUB,
 
     /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
@@ -311,12 +311,17 @@ namespace X86II {
     MRM_F0 = 40,
     MRM_F8 = 41,
     MRM_F9 = 42,
+
+    /// RawFrmImm8 - This is used for the ENTER instruction, which has two
+    /// immediates, the first of which is a 16-bit immediate (specified by
+    /// the imm encoding) and the second is an 8-bit fixed value.
+    RawFrmImm8 = 43,
 
     /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
     /// immediates, the first of which is a 16 or 32-bit immediate (specified by
     /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
    /// manual, this operand is described as pntr16:32 and pntr16:16
-    RawFrmImm16 = 43,
+    RawFrmImm16 = 44,
 
     FormMask       = 63,
 
@@ -444,28 +449,36 @@ namespace X86II {
     OpcodeMask    = 0xFF << OpcodeShift,
 
     //===------------------------------------------------------------------===//
-    // VEX - The opcode prefix used by AVX instructions
+    /// VEX - The opcode prefix used by AVX instructions
     VEX         = 1U << 0,
 
-    // VEX_W - Has a opcode specific functionality, but is used in the same
-    // way as REX_W is for regular SSE instructions.
+    /// VEX_W - Has an opcode-specific functionality, but is used in the same
+    /// way as REX_W is for regular SSE instructions.
     VEX_W       = 1U << 1,
 
-    // VEX_4V - Used to specify an additional AVX/SSE register. Several 2
-    // address instructions in SSE are represented as 3 address ones in AVX
-    // and the additional register is encoded in VEX_VVVV prefix.
+    /// VEX_4V - Used to specify an additional AVX/SSE register. Several
+    /// two-address instructions in SSE are represented as three-address ones
+    /// in AVX, and the additional register is encoded in the VEX_VVVV prefix.
     VEX_4V      = 1U << 2,
 
-    // VEX_I8IMM - Specifies that the last register used in a AVX instruction,
-    // must be encoded in the i8 immediate field. This usually happens in
-    // instructions with 4 operands.
+    /// VEX_I8IMM - Specifies that the last register used in an AVX instruction
+    /// must be encoded in the i8 immediate field. This usually happens in
+    /// instructions with 4 operands.
     VEX_I8IMM   = 1U << 3,
 
-    // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
-    // instruction uses 256-bit wide registers. This is usually auto detected if
-    // a VR256 register is used, but some AVX instructions also have this field
-    // marked when using a f256 memory references.
-    VEX_L       = 1U << 4
+    /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+    /// instruction uses 256-bit wide registers. This is usually auto-detected
+    /// if a VR256 register is used, but some AVX instructions also have this
+    /// field marked when using an f256 memory reference.
+    VEX_L       = 1U << 4,
+
+    /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
+    /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
+    /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
+    /// storing a classifier in the imm8 field. To simplify our implementation,
+    /// we handle this by storing the classifier in the opcode field and using
+    /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
+    Has3DNow0F0FOpcode = 1U << 5
   };
 
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -528,6 +541,7 @@ namespace X86II {
     case X86II::AddRegFrm:
     case X86II::MRMDestReg:
     case X86II::MRMSrcReg:
+    case X86II::RawFrmImm8:
     case X86II::RawFrmImm16:
       return -1;
     case X86II::MRMDestMem:
@@ -599,14 +613,14 @@ class X86InstrInfo : public TargetInstrInfoImpl {
   /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
   /// RegOp2MemOpTable2 - Load / store folding opcode maps.
/// - DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr; - DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable0; - DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable1; - DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2; + DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr; + DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0; + DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1; + DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2; /// MemOp2RegOpTable - Load / store unfolding opcode map. /// - DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable; + DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable; public: explicit X86InstrInfo(X86TargetMachine &tm); @@ -728,17 +742,6 @@ public: MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const; - - virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - - virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, @@ -845,18 +848,23 @@ public: /// SetSSEDomain - Set the SSEDomain of MI. void SetSSEDomain(MachineInstr *MI, unsigned Domain) const; + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + unsigned OpNum, + const SmallVectorImpl<MachineOperand> &MOs, + unsigned Size, unsigned Alignment) const; + + bool hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const; + private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const; - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - unsigned OpNum, - const SmallVectorImpl<MachineOperand> &MOs, - unsigned Size, unsigned Alignment) const; - /// isFrameOperand - Return true and the FrameIndex if the specified /// operand and follow operands form a reference to the stack frame. bool isFrameOperand(const MachineInstr *MI, unsigned int Op, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 09b7721..87dc4be 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1,10 +1,10 @@ -//===----------------------------------------------------------------------===// -// +//===- X86InstrInfo.td - Main X86 Instruction Definition ---*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// // // This file describes the X86 instruction set, defining the instructions, and @@ -35,6 +35,20 @@ def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisVT<1, i32>]>; + +// SDTBinaryArithWithFlagsInOut - RES1, EFLAGS = op LHS, RHS, EFLAGS +def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, + SDTCisVT<1, i32>, + SDTCisVT<4, i32>]>; +// RES1, RES2, FLAGS = op LHS, RHS +def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, SDTCisVT<1, i32>]>; def SDTX86BrCond : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; @@ -46,7 +60,7 @@ def SDTX86SetCC_C : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; -def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, +def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisVT<2, i8>]>; def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; @@ -64,6 +78,12 @@ def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>, SDTCisVT<1, iPTR>, SDTCisVT<2, iPTR>]>; +def SDT_X86VAARG_64 : SDTypeProfile<1, -1, [SDTCisPtrTy<0>, + SDTCisPtrTy<1>, + SDTCisVT<2, i32>, + SDTCisVT<3, i8>, + SDTCisVT<4, i32>]>; + def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; def SDTX86Void : SDTypeProfile<0, 0, []>; @@ -72,9 +92,7 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; - -def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>; +def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; @@ -110,82 +128,85 @@ def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>; def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>; def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, - SDNPMayLoad]>; + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, - SDNPMayLoad]>; + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, + SDNPMayLoad, SDNPMemOperand]>; def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86AtomSwap64 : 
SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, - [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def X86vastart_save_xmm_regs : SDNode<"X86ISD::VASTART_SAVE_XMM_REGS", SDT_X86VASTART_SAVE_XMM_REGS, [SDNPHasChain, SDNPVariadic]>; - +def X86vaarg64 : + SDNode<"X86ISD::VAARG_64", SDT_X86VAARG_64, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, + SDNPMemOperand]>; def X86callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart, - [SDNPHasChain, SDNPOutFlag]>; + [SDNPHasChain, SDNPOutGlue]>; def X86callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def X86call : SDNode<"X86ISD::CALL", SDT_X86Call, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>; + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>; def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad]>; def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void, - [SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>; + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; -def X86SegmentBaseAddress : SDNode<"X86ISD::SegmentBaseAddress", - SDT_X86SegmentBaseAddress, []>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; -def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, - [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; +def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>; def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags, [SDNPCommutative]>; -def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags, +def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags, [SDNPCommutative]>; - +def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>; +def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>; + def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>; def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags, @@ -197,11 +218,11 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; -def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void, - [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; - +def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, - []>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; //===----------------------------------------------------------------------===// // X86 Operand Definitions. 
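
Several of the node definitions above gain SDNPMemOperand alongside the SDNPInFlag/SDNPOutFlag to SDNPInGlue/SDNPOutGlue renames. A hedged sketch of how a lowering routine typically builds such a node so that a MachineMemOperand travels with it; Chain, Ptr, Swap, PtrVal, dl, DAG and MF are assumed context, and the getMemIntrinsicNode overload shown is recalled from this era of the API and may differ in detail:

  // Illustrative only: create a memory-touching target node that carries
  // its MachineMemOperand, as SDNPMemOperand advertises it should.
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo(PtrVal),
                            MachineMemOperand::MOLoad |
                            MachineMemOperand::MOStore,
                            /*Size=*/4, /*Alignment=*/4);
  SDValue Ops[] = { Chain, Ptr, Swap };
  SDValue Result =
    DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, dl,
                            DAG.getVTList(MVT::Other, MVT::Glue),
                            Ops, array_lengthof(Ops), MVT::i32, MMO);
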
@@ -252,6 +273,10 @@ def i8mem_NOREX : Operand<i64> {
   let ParserMatchClass = X86MemAsmOperand;
 }
 
+// GPRs available for tail calls.
+// It represents GR64_TC or GR64_TCW64.
+def ptr_rc_tailcall : PointerLikeRegClass<2>;
+
 // Special i32mem for addresses of load folding tail calls. These are not
 // allowed to use callee-saved registers since they must be scheduled
 // after callee-saved register are popped.
@@ -261,6 +286,15 @@ def i32mem_TC : Operand<i32> {
   let ParserMatchClass = X86MemAsmOperand;
 }
 
+// Special i64mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i64mem_TC : Operand<i64> {
+  let PrintMethod = "printi64mem";
+  let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
+                       ptr_rc_tailcall, i32imm, i8imm);
+  let ParserMatchClass = X86MemAsmOperand;
+}
 
 let ParserMatchClass = X86AbsMemAsmOperand,
     PrintMethod = "print_pcrel_imm" in {
@@ -332,43 +366,77 @@ def i32i8imm  : Operand<i32> {
   let ParserMatchClass = ImmSExti32i8AsmOperand;
 }
 
+// 64 bits, but only 32 bits are significant.
+def i64i32imm  : Operand<i64> {
+  let ParserMatchClass = ImmSExti64i32AsmOperand;
+}
+
+// 64 bits, but only 32 bits are significant, and those bits are treated as
+// being pc-relative.
+def i64i32imm_pcrel : Operand<i64> {
+  let PrintMethod = "print_pcrel_imm";
+  let ParserMatchClass = X86AbsMemAsmOperand;
+}
+
+// 64 bits, but only 8 bits are significant.
+def i64i8imm   : Operand<i64> {
+  let ParserMatchClass = ImmSExti64i8AsmOperand;
+}
+
+def lea64_32mem : Operand<i32> {
+  let PrintMethod = "printi32mem";
+  let AsmOperandLowerMethod = "lower_lea64_32mem";
+  let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
+  let ParserMatchClass = X86MemAsmOperand;
+}
+
+
 //===----------------------------------------------------------------------===//
 // X86 Complex Pattern Definitions.
 //
 
 // Define X86 specific addressing mode.
-def addr      : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
+def addr      : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
 def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
                                [add, sub, mul, X86mul_imm, shl, or, frameindex],
                                []>;
 def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
                                [tglobaltlsaddr], []>;
 
+def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
+                               [add, sub, mul, X86mul_imm, shl, or, frameindex,
+                                X86WrapperRIP], []>;
+
+def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+                               [tglobaltlsaddr], []>;
+
 //===----------------------------------------------------------------------===//
 // X86 Instruction Predicate Definitions.
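
The addr pattern above now carries [SDNPWantParent], so the generated matcher passes the node that uses the address into the C++ hook. A sketch of the resulting hook shape; the signature is recalled from this revision and the body is elided:

  // With SDNPWantParent the matcher supplies Parent, which lets SelectAddr
  // make per-use decisions, e.g. about segment handling for the instruction
  // that consumes the address.
  bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N,
                                   SDValue &Base, SDValue &Scale,
                                   SDValue &Index, SDValue &Disp,
                                   SDValue &Segment);
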
def HasCMov : Predicate<"Subtarget->hasCMov()">; def NoCMov : Predicate<"!Subtarget->hasCMov()">; -// FIXME: temporary hack to let codegen assert or generate poor code in case -// no AVX version of the desired intructions is present, this is better for -// incremental dev (without fallbacks it's easier to spot what's missing) -def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">; -def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">; -def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">; -def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">; -def HasSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">; -def HasSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">; -def HasSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">; -def HasSSE4A : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">; +def HasMMX : Predicate<"Subtarget->hasMMX()">; +def Has3DNow : Predicate<"Subtarget->has3DNow()">; +def Has3DNowA : Predicate<"Subtarget->has3DNowA()">; +def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; +def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; +def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; +def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; +def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; +def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; +def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; +def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">; + +def HasAES : Predicate<"Subtarget->hasAES()">; def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">; def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; -def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; -def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; -def In32BitMode : Predicate<"!Subtarget->is64Bit()">; -def In64BitMode : Predicate<"Subtarget->is64Bit()">; +def FPStackf32 : Predicate<"!Subtarget->hasXMM()">; +def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">; +def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate; +def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; @@ -383,7 +451,6 @@ def OptForSize : Predicate<"OptForSize">; def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; -def HasAES : Predicate<"Subtarget->hasAES()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. @@ -418,40 +485,24 @@ def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>; def i16immSExt8 : PatLeaf<(i16 immSext8)>; def i32immSExt8 : PatLeaf<(i32 immSext8)>; - -/// Load patterns: these constraint the match to the right address space. 
-def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  return true;
-}]>;
-
-def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      return PT->getAddressSpace() == 256;
-  return false;
+def i64immSExt8  : PatLeaf<(i64 immSext8)>;
+def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
+def i64immZExt32 : PatLeaf<(i64 imm), [{
+  // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+  // unsigned (zero-extended) field.
+  return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
 }]>;
 
-def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      return PT->getAddressSpace() == 257;
-  return false;
+def i64immZExt32SExt8 : PatLeaf<(i64 imm), [{
+  uint64_t v = N->getZExtValue();
+  return v == (uint32_t)v && (int32_t)v == (int8_t)v;
 }]>;
-
 // Helper fragments for loads.
 // It's always safe to treat a anyext i16 load as a i32 load if the i16 is
 // known to be 32-bit aligned or better. Ditto for i8 to i16.
 def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
   LoadSDNode *LD = cast<LoadSDNode>(N);
-  if (const Value *Src = LD->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::NON_EXTLOAD)
     return true;
@@ -462,10 +513,6 @@
 def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
   LoadSDNode *LD = cast<LoadSDNode>(N);
-  if (const Value *Src = LD->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::EXTLOAD)
     return LD->getAlignment() >= 2 && !LD->isVolatile();
@@ -474,10 +521,6 @@
 def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
   LoadSDNode *LD = cast<LoadSDNode>(N);
-  if (const Value *Src = LD->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::NON_EXTLOAD)
     return true;
@@ -486,15 +529,18 @@
   return false;
 }]>;
 
-def loadi8  : PatFrag<(ops node:$ptr), (i8  (dsload node:$ptr))>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>;
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>;
+def loadi8  : PatFrag<(ops node:$ptr), (i8  (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
 
 def
sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>; def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>; def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>; +def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>; +def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>; +def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>; def zextloadi8i1 : PatFrag<(ops node:$ptr), (i8 (zextloadi1 node:$ptr))>; def zextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (zextloadi1 node:$ptr))>; @@ -502,6 +548,10 @@ def zextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (zextloadi1 node:$ptr))>; def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>; def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>; def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>; +def zextloadi64i1 : PatFrag<(ops node:$ptr), (i64 (zextloadi1 node:$ptr))>; +def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>; +def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>; +def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>; def extloadi8i1 : PatFrag<(ops node:$ptr), (i8 (extloadi1 node:$ptr))>; def extloadi16i1 : PatFrag<(ops node:$ptr), (i16 (extloadi1 node:$ptr))>; @@ -509,6 +559,10 @@ def extloadi32i1 : PatFrag<(ops node:$ptr), (i32 (extloadi1 node:$ptr))>; def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 (extloadi8 node:$ptr))>; def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>; def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>; +def extloadi64i1 : PatFrag<(ops node:$ptr), (i64 (extloadi1 node:$ptr))>; +def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>; +def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>; +def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>; // An 'and' node with a single use. @@ -524,66 +578,10 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ return N->hasOneUse(); }]>; -// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero. -def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) - return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); - - unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); - APInt Mask = APInt::getAllOnesValue(BitWidth); - APInt KnownZero0, KnownOne0; - CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0); - APInt KnownZero1, KnownOne1; - CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0); - return (~KnownZero0 & ~KnownZero1) == 0; -}]>; - //===----------------------------------------------------------------------===// -// Instruction list... +// Instruction list. // -// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into -// a stack adjustment and the codegen must know that they may modify the stack -// pointer before prolog-epilog rewriting occurs. -// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become -// sub / add which can clobber EFLAGS. 
-let Defs = [ESP, EFLAGS], Uses = [ESP] in { -def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt), - "#ADJCALLSTACKDOWN", - [(X86callseq_start timm:$amt)]>, - Requires<[In32BitMode]>; -def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), - "#ADJCALLSTACKUP", - [(X86callseq_end timm:$amt1, timm:$amt2)]>, - Requires<[In32BitMode]>; -} - -// x86-64 va_start lowering magic. -let usesCustomInserter = 1 in { -def VASTART_SAVE_XMM_REGS : I<0, Pseudo, - (outs), - (ins GR8:$al, - i64imm:$regsavefi, i64imm:$offset, - variable_ops), - "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset", - [(X86vastart_save_xmm_regs GR8:$al, - imm:$regsavefi, - imm:$offset)]>; - -// Dynamic stack allocation yields _alloca call for Cygwin/Mingw targets. Calls -// to _alloca is needed to probe the stack when allocating more than 4k bytes in -// one go. Touching the stack at 4K increments is necessary to ensure that the -// guard pages used by the OS virtual memory manager are allocated in correct -// sequence. -// The main point of having separate instruction are extra unmodelled effects -// (compared to ordinary calls) like stack pointer change. - -let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in - def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins), - "# dynamic stack allocation", - [(X86MingwAlloca)]>; -} - // Nop let neverHasSideEffects = 1 in { def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>; @@ -593,206 +591,22 @@ let neverHasSideEffects = 1 in { "nop{l}\t$zero", []>, TB; } -// Trap -let Uses = [EFLAGS] in { - def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; -} -def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", - [(int_x86_int (i8 3))]>; -// FIXME: need to make sure that "int $3" matches int3 -def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", - [(int_x86_int imm:$trap)]>; -def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize; -def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>; - -// PIC base construction. This expands to code that looks like this: -// call $next_inst -// popl %destreg" -let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in - def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label), - "", []>; - -//===----------------------------------------------------------------------===// -// Control Flow Instructions. -// - -// Return instructions. -let isTerminator = 1, isReturn = 1, isBarrier = 1, - hasCtrlDep = 1, FPForm = SpecialFP in { - def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), - "ret", - [(X86retflag 0)]>; - def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), - "ret\t$amt", - [(X86retflag timm:$amt)]>; - def LRET : I <0xCB, RawFrm, (outs), (ins), - "lret", []>; - def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), - "lret\t$amt", []>; -} - -// Unconditional branches. -let isBarrier = 1, isBranch = 1, isTerminator = 1 in { - def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), - "jmp\t$dst", [(br bb:$dst)]>; - def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), - "jmp\t$dst", []>; -} - -// Conditional Branches. 
-let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { - multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { - def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>; - def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm, - [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB; - } -} - -defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>; -defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>; -defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>; -defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>; -defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>; -defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>; -defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>; -defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>; -defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>; -defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>; -defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>; -defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>; -defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>; -defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>; -defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; -defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; - -// FIXME: What about the CX/RCX versions of this instruction? -let Uses = [ECX], isBranch = 1, isTerminator = 1 in - def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jcxz\t$dst", []>; - - -// Indirect branches -let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { - def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", - [(brind GR32:$dst)]>, Requires<[In32BitMode]>; - def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", - [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>; - - def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs), - (ins i16imm:$off, i16imm:$seg), - "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize; - def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs), - (ins i32imm:$off, i16imm:$seg), - "ljmp{l}\t{$seg, $off|$off, $seg}", []>; - - def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), - "ljmp{w}\t{*}$dst", []>, OpSize; - def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst), - "ljmp{l}\t{*}$dst", []>; -} - - -// Loop instructions - -def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>; -def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>; -def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>; - -//===----------------------------------------------------------------------===// -// Call Instructions... -// -let isCall = 1 in - // All calls clobber the non-callee saved registers. ESP is marked as - // a use to prevent stack-pointer assignments that appear immediately - // before calls from potentially appearing dead. Uses for argument - // registers are added manually. 
- let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP] in { - def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, - (outs), (ins i32imm_pcrel:$dst,variable_ops), - "call\t$dst", []>; - def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), - "call\t{*}$dst", [(X86call GR32:$dst)]>; - def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), - "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>; - - def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), - (ins i16imm:$off, i16imm:$seg), - "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize; - def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs), - (ins i32imm:$off, i16imm:$seg), - "lcall{l}\t{$seg, $off|$off, $seg}", []>; - - def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst), - "lcall{w}\t{*}$dst", []>, OpSize; - def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst), - "lcall{l}\t{*}$dst", []>; - - // callw for 16 bit code for the assembler. - let isAsmParserOnly = 1 in - def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm, - (outs), (ins i16imm_pcrel:$dst, variable_ops), - "callw\t$dst", []>, OpSize; - } // Constructing a stack frame. +def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl), + "enter\t$len, $lvl", []>; -def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl), - "enter\t$len, $lvl", []>; - -// Tail call stuff. - -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1 in - let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP] in { - def TCRETURNdi : I<0, Pseudo, (outs), - (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), - "#TC_RETURN $dst $offset", []>; - def TCRETURNri : I<0, Pseudo, (outs), - (ins GR32_TC:$dst, i32imm:$offset, variable_ops), - "#TC_RETURN $dst $offset", []>; - let mayLoad = 1 in - def TCRETURNmi : I<0, Pseudo, (outs), - (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), - "#TC_RETURN $dst $offset", []>; - - // FIXME: The should be pseudo instructions that are lowered when going to - // mcinst. - def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs), - (ins i32imm_pcrel:$dst, variable_ops), - "jmp\t$dst # TAILCALL", - []>; - def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), - "", []>; // FIXME: Remove encoding when JIT is dead. - let mayLoad = 1 in - def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops), - "jmp{l}\t{*}$dst # TAILCALL", []>; -} - -//===----------------------------------------------------------------------===// -// Miscellaneous Instructions... 
-// let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in def LEAVE : I<0xC9, RawFrm, (outs), (ins), "leave", []>, Requires<[In32BitMode]>; -def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; -let mayLoad = 1 in -def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), - "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; -def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; -let mayLoad = 1 in -def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; +let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in +def LEAVE64 : I<0xC9, RawFrm, + (outs), (ins), "leave", []>, Requires<[In64BitMode]>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +// let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in { let mayLoad = 1 in { @@ -805,6 +619,10 @@ def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>, OpSize; def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>; def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>; + +def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize; +def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>, + Requires<[In32BitMode]>; } let mayStore = 1 in { @@ -817,29 +635,54 @@ def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>, OpSize; def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>; def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>; -} -} -let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in { -def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm), +def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm), "push{l}\t$imm", []>; -def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), +def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), "push{w}\t$imm", []>, OpSize; -def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), +def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), "push{l}\t$imm", []>; -} -let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in { -def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize; -def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>, - Requires<[In32BitMode]>; -} -let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in { def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize; def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>, Requires<[In32BitMode]>; + +} +} + +let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { +let mayLoad = 1 in { +def POP64r : I<0x58, AddRegFrm, + (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; +def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; +def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>; +} +let mayStore = 1 in { +def PUSH64r : I<0x50, AddRegFrm, + (outs), (ins GR64:$reg), "push{q}\t$reg", []>; +def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>; +def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>; +} } +let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { +def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), + "push{q}\t$imm", []>; 
+def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), + "push{q}\t$imm", []>; +def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm), + "push{q}\t$imm", []>; +} + +let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in +def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>, + Requires<[In64BitMode]>; +let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in +def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>, + Requires<[In64BitMode]>; + + + let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], mayLoad=1, neverHasSideEffects=1 in { def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>, @@ -851,12 +694,16 @@ def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>, Requires<[In32BitMode]>; } -let Uses = [EFLAGS], Constraints = "$src = $dst" in // GR32 = bswap GR32 - def BSWAP32r : I<0xC8, AddRegFrm, - (outs GR32:$dst), (ins GR32:$src), - "bswap{l}\t$dst", - [(set GR32:$dst, (bswap GR32:$src))]>, TB; +let Constraints = "$src = $dst" in { // GR32 = bswap GR32 +def BSWAP32r : I<0xC8, AddRegFrm, + (outs GR32:$dst), (ins GR32:$src), + "bswap{l}\t$dst", + [(set GR32:$dst, (bswap GR32:$src))]>, TB; +def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), + "bswap{q}\t$dst", + [(set GR64:$dst, (bswap GR64:$src))]>, TB; +} // Constraints = "$src = $dst" // Bit scan instructions. let Defs = [EFLAGS] in { @@ -873,6 +720,12 @@ def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsf{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB; +def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "bsf{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB; +def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "bsf{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB; def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", @@ -887,44 +740,23 @@ def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsr{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB; +def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "bsr{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB; +def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "bsr{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB; } // Defs = [EFLAGS] -let neverHasSideEffects = 1 in -def LEA16r : I<0x8D, MRMSrcMem, - (outs GR16:$dst), (ins i32mem:$src), - "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize; -let isReMaterializable = 1 in -def LEA32r : I<0x8D, MRMSrcMem, - (outs GR32:$dst), (ins i32mem:$src), - "lea{l}\t{$src|$dst}, {$dst|$src}", - [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>; - -let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { -def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", - [(X86rep_movs i8)]>, REP; -def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", - [(X86rep_movs i16)]>, REP, OpSize; -def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", - [(X86rep_movs i32)]>, REP; -} // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI 
let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in { def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>; def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize; def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>; +def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>; } -let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in -def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", - [(X86rep_stos i8)]>, REP; -let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in -def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", - [(X86rep_stos i16)]>, REP, OpSize; -let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in -def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", - [(X86rep_stos i32)]>, REP; - // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>; @@ -932,91 +764,24 @@ let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize; let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>; +let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in +def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>; def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>; def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize; def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>; +def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>; def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>; def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize; def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>; - -let Defs = [RAX, RDX] in -def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, - TB; - -let Defs = [RAX, RCX, RDX] in -def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; - -let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in { -def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; -} - -def SYSCALL : I<0x05, RawFrm, - (outs), (ins), "syscall", []>, TB; -def SYSRET : I<0x07, RawFrm, - (outs), (ins), "sysret", []>, TB; -def SYSENTER : I<0x34, RawFrm, - (outs), (ins), "sysenter", []>, TB; -def SYSEXIT : I<0x35, RawFrm, - (outs), (ins), "sysexit", []>, TB, Requires<[In32BitMode]>; - -def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>; +def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>; //===----------------------------------------------------------------------===// -// Input/Output Instructions... +// Move Instructions. 
// -let Defs = [AL], Uses = [DX] in -def IN8rr : I<0xEC, RawFrm, (outs), (ins), - "in{b}\t{%dx, %al|%AL, %DX}", []>; -let Defs = [AX], Uses = [DX] in -def IN16rr : I<0xED, RawFrm, (outs), (ins), - "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize; -let Defs = [EAX], Uses = [DX] in -def IN32rr : I<0xED, RawFrm, (outs), (ins), - "in{l}\t{%dx, %eax|%EAX, %DX}", []>; - -let Defs = [AL] in -def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i16i8imm:$port), - "in{b}\t{$port, %al|%AL, $port}", []>; -let Defs = [AX] in -def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port), - "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize; -let Defs = [EAX] in -def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i16i8imm:$port), - "in{l}\t{$port, %eax|%EAX, $port}", []>; - -let Uses = [DX, AL] in -def OUT8rr : I<0xEE, RawFrm, (outs), (ins), - "out{b}\t{%al, %dx|%DX, %AL}", []>; -let Uses = [DX, AX] in -def OUT16rr : I<0xEF, RawFrm, (outs), (ins), - "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize; -let Uses = [DX, EAX] in -def OUT32rr : I<0xEF, RawFrm, (outs), (ins), - "out{l}\t{%eax, %dx|%DX, %EAX}", []>; - -let Uses = [AL] in -def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i16i8imm:$port), - "out{b}\t{%al, $port|$port, %AL}", []>; -let Uses = [AX] in -def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port), - "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize; -let Uses = [EAX] in -def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i16i8imm:$port), - "out{l}\t{%eax, $port|$port, %EAX}", []>; - -def IN8 : I<0x6C, RawFrm, (outs), (ins), - "ins{b}", []>; -def IN16 : I<0x6D, RawFrm, (outs), (ins), - "ins{w}", []>, OpSize; -def IN32 : I<0x6D, RawFrm, (outs), (ins), - "ins{l}", []>; -//===----------------------------------------------------------------------===// -// Move Instructions... -// let neverHasSideEffects = 1 in { def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", []>; @@ -1024,6 +789,8 @@ def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; +def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src), @@ -1035,6 +802,12 @@ def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src), def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, imm:$src)]>; +def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), + "movabs{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, imm:$src)]>; +def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), + "mov{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, i64immSExt32:$src)]>; } def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), @@ -1046,6 +819,9 @@ def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src), def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(store (i32 imm:$src), addr:$dst)]>; +def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), + "mov{q}\t{$src, $dst|$dst, $src}", + [(store i64immSExt32:$src, addr:$dst)]>; /// moffs8, moffs16 and moffs32 versions of moves. The immediate is a /// 32-bit offset from the PC. These are only valid in x86-32 mode. 
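
The MOV64ri32 and MOV64mi32 definitions above key off i64immSExt32, while i64immZExt32 (defined earlier) covers the zero-extended case. A small self-contained sketch of the fit tests involved; the helper names are illustrative only:

  #include <cstdint>

  // mov{q} $imm32 sign-extends its 32-bit immediate to 64 bits, so it is
  // usable exactly when the value round-trips through int32_t.
  static bool fitsInSExt32(int64_t Imm) {
    return Imm == (int64_t)(int32_t)Imm;
  }

  // i64immZExt32-style test: the value round-trips through uint32_t.
  static bool fitsInZExt32(uint64_t Imm) {
    return Imm == (uint64_t)(uint32_t)Imm;
  }

  // Values failing the sign-extension test need the full 10-byte
  // movabs{q} form (MOV64ri) with a 64-bit immediate.
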
@@ -1067,24 +843,22 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, %eax}", []>, Requires<[In32BitMode]>; - -// Moves to and from segment registers -def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; -def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; -def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; -def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; + +// FIXME: These definitions are utterly broken +// Just leave them commented out for now because they're useless outside +// of the large code model, and most compilers won't generate the instructions +// in question. +/* +def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src), + "mov{q}\t{$src, %rax|%rax, $src}", []>; +def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src), + "mov{q}\t{$src, %rax|%rax, $src}", []>; +def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins), + "mov{q}\t{%rax, $dst|$dst, %rax}", []>; +def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), + "mov{q}\t{%rax, $dst|$dst, %rax}", []>; +*/ + let isCodeGenOnly = 1 in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), @@ -1093,6 +867,8 @@ def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; +def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>; } let canFoldAsLoad = 1, isReMaterializable = 1 in { @@ -1105,6 +881,9 @@ def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (loadi32 addr:$src))]>; +def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "mov{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (load addr:$src))]>; } def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), @@ -1116,24 +895,9 @@ def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(store GR32:$src, addr:$dst)]>; - -/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC. 
-let isCodeGenOnly = 1 in { -let neverHasSideEffects = 1 in -def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; - -let mayLoad = 1, - canFoldAsLoad = 1, isReMaterializable = 1 in -def MOV32rm_TC : I<0x8B, MRMSrcMem, (outs GR32_TC:$dst), (ins i32mem_TC:$src), - "mov{l}\t{$src, $dst|$dst, $src}", - []>; - -let mayStore = 1 in -def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src), - "mov{l}\t{$src, $dst|$dst, $src}", - []>; -} +def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", + [(store GR64:$src, addr:$dst)]>; // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so // that they can be used for copying and storing h registers, which can't be @@ -1154,2219 +918,6 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem, "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; } -// Moves to and from debug registers -def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; - -// Moves to and from control registers -def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; - -//===----------------------------------------------------------------------===// -// Fixed-Register Multiplication and Division Instructions... -// - -// Extra precision multiplication - -// AL is really implied by AX, but the registers in Defs must match the -// SDNode results (i8, i32). -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", - // FIXME: Used for 8-bit mul, ignore result upper 8 bits. - // This probably ought to be moved to a def : Pat<> if the - // syntax can be accepted. - [(set AL, (mul AL, GR8:$src)), - (implicit EFLAGS)]>; // AL,AH = AL*GR8 - -let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in -def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), - "mul{w}\t$src", - []>, OpSize; // AX,DX = AX*GR16 - -let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in -def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), - "mul{l}\t$src", - []>; // EAX,EDX = EAX*GR32 - -let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), - "mul{b}\t$src", - // FIXME: Used for 8-bit mul, ignore result upper 8 bits. - // This probably ought to be moved to a def : Pat<> if the - // syntax can be accepted. 
-                 [(set AL, (mul AL, (loadi8 addr:$src))),
-                  (implicit EFLAGS)]>;   // AL,AH = AL*[mem8]
-
-let mayLoad = 1, neverHasSideEffects = 1 in {
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
-               "mul{w}\t$src",
-               []>, OpSize; // AX,DX = AX*[mem16]
-
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
-               "mul{l}\t$src",
-               []>; // EAX,EDX = EAX*[mem32]
-}
-
-let neverHasSideEffects = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8r  : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
-              // AL,AH = AL*GR8
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
-              OpSize; // AX,DX = AX*GR16
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
-              // EAX,EDX = EAX*GR32
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8m  : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
-              "imul{b}\t$src", []>;    // AL,AH = AL*[mem8]
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
-              "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
-              "imul{l}\t$src", []>;  // EAX,EDX = EAX*[mem32]
-}
-} // neverHasSideEffects
-
-// unsigned division/remainder
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def DIV8r  : I<0xF6, MRM6r, (outs), (ins GR8:$src),    // AX/r8 = AL,AH
-               "div{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src),   // DX:AX/r16 = AX,DX
-               "div{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src),   // EDX:EAX/r32 = EAX,EDX
-               "div{l}\t$src", []>;
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def DIV8m  : I<0xF6, MRM6m, (outs), (ins i8mem:$src),  // AX/[mem8] = AL,AH
-               "div{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
-               "div{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-                                                       // EDX:EAX/[mem32] = EAX,EDX
-def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
-               "div{l}\t$src", []>;
-}
-
-// Signed division/remainder.
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src),    // AX/r8 = AL,AH
-               "idiv{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src),   // DX:AX/r16 = AX,DX
-               "idiv{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src),   // EDX:EAX/r32 = EAX,EDX
-               "idiv{l}\t$src", []>;
-let mayLoad = 1 in {
-let Defs = [AL,EFLAGS,AX], Uses = [AX] in
-def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src),  // AX/[mem8] = AL,AH
-               "idiv{b}\t$src", []>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
-               "idiv{w}\t$src", []>, OpSize;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
-                                                       // EDX:EAX/[mem32] = EAX,EDX
-               "idiv{l}\t$src", []>;
-}
-
-//===----------------------------------------------------------------------===//
-// Two address Instructions.
-// -let Constraints = "$src1 = $dst" in { - -// Conditional moves -let Uses = [EFLAGS] in { - -let Predicates = [HasCMov] in { -let isCommutable = 1 in { -def CMOVB16rr : I<0x42, MRMSrcReg, // if <u, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovb{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_B, EFLAGS))]>, - TB, OpSize; -def CMOVB32rr : I<0x42, MRMSrcReg, // if <u, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovb{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_B, EFLAGS))]>, - TB; -def CMOVAE16rr: I<0x43, MRMSrcReg, // if >=u, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovae{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_AE, EFLAGS))]>, - TB, OpSize; -def CMOVAE32rr: I<0x43, MRMSrcReg, // if >=u, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovae{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_AE, EFLAGS))]>, - TB; -def CMOVE16rr : I<0x44, MRMSrcReg, // if ==, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmove{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_E, EFLAGS))]>, - TB, OpSize; -def CMOVE32rr : I<0x44, MRMSrcReg, // if ==, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmove{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_E, EFLAGS))]>, - TB; -def CMOVNE16rr: I<0x45, MRMSrcReg, // if !=, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovne{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_NE, EFLAGS))]>, - TB, OpSize; -def CMOVNE32rr: I<0x45, MRMSrcReg, // if !=, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovne{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_NE, EFLAGS))]>, - TB; -def CMOVBE16rr: I<0x46, MRMSrcReg, // if <=u, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovbe{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_BE, EFLAGS))]>, - TB, OpSize; -def CMOVBE32rr: I<0x46, MRMSrcReg, // if <=u, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovbe{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_BE, EFLAGS))]>, - TB; -def CMOVA16rr : I<0x47, MRMSrcReg, // if >u, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmova{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_A, EFLAGS))]>, - TB, OpSize; -def CMOVA32rr : I<0x47, MRMSrcReg, // if >u, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmova{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_A, EFLAGS))]>, - TB; -def CMOVL16rr : I<0x4C, MRMSrcReg, // if <s, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovl{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_L, EFLAGS))]>, - TB, OpSize; -def CMOVL32rr : I<0x4C, MRMSrcReg, // if <s, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovl{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_L, EFLAGS))]>, - TB; -def CMOVGE16rr: I<0x4D, MRMSrcReg, // if >=s, GR16 = GR16 - 
(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovge{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_GE, EFLAGS))]>, - TB, OpSize; -def CMOVGE32rr: I<0x4D, MRMSrcReg, // if >=s, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovge{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_GE, EFLAGS))]>, - TB; -def CMOVLE16rr: I<0x4E, MRMSrcReg, // if <=s, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovle{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_LE, EFLAGS))]>, - TB, OpSize; -def CMOVLE32rr: I<0x4E, MRMSrcReg, // if <=s, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovle{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_LE, EFLAGS))]>, - TB; -def CMOVG16rr : I<0x4F, MRMSrcReg, // if >s, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovg{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_G, EFLAGS))]>, - TB, OpSize; -def CMOVG32rr : I<0x4F, MRMSrcReg, // if >s, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovg{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_G, EFLAGS))]>, - TB; -def CMOVS16rr : I<0x48, MRMSrcReg, // if signed, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovs{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_S, EFLAGS))]>, - TB, OpSize; -def CMOVS32rr : I<0x48, MRMSrcReg, // if signed, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovs{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_S, EFLAGS))]>, - TB; -def CMOVNS16rr: I<0x49, MRMSrcReg, // if !signed, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovns{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_NS, EFLAGS))]>, - TB, OpSize; -def CMOVNS32rr: I<0x49, MRMSrcReg, // if !signed, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovns{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_NS, EFLAGS))]>, - TB; -def CMOVP16rr : I<0x4A, MRMSrcReg, // if parity, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovp{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_P, EFLAGS))]>, - TB, OpSize; -def CMOVP32rr : I<0x4A, MRMSrcReg, // if parity, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovp{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_P, EFLAGS))]>, - TB; -def CMOVNP16rr : I<0x4B, MRMSrcReg, // if !parity, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovnp{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_NP, EFLAGS))]>, - TB, OpSize; -def CMOVNP32rr : I<0x4B, MRMSrcReg, // if !parity, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovnp{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_NP, EFLAGS))]>, - TB; -def CMOVO16rr : I<0x40, MRMSrcReg, // if overflow, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovo{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_O, EFLAGS))]>, - TB, 
OpSize; -def CMOVO32rr : I<0x40, MRMSrcReg, // if overflow, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovo{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_O, EFLAGS))]>, - TB; -def CMOVNO16rr : I<0x41, MRMSrcReg, // if !overflow, GR16 = GR16 - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "cmovno{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, - X86_COND_NO, EFLAGS))]>, - TB, OpSize; -def CMOVNO32rr : I<0x41, MRMSrcReg, // if !overflow, GR32 = GR32 - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "cmovno{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2, - X86_COND_NO, EFLAGS))]>, - TB; -} // isCommutable = 1 - -def CMOVB16rm : I<0x42, MRMSrcMem, // if <u, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovb{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_B, EFLAGS))]>, - TB, OpSize; -def CMOVB32rm : I<0x42, MRMSrcMem, // if <u, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovb{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_B, EFLAGS))]>, - TB; -def CMOVAE16rm: I<0x43, MRMSrcMem, // if >=u, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovae{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_AE, EFLAGS))]>, - TB, OpSize; -def CMOVAE32rm: I<0x43, MRMSrcMem, // if >=u, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovae{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_AE, EFLAGS))]>, - TB; -def CMOVE16rm : I<0x44, MRMSrcMem, // if ==, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmove{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_E, EFLAGS))]>, - TB, OpSize; -def CMOVE32rm : I<0x44, MRMSrcMem, // if ==, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmove{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_E, EFLAGS))]>, - TB; -def CMOVNE16rm: I<0x45, MRMSrcMem, // if !=, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovne{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_NE, EFLAGS))]>, - TB, OpSize; -def CMOVNE32rm: I<0x45, MRMSrcMem, // if !=, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovne{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_NE, EFLAGS))]>, - TB; -def CMOVBE16rm: I<0x46, MRMSrcMem, // if <=u, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovbe{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_BE, EFLAGS))]>, - TB, OpSize; -def CMOVBE32rm: I<0x46, MRMSrcMem, // if <=u, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovbe{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_BE, EFLAGS))]>, - TB; -def CMOVA16rm : I<0x47, MRMSrcMem, // if >u, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmova{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_A, EFLAGS))]>, - TB, 
OpSize; -def CMOVA32rm : I<0x47, MRMSrcMem, // if >u, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmova{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_A, EFLAGS))]>, - TB; -def CMOVL16rm : I<0x4C, MRMSrcMem, // if <s, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovl{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_L, EFLAGS))]>, - TB, OpSize; -def CMOVL32rm : I<0x4C, MRMSrcMem, // if <s, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovl{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_L, EFLAGS))]>, - TB; -def CMOVGE16rm: I<0x4D, MRMSrcMem, // if >=s, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovge{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_GE, EFLAGS))]>, - TB, OpSize; -def CMOVGE32rm: I<0x4D, MRMSrcMem, // if >=s, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovge{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_GE, EFLAGS))]>, - TB; -def CMOVLE16rm: I<0x4E, MRMSrcMem, // if <=s, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovle{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_LE, EFLAGS))]>, - TB, OpSize; -def CMOVLE32rm: I<0x4E, MRMSrcMem, // if <=s, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovle{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_LE, EFLAGS))]>, - TB; -def CMOVG16rm : I<0x4F, MRMSrcMem, // if >s, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovg{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_G, EFLAGS))]>, - TB, OpSize; -def CMOVG32rm : I<0x4F, MRMSrcMem, // if >s, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovg{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_G, EFLAGS))]>, - TB; -def CMOVS16rm : I<0x48, MRMSrcMem, // if signed, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovs{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_S, EFLAGS))]>, - TB, OpSize; -def CMOVS32rm : I<0x48, MRMSrcMem, // if signed, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovs{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_S, EFLAGS))]>, - TB; -def CMOVNS16rm: I<0x49, MRMSrcMem, // if !signed, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovns{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_NS, EFLAGS))]>, - TB, OpSize; -def CMOVNS32rm: I<0x49, MRMSrcMem, // if !signed, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovns{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_NS, EFLAGS))]>, - TB; -def CMOVP16rm : I<0x4A, MRMSrcMem, // if parity, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovp{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_P, 
EFLAGS))]>, - TB, OpSize; -def CMOVP32rm : I<0x4A, MRMSrcMem, // if parity, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovp{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_P, EFLAGS))]>, - TB; -def CMOVNP16rm : I<0x4B, MRMSrcMem, // if !parity, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovnp{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_NP, EFLAGS))]>, - TB, OpSize; -def CMOVNP32rm : I<0x4B, MRMSrcMem, // if !parity, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovnp{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_NP, EFLAGS))]>, - TB; -def CMOVO16rm : I<0x40, MRMSrcMem, // if overflow, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovo{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_O, EFLAGS))]>, - TB, OpSize; -def CMOVO32rm : I<0x40, MRMSrcMem, // if overflow, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovo{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_O, EFLAGS))]>, - TB; -def CMOVNO16rm : I<0x41, MRMSrcMem, // if !overflow, GR16 = [mem16] - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "cmovno{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2), - X86_COND_NO, EFLAGS))]>, - TB, OpSize; -def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32] - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "cmovno{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2), - X86_COND_NO, EFLAGS))]>, - TB; -} // Predicates = [HasCMov] - -// X86 doesn't have 8-bit conditional moves. Use a customInserter to -// emit control flow. An alternative to this is to mark i8 SELECT as Promote, -// however that requires promoting the operands, and can induce additional -// i8 register pressure. Note that CMOV_GR8 is conservatively considered to -// clobber EFLAGS, because if one of the operands is zero, the expansion -// could involve an xor. 
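
To make the preceding comment concrete, a minimal C sketch (function names and codegen are illustrative, not taken from this patch): a 32-bit select on a CMOV-capable target is normally matched to one of the CMOVcc patterns above, while the 8-bit select has no CMOV encoding and is lowered through the CMOV_GR8 pseudo defined below, whose custom inserter expands it into branches.

    #include <stdint.h>

    /* Typically compiles to cmp + cmovg/cmovl (one of the CMOV32rr defs). */
    int32_t max32(int32_t a, int32_t b) { return a > b ? a : b; }

    /* No 8-bit cmov exists, so this select goes through the CMOV_GR8
       pseudo and is emitted as explicit control flow.                    */
    int8_t max8(int8_t a, int8_t b) { return a > b ? a : b; }
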
-let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in { -def CMOV_GR8 : I<0, Pseudo, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond), - "#CMOV_GR8 PSEUDO!", - [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2, - imm:$cond, EFLAGS))]>; - -let Predicates = [NoCMov] in { -def CMOV_GR32 : I<0, Pseudo, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond), - "#CMOV_GR32* PSEUDO!", - [(set GR32:$dst, - (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>; -def CMOV_GR16 : I<0, Pseudo, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond), - "#CMOV_GR16* PSEUDO!", - [(set GR16:$dst, - (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>; -def CMOV_RFP32 : I<0, Pseudo, - (outs RFP32:$dst), - (ins RFP32:$src1, RFP32:$src2, i8imm:$cond), - "#CMOV_RFP32 PSEUDO!", - [(set RFP32:$dst, - (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond, - EFLAGS))]>; -def CMOV_RFP64 : I<0, Pseudo, - (outs RFP64:$dst), - (ins RFP64:$src1, RFP64:$src2, i8imm:$cond), - "#CMOV_RFP64 PSEUDO!", - [(set RFP64:$dst, - (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond, - EFLAGS))]>; -def CMOV_RFP80 : I<0, Pseudo, - (outs RFP80:$dst), - (ins RFP80:$src1, RFP80:$src2, i8imm:$cond), - "#CMOV_RFP80 PSEUDO!", - [(set RFP80:$dst, - (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond, - EFLAGS))]>; -} // Predicates = [NoCMov] -} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] -} // Uses = [EFLAGS] - - -// unary instructions -let CodeSize = 2 in { -let Defs = [EFLAGS] in { -def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), - "neg{b}\t$dst", - [(set GR8:$dst, (ineg GR8:$src1)), - (implicit EFLAGS)]>; -def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1), - "neg{w}\t$dst", - [(set GR16:$dst, (ineg GR16:$src1)), - (implicit EFLAGS)]>, OpSize; -def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), - "neg{l}\t$dst", - [(set GR32:$dst, (ineg GR32:$src1)), - (implicit EFLAGS)]>; - -let Constraints = "" in { - def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), - "neg{b}\t$dst", - [(store (ineg (loadi8 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>; - def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), - "neg{w}\t$dst", - [(store (ineg (loadi16 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>, OpSize; - def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), - "neg{l}\t$dst", - [(store (ineg (loadi32 addr:$dst)), addr:$dst), - (implicit EFLAGS)]>; -} // Constraints = "" -} // Defs = [EFLAGS] - -// Match xor -1 to not. Favors these over a move imm + xor to save code size. -let AddedComplexity = 15 in { -def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1), - "not{b}\t$dst", - [(set GR8:$dst, (not GR8:$src1))]>; -def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1), - "not{w}\t$dst", - [(set GR16:$dst, (not GR16:$src1))]>, OpSize; -def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), - "not{l}\t$dst", - [(set GR32:$dst, (not GR32:$src1))]>; -} -let Constraints = "" in { - def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), - "not{b}\t$dst", - [(store (not (loadi8 addr:$dst)), addr:$dst)]>; - def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), - "not{w}\t$dst", - [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize; - def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), - "not{l}\t$dst", - [(store (not (loadi32 addr:$dst)), addr:$dst)]>; -} // Constraints = "" -} // CodeSize - -// TODO: inc/dec is slow for P4, but fast for Pentium-M. 
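
One encoding subtlety in the INC/DEC definitions that follow: the one-byte 0x40-0x4F register forms carry Requires<[In32BitMode]> because those opcode bytes are reassigned as REX prefixes in 64-bit mode, leaving only the two-byte FF /0 and FF /1 forms there. Illustrative byte sequences (standard x86 encodings; the C arrays are just a container for the bytes):

    /* Two encodings of "incl %eax".  The short form is 32-bit-mode only,
       since 0x40..0x4f decode as REX prefixes in 64-bit mode.            */
    static const unsigned char inc_eax_short[] = { 0x40 };       /* INC32r, AddRegFrm */
    static const unsigned char inc_eax_long[]  = { 0xff, 0xc0 }; /* FF /0, valid in both modes */
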
-let Defs = [EFLAGS] in { -let CodeSize = 2 in -def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), - "inc{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>; - -let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. -def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), - "inc{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, - OpSize, Requires<[In32BitMode]>; -def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), - "inc{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, - Requires<[In32BitMode]>; -} -let Constraints = "", CodeSize = 2 in { - def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", - [(store (add (loadi8 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>; - def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", - [(store (add (loadi16 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In32BitMode]>; - def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", - [(store (add (loadi32 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In32BitMode]>; -} // Constraints = "", CodeSize = 2 - -let CodeSize = 2 in -def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), - "dec{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>; -let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. -def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), - "dec{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, - OpSize, Requires<[In32BitMode]>; -def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), - "dec{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, - Requires<[In32BitMode]>; -} // CodeSize = 2 - -let Constraints = "", CodeSize = 2 in { - def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", - [(store (add (loadi8 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>; - def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", - [(store (add (loadi16 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In32BitMode]>; - def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", - [(store (add (loadi32 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In32BitMode]>; -} // Constraints = "", CodeSize = 2 -} // Defs = [EFLAGS] - -// Logical operators... -let Defs = [EFLAGS] in { -let isCommutable = 1 in { // X = AND Y, Z --> X = AND Z, Y -def AND8rr : I<0x20, MRMDestReg, - (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2), - "and{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, GR8:$src2))]>; -def AND16rr : I<0x21, MRMDestReg, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "and{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1, - GR16:$src2))]>, OpSize; -def AND32rr : I<0x21, MRMDestReg, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "and{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, - GR32:$src2))]>; -} - -// AND instructions with the destination register in REG and the source register -// in R/M. Included for the disassembler. 
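
The "included for the disassembler" remark above reflects x86's redundant ModRM direction bit: every register-register ALU operation has two equally valid encodings, and the compiler only ever emits one of them, so the _REV defs that follow exist purely so the other can still be decoded. For example (standard encodings, shown only as illustration):

    /* Both disassemble as "andl %ebx, %eax".  AND32rr covers the first
       form; AND32rr_REV exists only so the second can be decoded.       */
    static const unsigned char and_mr[] = { 0x21, 0xd8 }; /* 21 /r: AND r/m32, r32 */
    static const unsigned char and_rm[] = { 0x23, 0xc3 }; /* 23 /r: AND r32, r/m32 */
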
-let isCodeGenOnly = 1 in { -def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "and{b}\t{$src2, $dst|$dst, $src2}", []>; -def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "and{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; -def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "and{l}\t{$src2, $dst|$dst, $src2}", []>; -} - -def AND8rm : I<0x22, MRMSrcMem, - (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), - "and{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, - (loadi8 addr:$src2)))]>; -def AND16rm : I<0x23, MRMSrcMem, - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "and{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1, - (loadi16 addr:$src2)))]>, - OpSize; -def AND32rm : I<0x23, MRMSrcMem, - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "and{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, - (loadi32 addr:$src2)))]>; - -def AND8ri : Ii8<0x80, MRM4r, - (outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2), - "and{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, - imm:$src2))]>; -def AND16ri : Ii16<0x81, MRM4r, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), - "and{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1, - imm:$src2))]>, OpSize; -def AND32ri : Ii32<0x81, MRM4r, - (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), - "and{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, - imm:$src2))]>; -def AND16ri8 : Ii8<0x83, MRM4r, - (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), - "and{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1, - i16immSExt8:$src2))]>, - OpSize; -def AND32ri8 : Ii8<0x83, MRM4r, - (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), - "and{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, - i32immSExt8:$src2))]>; - -let Constraints = "" in { - def AND8mr : I<0x20, MRMDestMem, - (outs), (ins i8mem :$dst, GR8 :$src), - "and{b}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), GR8:$src), addr:$dst), - (implicit EFLAGS)]>; - def AND16mr : I<0x21, MRMDestMem, - (outs), (ins i16mem:$dst, GR16:$src), - "and{w}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), GR16:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def AND32mr : I<0x21, MRMDestMem, - (outs), (ins i32mem:$dst, GR32:$src), - "and{l}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), GR32:$src), addr:$dst), - (implicit EFLAGS)]>; - def AND8mi : Ii8<0x80, MRM4m, - (outs), (ins i8mem :$dst, i8imm :$src), - "and{b}\t{$src, $dst|$dst, $src}", - [(store (and (loadi8 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>; - def AND16mi : Ii16<0x81, MRM4m, - (outs), (ins i16mem:$dst, i16imm:$src), - "and{w}\t{$src, $dst|$dst, $src}", - [(store (and (loadi16 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def AND32mi : Ii32<0x81, MRM4m, - (outs), (ins i32mem:$dst, i32imm:$src), - "and{l}\t{$src, $dst|$dst, $src}", - [(store (and (loadi32 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>; - def AND16mi8 : Ii8<0x83, MRM4m, - (outs), (ins i16mem:$dst, i16i8imm :$src), - "and{w}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), i16immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def AND32mi8 : Ii8<0x83, MRM4m, - (outs), (ins i32mem:$dst, i32i8imm :$src), - 
"and{l}\t{$src, $dst|$dst, $src}", - [(store (and (load addr:$dst), i32immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>; - - def AND8i8 : Ii8<0x24, RawFrm, (outs), (ins i8imm:$src), - "and{b}\t{$src, %al|%al, $src}", []>; - def AND16i16 : Ii16<0x25, RawFrm, (outs), (ins i16imm:$src), - "and{w}\t{$src, %ax|%ax, $src}", []>, OpSize; - def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src), - "and{l}\t{$src, %eax|%eax, $src}", []>; - -} // Constraints = "" - - -let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y -def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst), - (ins GR8 :$src1, GR8 :$src2), - "or{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1, GR8:$src2))]>; -def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "or{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,GR16:$src2))]>, - OpSize; -def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "or{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,GR32:$src2))]>; -} - -// OR instructions with the destination register in REG and the source register -// in R/M. Included for the disassembler. -let isCodeGenOnly = 1 in { -def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "or{b}\t{$src2, $dst|$dst, $src2}", []>; -def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "or{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; -def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "or{l}\t{$src2, $dst|$dst, $src2}", []>; -} - -def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst), - (ins GR8 :$src1, i8mem :$src2), - "or{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1, - (load addr:$src2)))]>; -def OR16rm : I<0x0B, MRMSrcMem, (outs GR16:$dst), - (ins GR16:$src1, i16mem:$src2), - "or{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1, - (load addr:$src2)))]>, - OpSize; -def OR32rm : I<0x0B, MRMSrcMem, (outs GR32:$dst), - (ins GR32:$src1, i32mem:$src2), - "or{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1, - (load addr:$src2)))]>; - -def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst), - (ins GR8 :$src1, i8imm:$src2), - "or{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst,EFLAGS, (X86or_flag GR8:$src1, imm:$src2))]>; -def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst), - (ins GR16:$src1, i16imm:$src2), - "or{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1, - imm:$src2))]>, OpSize; -def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst), - (ins GR32:$src1, i32imm:$src2), - "or{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1, - imm:$src2))]>; - -def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst), - (ins GR16:$src1, i16i8imm:$src2), - "or{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1, - i16immSExt8:$src2))]>, OpSize; -def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst), - (ins GR32:$src1, i32i8imm:$src2), - "or{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1, - i32immSExt8:$src2))]>; -let Constraints = "" in { - def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), - "or{b}\t{$src, $dst|$dst, $src}", - [(store (or (load addr:$dst), GR8:$src), addr:$dst), - (implicit EFLAGS)]>; - def OR16mr : I<0x09, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), - "or{w}\t{$src, $dst|$dst, 
$src}", - [(store (or (load addr:$dst), GR16:$src), addr:$dst), - (implicit EFLAGS)]>, OpSize; - def OR32mr : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), - "or{l}\t{$src, $dst|$dst, $src}", - [(store (or (load addr:$dst), GR32:$src), addr:$dst), - (implicit EFLAGS)]>; - def OR8mi : Ii8<0x80, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src), - "or{b}\t{$src, $dst|$dst, $src}", - [(store (or (loadi8 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>; - def OR16mi : Ii16<0x81, MRM1m, (outs), (ins i16mem:$dst, i16imm:$src), - "or{w}\t{$src, $dst|$dst, $src}", - [(store (or (loadi16 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def OR32mi : Ii32<0x81, MRM1m, (outs), (ins i32mem:$dst, i32imm:$src), - "or{l}\t{$src, $dst|$dst, $src}", - [(store (or (loadi32 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>; - def OR16mi8 : Ii8<0x83, MRM1m, (outs), (ins i16mem:$dst, i16i8imm:$src), - "or{w}\t{$src, $dst|$dst, $src}", - [(store (or (load addr:$dst), i16immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def OR32mi8 : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$src), - "or{l}\t{$src, $dst|$dst, $src}", - [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>; - - def OR8i8 : Ii8 <0x0C, RawFrm, (outs), (ins i8imm:$src), - "or{b}\t{$src, %al|%al, $src}", []>; - def OR16i16 : Ii16 <0x0D, RawFrm, (outs), (ins i16imm:$src), - "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize; - def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src), - "or{l}\t{$src, %eax|%eax, $src}", []>; -} // Constraints = "" - - -let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y - def XOR8rr : I<0x30, MRMDestReg, - (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2), - "xor{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, - GR8:$src2))]>; - def XOR16rr : I<0x31, MRMDestReg, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "xor{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1, - GR16:$src2))]>, OpSize; - def XOR32rr : I<0x31, MRMDestReg, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "xor{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, - GR32:$src2))]>; -} // isCommutable = 1 - -// XOR instructions with the destination register in REG and the source register -// in R/M. Included for the disassembler. 
-let isCodeGenOnly = 1 in { -def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "xor{b}\t{$src2, $dst|$dst, $src2}", []>; -def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2), - "xor{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; -def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "xor{l}\t{$src2, $dst|$dst, $src2}", []>; -} - -def XOR8rm : I<0x32, MRMSrcMem, - (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2), - "xor{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, - (load addr:$src2)))]>; -def XOR16rm : I<0x33, MRMSrcMem, - (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), - "xor{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1, - (load addr:$src2)))]>, - OpSize; -def XOR32rm : I<0x33, MRMSrcMem, - (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "xor{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, - (load addr:$src2)))]>; - -def XOR8ri : Ii8<0x80, MRM6r, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), - "xor{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, imm:$src2))]>; -def XOR16ri : Ii16<0x81, MRM6r, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), - "xor{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1, - imm:$src2))]>, OpSize; -def XOR32ri : Ii32<0x81, MRM6r, - (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), - "xor{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, - imm:$src2))]>; -def XOR16ri8 : Ii8<0x83, MRM6r, - (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), - "xor{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1, - i16immSExt8:$src2))]>, - OpSize; -def XOR32ri8 : Ii8<0x83, MRM6r, - (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), - "xor{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, - i32immSExt8:$src2))]>; - -let Constraints = "" in { - def XOR8mr : I<0x30, MRMDestMem, - (outs), (ins i8mem :$dst, GR8 :$src), - "xor{b}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), GR8:$src), addr:$dst), - (implicit EFLAGS)]>; - def XOR16mr : I<0x31, MRMDestMem, - (outs), (ins i16mem:$dst, GR16:$src), - "xor{w}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), GR16:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def XOR32mr : I<0x31, MRMDestMem, - (outs), (ins i32mem:$dst, GR32:$src), - "xor{l}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), GR32:$src), addr:$dst), - (implicit EFLAGS)]>; - def XOR8mi : Ii8<0x80, MRM6m, - (outs), (ins i8mem :$dst, i8imm :$src), - "xor{b}\t{$src, $dst|$dst, $src}", - [(store (xor (loadi8 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>; - def XOR16mi : Ii16<0x81, MRM6m, - (outs), (ins i16mem:$dst, i16imm:$src), - "xor{w}\t{$src, $dst|$dst, $src}", - [(store (xor (loadi16 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def XOR32mi : Ii32<0x81, MRM6m, - (outs), (ins i32mem:$dst, i32imm:$src), - "xor{l}\t{$src, $dst|$dst, $src}", - [(store (xor (loadi32 addr:$dst), imm:$src), addr:$dst), - (implicit EFLAGS)]>; - def XOR16mi8 : Ii8<0x83, MRM6m, - (outs), (ins i16mem:$dst, i16i8imm :$src), - "xor{w}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), i16immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>, - OpSize; - def XOR32mi8 : Ii8<0x83, MRM6m, - (outs), (ins i32mem:$dst, i32i8imm :$src), - 
"xor{l}\t{$src, $dst|$dst, $src}", - [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst), - (implicit EFLAGS)]>; - - def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src), - "xor{b}\t{$src, %al|%al, $src}", []>; - def XOR16i16 : Ii16<0x35, RawFrm, (outs), (ins i16imm:$src), - "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize; - def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src), - "xor{l}\t{$src, %eax|%eax, $src}", []>; -} // Constraints = "" -} // Defs = [EFLAGS] - -// Shift instructions -let Defs = [EFLAGS] in { -let Uses = [CL] in { -def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1), - "shl{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (shl GR8:$src1, CL))]>; -def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1), - "shl{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize; -def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1), - "shl{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (shl GR32:$src1, CL))]>; -} // Uses = [CL] - -def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), - "shl{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>; - -let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. -def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), - "shl{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize; -def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), - "shl{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>; - -// NOTE: We don't include patterns for shifts of a register by one, because -// 'add reg,reg' is cheaper. - -def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1), - "shl{b}\t$dst", []>; -def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1), - "shl{w}\t$dst", []>, OpSize; -def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), - "shl{l}\t$dst", []>; - -} // isConvertibleToThreeAddress = 1 - -let Constraints = "" in { - let Uses = [CL] in { - def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst), - "shl{b}\t{%cl, $dst|$dst, CL}", - [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>; - def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst), - "shl{w}\t{%cl, $dst|$dst, CL}", - [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; - def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst), - "shl{l}\t{%cl, $dst|$dst, CL}", - [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>; - } - def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src), - "shl{b}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src), - "shl{w}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, - OpSize; - def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src), - "shl{l}\t{$src, $dst|$dst, $src}", - [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - - // Shift by 1 - def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst), - "shl{b}\t$dst", - [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; - def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst), - "shl{w}\t$dst", - [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, - OpSize; - def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst), - "shl{l}\t$dst", - [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} // Constraints = "" - -let Uses = 
[CL] in { -def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1), - "shr{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (srl GR8:$src1, CL))]>; -def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1), - "shr{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize; -def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1), - "shr{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (srl GR32:$src1, CL))]>; -} - -def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), - "shr{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>; -def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), - "shr{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize; -def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), - "shr{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>; - -// Shift by 1 -def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1), - "shr{b}\t$dst", - [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>; -def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1), - "shr{w}\t$dst", - [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize; -def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), - "shr{l}\t$dst", - [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>; - -let Constraints = "" in { - let Uses = [CL] in { - def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst), - "shr{b}\t{%cl, $dst|$dst, CL}", - [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>; - def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst), - "shr{w}\t{%cl, $dst|$dst, CL}", - [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>, - OpSize; - def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst), - "shr{l}\t{%cl, $dst|$dst, CL}", - [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>; - } - def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src), - "shr{b}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src), - "shr{w}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, - OpSize; - def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src), - "shr{l}\t{$src, $dst|$dst, $src}", - [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; - - // Shift by 1 - def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst), - "shr{b}\t$dst", - [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; - def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst), - "shr{w}\t$dst", - [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize; - def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst), - "shr{l}\t$dst", - [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} // Constraints = "" - -let Uses = [CL] in { -def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), - "sar{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (sra GR8:$src1, CL))]>; -def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1), - "sar{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize; -def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1), - "sar{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (sra GR32:$src1, CL))]>; -} - -def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), - "sar{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>; -def SAR16ri : 
Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-        "sar{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
-        OpSize;
-def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-        "sar{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
-
-// Shift by 1
-def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
-        "sar{b}\t$dst",
-        [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
-def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
-        "sar{w}\t$dst",
-        [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
-def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
-        "sar{l}\t$dst",
-        [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
-          "sar{b}\t{%cl, $dst|$dst, CL}",
-          [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
-  def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
-          "sar{w}\t{%cl, $dst|$dst, CL}",
-          [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
-  def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
-          "sar{l}\t{%cl, $dst|$dst, CL}",
-          [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
-  }
-  def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
-          "sar{b}\t{$src, $dst|$dst, $src}",
-          [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-  def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
-          "sar{w}\t{$src, $dst|$dst, $src}",
-          [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
-          OpSize;
-  def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
-          "sar{l}\t{$src, $dst|$dst, $src}",
-          [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
-  // Shift by 1
-  def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
-          "sar{b}\t$dst",
-          [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
-  def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
-          "sar{w}\t$dst",
-          [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
-          OpSize;
-  def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
-          "sar{l}\t$dst",
-          [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-// Rotate instructions
-
-def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
-        "rcl{b}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
-        "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
-        "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
-        "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-let Uses = [CL] in {
-def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
-        "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-}
-def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
-        "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-
-def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
-        "rcl{l}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
-        "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
-        "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
-        "rcr{b}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
-        "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
-        "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
-        "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-let Uses = [CL] in {
-def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
-        "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-}
-def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
-        "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-
-def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
-        "rcr{l}\t{1, $dst|$dst, 1}", []>;
-let Uses = [CL] in {
-def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
-        "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
-        "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Constraints = "" in {
-def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
-        "rcl{b}\t{1, $dst|$dst, 1}", []>;
-def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
-        "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
-        "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
-        "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
-        "rcl{l}\t{1, $dst|$dst, 1}", []>;
-def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
-        "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
-        "rcr{b}\t{1, $dst|$dst, 1}", []>;
-def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
-        "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
-        "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
-def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
-        "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
-        "rcr{l}\t{1, $dst|$dst, 1}", []>;
-def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
-        "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-
-let Uses = [CL] in {
-def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
-        "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
-        "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
-        "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
-def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
-        "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
-def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
-        "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
-def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
-        "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
-}
-} // Constraints = ""
-
-// FIXME: provide shorter instructions when imm8 == 1
-let Uses = [CL] in {
-def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
-        "rol{b}\t{%cl, $dst|$dst, CL}",
-        [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
-def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
-        "rol{w}\t{%cl, $dst|$dst, CL}",
-        [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
-def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
-        "rol{l}\t{%cl, $dst|$dst, CL}",
-        [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
-}
-
-def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-        "rol{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
-def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-        "rol{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>,
-        OpSize;
-def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-        "rol{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
-
-// Rotate by 1
-def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
-        "rol{b}\t$dst",
-        [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
-def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
-        "rol{w}\t$dst",
-        [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
-def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
-        "rol{l}\t$dst",
-        [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
-          "rol{b}\t{%cl, $dst|$dst, CL}",
-          [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
-  def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
-          "rol{w}\t{%cl, $dst|$dst, CL}",
-          [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
-  def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
-          "rol{l}\t{%cl, $dst|$dst, CL}",
-          [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
-  }
-  def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src),
-          "rol{b}\t{$src, $dst|$dst, $src}",
-          [(store (rotl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-  def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src),
-          "rol{w}\t{$src, $dst|$dst, $src}",
-          [(store (rotl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
-          OpSize;
-  def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src),
-          "rol{l}\t{$src, $dst|$dst, $src}",
-          [(store (rotl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
-  // Rotate by 1
-  def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
-          "rol{b}\t$dst",
-          [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
-  def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
-          "rol{w}\t$dst",
-          [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
-          OpSize;
-  def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
-          "rol{l}\t$dst",
-          [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-let Uses = [CL] in {
-def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
-        "ror{b}\t{%cl, $dst|$dst, CL}",
-        [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
-def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
-        "ror{w}\t{%cl, $dst|$dst, CL}",
-        [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
-def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
-        "ror{l}\t{%cl, $dst|$dst, CL}",
-        [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
-}
-
-def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
-        "ror{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
-def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
-        "ror{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>,
-        OpSize;
-def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
-        "ror{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
-
-// Rotate by 1
-def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
-        "ror{b}\t$dst",
-        [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
-def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
-        "ror{w}\t$dst",
-        [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
-def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
-        "ror{l}\t$dst",
-        [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
-          "ror{b}\t{%cl, $dst|$dst, CL}",
-          [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
-  def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
-          "ror{w}\t{%cl, $dst|$dst, CL}",
-          [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
-  def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
-          "ror{l}\t{%cl, $dst|$dst, CL}",
-          [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
-  }
-  def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
-          "ror{b}\t{$src, $dst|$dst, $src}",
-          [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-  def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
-          "ror{w}\t{$src, $dst|$dst, $src}",
-          [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
-          OpSize;
-  def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
-          "ror{l}\t{$src, $dst|$dst, $src}",
-          [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-
-  // Rotate by 1
-  def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
-          "ror{b}\t$dst",
-          [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
-  def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
-          "ror{w}\t$dst",
-          [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
-          OpSize;
-  def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
-          "ror{l}\t$dst",
-          [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-} // Constraints = ""
-
-
-// Double shift instructions (generalizations of rotate)
-let Uses = [CL] in {
-def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
-        (ins GR32:$src1, GR32:$src2),
-        "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-        [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
-def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
-        (ins GR32:$src1, GR32:$src2),
-        "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-        [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
-def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
-        (ins GR16:$src1, GR16:$src2),
-        "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-        [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
-        TB, OpSize;
-def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
-        (ins GR16:$src1, GR16:$src2),
-        "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-        [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
-        TB, OpSize;
-}
-
-let isCommutable = 1 in { // These instructions commute to each other.
-def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
-        (outs GR32:$dst),
-        (ins GR32:$src1, GR32:$src2, i8imm:$src3),
-        "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-        [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
-                (i8 imm:$src3)))]>,
-        TB;
-def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
-        (outs GR32:$dst),
-        (ins GR32:$src1, GR32:$src2, i8imm:$src3),
-        "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-        [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
-                (i8 imm:$src3)))]>,
-        TB;
-def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
-        (outs GR16:$dst),
-        (ins GR16:$src1, GR16:$src2, i8imm:$src3),
-        "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-        [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
-                (i8 imm:$src3)))]>,
-        TB, OpSize;
-def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
-        (outs GR16:$dst),
-        (ins GR16:$src1, GR16:$src2, i8imm:$src3),
-        "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-        [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
-                (i8 imm:$src3)))]>,
-        TB, OpSize;
-}
-
-let Constraints = "" in {
-  let Uses = [CL] in {
-  def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-          "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-          [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
-                  addr:$dst)]>, TB;
-  def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-          "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-          [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
-                  addr:$dst)]>, TB;
-  }
-  def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
-          (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
-          "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-          [(store (X86shld (loadi32 addr:$dst), GR32:$src2,
-                  (i8 imm:$src3)), addr:$dst)]>,
-          TB;
-  def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
-          (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
-          "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-          [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
-                  (i8 imm:$src3)), addr:$dst)]>,
-          TB;
-
-  let Uses = [CL] in {
-  def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-          "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-          [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
-                  addr:$dst)]>, TB, OpSize;
-  def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-          "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
-          [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
-                  addr:$dst)]>, TB, OpSize;
-  }
-  def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
-          (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
-          "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-          [(store (X86shld (loadi16 addr:$dst), GR16:$src2,
-                  (i8 imm:$src3)), addr:$dst)]>,
-          TB, OpSize;
-  def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
-          (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
-          "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-          [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
-                  (i8 imm:$src3)), addr:$dst)]>,
-          TB, OpSize;
-} // Constraints = ""
-} // Defs = [EFLAGS]
-
-
-// Arithmetic.
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // X = ADD Y, Z --> X = ADD Z, Y
-// Register-Register Addition
-def ADD8rr : I<0x00, MRMDestReg, (outs GR8 :$dst),
-        (ins GR8 :$src1, GR8 :$src2),
-        "add{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1, GR8:$src2))]>;
-
-let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
-// Register-Register Addition
-def ADD16rr : I<0x01, MRMDestReg, (outs GR16:$dst),
-        (ins GR16:$src1, GR16:$src2),
-        "add{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
-                GR16:$src2))]>, OpSize;
-def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst),
-        (ins GR32:$src1, GR32:$src2),
-        "add{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
-                GR32:$src2))]>;
-} // end isConvertibleToThreeAddress
-} // end isCommutable
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
-  def ADD8rr_alt: I<0x02, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-          "add{b}\t{$src2, $dst|$dst, $src2}", []>;
-  def ADD16rr_alt: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2),
-          "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-  def ADD32rr_alt: I<0x03, MRMSrcReg,(outs GR32:$dst),(ins GR32:$src1, GR32:$src2),
-          "add{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Addition
-def ADD8rm : I<0x02, MRMSrcMem, (outs GR8 :$dst),
-        (ins GR8 :$src1, i8mem :$src2),
-        "add{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1,
-                (load addr:$src2)))]>;
-def ADD16rm : I<0x03, MRMSrcMem, (outs GR16:$dst),
-        (ins GR16:$src1, i16mem:$src2),
-        "add{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
-                (load addr:$src2)))]>, OpSize;
-def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst),
-        (ins GR32:$src1, i32mem:$src2),
-        "add{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
-                (load addr:$src2)))]>;
-
-// Register-Integer Addition
-def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-        "add{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, EFLAGS,
-                (X86add_flag GR8:$src1, imm:$src2))]>;
-
-let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
-// Register-Integer Addition
-def ADD16ri : Ii16<0x81, MRM0r, (outs GR16:$dst),
-        (ins GR16:$src1, i16imm:$src2),
-        "add{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, EFLAGS,
-                (X86add_flag GR16:$src1, imm:$src2))]>, OpSize;
-def ADD32ri : Ii32<0x81, MRM0r, (outs GR32:$dst),
-        (ins GR32:$src1, i32imm:$src2),
-        "add{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, EFLAGS,
-                (X86add_flag GR32:$src1, imm:$src2))]>;
-def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst),
-        (ins GR16:$src1, i16i8imm:$src2),
-        "add{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, EFLAGS,
-                (X86add_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
-def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
-        (ins GR32:$src1, i32i8imm:$src2),
-        "add{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, EFLAGS,
-                (X86add_flag GR32:$src1, i32immSExt8:$src2))]>;
-}
-
-let Constraints = "" in {
-  // Memory-Register Addition
-  def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
-          "add{b}\t{$src2, $dst|$dst, $src2}",
-          [(store (add (load addr:$dst), GR8:$src2), addr:$dst),
-           (implicit EFLAGS)]>;
-  def ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-          "add{w}\t{$src2, $dst|$dst, $src2}",
-          [(store (add (load addr:$dst), GR16:$src2), addr:$dst),
-           (implicit EFLAGS)]>, OpSize;
-  def ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-          "add{l}\t{$src2, $dst|$dst, $src2}",
-          [(store (add (load addr:$dst), GR32:$src2), addr:$dst),
-           (implicit EFLAGS)]>;
-  def ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
-          "add{b}\t{$src2, $dst|$dst, $src2}",
-          [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst),
-           (implicit EFLAGS)]>;
-  def ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
-          "add{w}\t{$src2, $dst|$dst, $src2}",
-          [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst),
-           (implicit EFLAGS)]>, OpSize;
-  def ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
-          "add{l}\t{$src2, $dst|$dst, $src2}",
-          [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst),
-           (implicit EFLAGS)]>;
-  def ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
-          "add{w}\t{$src2, $dst|$dst, $src2}",
-          [(store (add (load addr:$dst), i16immSExt8:$src2),
-                  addr:$dst),
-           (implicit EFLAGS)]>, OpSize;
-  def ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-          "add{l}\t{$src2, $dst|$dst, $src2}",
-          [(store (add (load addr:$dst), i32immSExt8:$src2),
-                  addr:$dst),
-           (implicit EFLAGS)]>;
-
-  // addition to rAX
-  def ADD8i8 : Ii8<0x04, RawFrm, (outs), (ins i8imm:$src),
-          "add{b}\t{$src, %al|%al, $src}", []>;
-  def ADD16i16 : Ii16<0x05, RawFrm, (outs), (ins i16imm:$src),
-          "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
-          "add{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let Uses = [EFLAGS] in {
-let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y
-def ADC8rr : I<0x10, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-        "adc{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, (adde GR8:$src1, GR8:$src2))]>;
-def ADC16rr : I<0x11, MRMDestReg, (outs GR16:$dst),
-        (ins GR16:$src1, GR16:$src2),
-        "adc{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, (adde GR16:$src1, GR16:$src2))]>, OpSize;
-def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst),
-        (ins GR32:$src1, GR32:$src2),
-        "adc{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
-}
-
-let isCodeGenOnly = 1 in {
-def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-        "adc{b}\t{$src2, $dst|$dst, $src2}", []>;
-def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst),
-        (ins GR16:$src1, GR16:$src2),
-        "adc{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst),
-        (ins GR32:$src1, GR32:$src2),
-        "adc{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst),
-        (ins GR8:$src1, i8mem:$src2),
-        "adc{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2)))]>;
-def ADC16rm : I<0x13, MRMSrcMem , (outs GR16:$dst),
-        (ins GR16:$src1, i16mem:$src2),
-        "adc{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2)))]>,
-        OpSize;
-def ADC32rm : I<0x13, MRMSrcMem , (outs GR32:$dst),
-        (ins GR32:$src1, i32mem:$src2),
-        "adc{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, (adde GR32:$src1, (load addr:$src2)))]>;
-def ADC8ri : Ii8<0x80, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-        "adc{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, (adde GR8:$src1, imm:$src2))]>;
-def ADC16ri : Ii16<0x81, MRM2r, (outs GR16:$dst),
-        (ins GR16:$src1, i16imm:$src2),
-        "adc{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, (adde GR16:$src1, imm:$src2))]>, OpSize;
-def ADC16ri8 : Ii8<0x83, MRM2r, (outs GR16:$dst),
-        (ins GR16:$src1, i16i8imm:$src2),
-        "adc{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, (adde GR16:$src1, i16immSExt8:$src2))]>,
-        OpSize;
-def ADC32ri : Ii32<0x81, MRM2r, (outs GR32:$dst),
-        (ins GR32:$src1, i32imm:$src2),
-        "adc{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, (adde GR32:$src1, imm:$src2))]>;
-def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst),
-        (ins GR32:$src1, i32i8imm:$src2),
-        "adc{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
-  def ADC8mr : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
-          "adc{b}\t{$src2, $dst|$dst, $src2}",
-          [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>;
-  def ADC16mr : I<0x11, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-          "adc{w}\t{$src2, $dst|$dst, $src2}",
-          [(store (adde (load addr:$dst), GR16:$src2), addr:$dst)]>,
-          OpSize;
-  def ADC32mr : I<0x11, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-          "adc{l}\t{$src2, $dst|$dst, $src2}",
-          [(store (adde (load addr:$dst), GR32:$src2), addr:$dst)]>;
-  def ADC8mi : Ii8<0x80, MRM2m, (outs), (ins i8mem:$dst, i8imm:$src2),
-          "adc{b}\t{$src2, $dst|$dst, $src2}",
-          [(store (adde (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
-  def ADC16mi : Ii16<0x81, MRM2m, (outs), (ins i16mem:$dst, i16imm:$src2),
-          "adc{w}\t{$src2, $dst|$dst, $src2}",
-          [(store (adde (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
-          OpSize;
-  def ADC16mi8 : Ii8<0x83, MRM2m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
-          "adc{w}\t{$src2, $dst|$dst, $src2}",
-          [(store (adde (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
-          OpSize;
-  def ADC32mi : Ii32<0x81, MRM2m, (outs), (ins i32mem:$dst, i32imm:$src2),
-          "adc{l}\t{$src2, $dst|$dst, $src2}",
-          [(store (adde (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
-  def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-          "adc{l}\t{$src2, $dst|$dst, $src2}",
-          [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
-
-  def ADC8i8 : Ii8<0x14, RawFrm, (outs), (ins i8imm:$src),
-          "adc{b}\t{$src, %al|%al, $src}", []>;
-  def ADC16i16 : Ii16<0x15, RawFrm, (outs), (ins i16imm:$src),
-          "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
-          "adc{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-} // Uses = [EFLAGS]
-
-// Register-Register Subtraction
-def SUB8rr : I<0x28, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-        "sub{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, EFLAGS,
-                (X86sub_flag GR8:$src1, GR8:$src2))]>;
-def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
-        "sub{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, EFLAGS,
-                (X86sub_flag GR16:$src1, GR16:$src2))]>, OpSize;
-def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
-        "sub{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, EFLAGS,
-                (X86sub_flag GR32:$src1, GR32:$src2))]>;
-
-let isCodeGenOnly = 1 in {
-def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-        "sub{b}\t{$src2, $dst|$dst, $src2}", []>;
-def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst),
-        (ins GR16:$src1, GR16:$src2),
-        "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst),
-        (ins GR32:$src1, GR32:$src2),
-        "sub{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-// Register-Memory Subtraction
-def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
-        (ins GR8 :$src1, i8mem :$src2),
-        "sub{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, EFLAGS,
-                (X86sub_flag GR8:$src1, (load addr:$src2)))]>;
-def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst),
-        (ins GR16:$src1, i16mem:$src2),
-        "sub{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, EFLAGS,
-                (X86sub_flag GR16:$src1, (load addr:$src2)))]>, OpSize;
-def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst),
-        (ins GR32:$src1, i32mem:$src2),
-        "sub{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, EFLAGS,
-                (X86sub_flag GR32:$src1, (load addr:$src2)))]>;
-
-// Register-Integer Subtraction
-def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst),
-        (ins GR8:$src1, i8imm:$src2),
-        "sub{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, EFLAGS,
-                (X86sub_flag GR8:$src1, imm:$src2))]>;
-def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst),
-        (ins GR16:$src1, i16imm:$src2),
-        "sub{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, EFLAGS,
-                (X86sub_flag GR16:$src1, imm:$src2))]>, OpSize;
-def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst),
-        (ins GR32:$src1, i32imm:$src2),
-        "sub{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, EFLAGS,
-                (X86sub_flag GR32:$src1, imm:$src2))]>;
-def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst),
-        (ins GR16:$src1, i16i8imm:$src2),
-        "sub{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, EFLAGS,
-                (X86sub_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
-def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
-        (ins GR32:$src1, i32i8imm:$src2),
-        "sub{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, EFLAGS,
-                (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>;
-
-let Constraints = "" in {
-  // Memory-Register Subtraction
-  def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
-          "sub{b}\t{$src2, $dst|$dst, $src2}",
-          [(store (sub (load addr:$dst), GR8:$src2), addr:$dst),
-           (implicit EFLAGS)]>;
-  def SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-          "sub{w}\t{$src2, $dst|$dst, $src2}",
-          [(store (sub (load addr:$dst), GR16:$src2), addr:$dst),
-           (implicit EFLAGS)]>, OpSize;
-  def SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-          "sub{l}\t{$src2, $dst|$dst, $src2}",
-          [(store (sub (load addr:$dst), GR32:$src2), addr:$dst),
-           (implicit EFLAGS)]>;
-
-  // Memory-Integer Subtraction
-  def SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
-          "sub{b}\t{$src2, $dst|$dst, $src2}",
-          [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst),
-           (implicit EFLAGS)]>;
-  def SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
-          "sub{w}\t{$src2, $dst|$dst, $src2}",
-          [(store (sub (loadi16 addr:$dst), imm:$src2),addr:$dst),
-           (implicit EFLAGS)]>, OpSize;
-  def SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
-          "sub{l}\t{$src2, $dst|$dst, $src2}",
-          [(store (sub (loadi32 addr:$dst), imm:$src2),addr:$dst),
-           (implicit EFLAGS)]>;
-  def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
-          "sub{w}\t{$src2, $dst|$dst, $src2}",
-          [(store (sub (load addr:$dst), i16immSExt8:$src2),
-                  addr:$dst),
-           (implicit EFLAGS)]>, OpSize;
-  def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-          "sub{l}\t{$src2, $dst|$dst, $src2}",
-          [(store (sub (load addr:$dst), i32immSExt8:$src2),
-                  addr:$dst),
-           (implicit EFLAGS)]>;
-
-  def SUB8i8 : Ii8<0x2C, RawFrm, (outs), (ins i8imm:$src),
-          "sub{b}\t{$src, %al|%al, $src}", []>;
-  def SUB16i16 : Ii16<0x2D, RawFrm, (outs), (ins i16imm:$src),
-          "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
-          "sub{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let Uses = [EFLAGS] in {
-def SBB8rr : I<0x18, MRMDestReg, (outs GR8:$dst),
-        (ins GR8:$src1, GR8:$src2),
-        "sbb{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, (sube GR8:$src1, GR8:$src2))]>;
-def SBB16rr : I<0x19, MRMDestReg, (outs GR16:$dst),
-        (ins GR16:$src1, GR16:$src2),
-        "sbb{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, (sube GR16:$src1, GR16:$src2))]>, OpSize;
-def SBB32rr : I<0x19, MRMDestReg, (outs GR32:$dst),
-        (ins GR32:$src1, GR32:$src2),
-        "sbb{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
-
-let Constraints = "" in {
-  def SBB8mr : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
-          "sbb{b}\t{$src2, $dst|$dst, $src2}",
-          [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>;
-  def SBB16mr : I<0x19, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-          "sbb{w}\t{$src2, $dst|$dst, $src2}",
-          [(store (sube (load addr:$dst), GR16:$src2), addr:$dst)]>,
-          OpSize;
-  def SBB32mr : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-          "sbb{l}\t{$src2, $dst|$dst, $src2}",
-          [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>;
-  def SBB8mi : Ii8<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2),
-          "sbb{b}\t{$src2, $dst|$dst, $src2}",
-          [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
-  def SBB16mi : Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2),
-          "sbb{w}\t{$src2, $dst|$dst, $src2}",
-          [(store (sube (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
-          OpSize;
-  def SBB16mi8 : Ii8<0x83, MRM3m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
-          "sbb{w}\t{$src2, $dst|$dst, $src2}",
-          [(store (sube (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
-          OpSize;
-  def SBB32mi : Ii32<0x81, MRM3m, (outs), (ins i32mem:$dst, i32imm:$src2),
-          "sbb{l}\t{$src2, $dst|$dst, $src2}",
-          [(store (sube (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
-  def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
-          "sbb{l}\t{$src2, $dst|$dst, $src2}",
-          [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
-
-  def SBB8i8 : Ii8<0x1C, RawFrm, (outs), (ins i8imm:$src),
-          "sbb{b}\t{$src, %al|%al, $src}", []>;
-  def SBB16i16 : Ii16<0x1D, RawFrm, (outs), (ins i16imm:$src),
-          "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-  def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src),
-          "sbb{l}\t{$src, %eax|%eax, $src}", []>;
-} // Constraints = ""
-
-let isCodeGenOnly = 1 in {
-def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
-        "sbb{b}\t{$src2, $dst|$dst, $src2}", []>;
-def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst),
-        (ins GR16:$src1, GR16:$src2),
-        "sbb{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize;
-def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst),
-        (ins GR32:$src1, GR32:$src2),
-        "sbb{l}\t{$src2, $dst|$dst, $src2}", []>;
-}
-
-def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
-        "sbb{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2)))]>;
-def SBB16rm : I<0x1B, MRMSrcMem, (outs GR16:$dst),
-        (ins GR16:$src1, i16mem:$src2),
-        "sbb{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2)))]>,
-        OpSize;
-def SBB32rm : I<0x1B, MRMSrcMem, (outs GR32:$dst),
-        (ins GR32:$src1, i32mem:$src2),
-        "sbb{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, (sube GR32:$src1, (load addr:$src2)))]>;
-def SBB8ri : Ii8<0x80, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
-        "sbb{b}\t{$src2, $dst|$dst, $src2}",
-        [(set GR8:$dst, (sube GR8:$src1, imm:$src2))]>;
-def SBB16ri : Ii16<0x81, MRM3r, (outs GR16:$dst),
-        (ins GR16:$src1, i16imm:$src2),
-        "sbb{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, (sube GR16:$src1, imm:$src2))]>, OpSize;
-def SBB16ri8 : Ii8<0x83, MRM3r, (outs GR16:$dst),
-        (ins GR16:$src1, i16i8imm:$src2),
-        "sbb{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, (sube GR16:$src1, i16immSExt8:$src2))]>,
-        OpSize;
-def SBB32ri : Ii32<0x81, MRM3r, (outs GR32:$dst),
-        (ins GR32:$src1, i32imm:$src2),
-        "sbb{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, (sube GR32:$src1, imm:$src2))]>;
-def SBB32ri8 : Ii8<0x83, MRM3r, (outs GR32:$dst),
-        (ins GR32:$src1, i32i8imm:$src2),
-        "sbb{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, (sube GR32:$src1, i32immSExt8:$src2))]>;
-} // Uses = [EFLAGS]
-} // Defs = [EFLAGS]
-
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
-// Register-Register Signed Integer Multiply
-def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
-        "imul{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, EFLAGS,
-                (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
-def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
-        "imul{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, EFLAGS,
-                (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
-}
-
-// Register-Memory Signed Integer Multiply
-def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
-        (ins GR16:$src1, i16mem:$src2),
-        "imul{w}\t{$src2, $dst|$dst, $src2}",
-        [(set GR16:$dst, EFLAGS,
-                (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
-        TB, OpSize;
-def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
-        (ins GR32:$src1, i32mem:$src2),
-        "imul{l}\t{$src2, $dst|$dst, $src2}",
-        [(set GR32:$dst, EFLAGS,
-                (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
-} // Defs = [EFLAGS]
-} // end Two Address instructions
-
-// Suprisingly enough, these are not two address instructions!
-let Defs = [EFLAGS] in {
-// Register-Integer Signed Integer Multiply
-def IMUL16rri : Ii16<0x69, MRMSrcReg,                      // GR16 = GR16*I16
-        (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
-        "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-        [(set GR16:$dst, EFLAGS,
-                (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
-def IMUL32rri : Ii32<0x69, MRMSrcReg,                      // GR32 = GR32*I32
-        (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
-        "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-        [(set GR32:$dst, EFLAGS,
-                (X86smul_flag GR32:$src1, imm:$src2))]>;
-def IMUL16rri8 : Ii8<0x6B, MRMSrcReg,                      // GR16 = GR16*I8
-        (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
-        "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-        [(set GR16:$dst, EFLAGS,
-                (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
-        OpSize;
-def IMUL32rri8 : Ii8<0x6B, MRMSrcReg,                      // GR32 = GR32*I8
-        (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
-        "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-        [(set GR32:$dst, EFLAGS,
-                (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
-
-// Memory-Integer Signed Integer Multiply
-def IMUL16rmi : Ii16<0x69, MRMSrcMem,                      // GR16 = [mem16]*I16
-        (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
-        "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-        [(set GR16:$dst, EFLAGS,
-                (X86smul_flag (load addr:$src1), imm:$src2))]>,
-        OpSize;
-def IMUL32rmi : Ii32<0x69, MRMSrcMem,                      // GR32 = [mem32]*I32
-        (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
-        "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-        [(set GR32:$dst, EFLAGS,
-                (X86smul_flag (load addr:$src1), imm:$src2))]>;
-def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem,                      // GR16 = [mem16]*I8
-        (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
-        "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-        [(set GR16:$dst, EFLAGS,
-                (X86smul_flag (load addr:$src1),
-                        i16immSExt8:$src2))]>, OpSize;
-def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem,                      // GR32 = [mem32]*I8
-        (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
-        "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-        [(set GR32:$dst, EFLAGS,
-                (X86smul_flag (load addr:$src1),
-                        i32immSExt8:$src2))]>;
-} // Defs = [EFLAGS]
-
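To make the "not two address" remark that introduced the block above concrete: the immediate multiplies write a destination that need not be tied to the source register. A minimal C sketch; the lowering named in the comment is one typical compiler output, not anything taken from this patch, and the function name is illustrative.

    /* x * 5 can land in a fresh register: on i386 a compiler may emit
       "imull $5, %ecx, %eax", i.e. three operands with dst != src,
       unlike ordinary two-address x86 arithmetic. */
    int scale_by_5(int x) { return x * 5; }
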
-//===----------------------------------------------------------------------===//
-// Test instructions are just like AND, except they don't generate a result.
-//
-let Defs = [EFLAGS] in {
-let isCommutable = 1 in { // TEST X, Y --> TEST Y, X
-def TEST8rr : I<0x84, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2),
-        "test{b}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (and_su GR8:$src1, GR8:$src2), 0))]>;
-def TEST16rr : I<0x85, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
-        "test{w}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (and_su GR16:$src1, GR16:$src2),
-                0))]>,
-        OpSize;
-def TEST32rr : I<0x85, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
-        "test{l}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (and_su GR32:$src1, GR32:$src2),
-                0))]>;
-}
-
-def TEST8i8 : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src),
-        "test{b}\t{$src, %al|%al, $src}", []>;
-def TEST16i16 : Ii16<0xA9, RawFrm, (outs), (ins i16imm:$src),
-        "test{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src),
-        "test{l}\t{$src, %eax|%eax, $src}", []>;
-
-def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2),
-        "test{b}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (and GR8:$src1, (loadi8 addr:$src2)),
-                0))]>;
-def TEST16rm : I<0x85, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2),
-        "test{w}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (and GR16:$src1,
-                (loadi16 addr:$src2)), 0))]>, OpSize;
-def TEST32rm : I<0x85, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2),
-        "test{l}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (and GR32:$src1,
-                (loadi32 addr:$src2)), 0))]>;
-
-def TEST8ri : Ii8 <0xF6, MRM0r,                            // flags = GR8 & imm8
-        (outs), (ins GR8:$src1, i8imm:$src2),
-        "test{b}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (and_su GR8:$src1, imm:$src2), 0))]>;
-def TEST16ri : Ii16<0xF7, MRM0r,                           // flags = GR16 & imm16
-        (outs), (ins GR16:$src1, i16imm:$src2),
-        "test{w}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (and_su GR16:$src1, imm:$src2), 0))]>,
-        OpSize;
-def TEST32ri : Ii32<0xF7, MRM0r,                           // flags = GR32 & imm32
-        (outs), (ins GR32:$src1, i32imm:$src2),
-        "test{l}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (and_su GR32:$src1, imm:$src2), 0))]>;
-
-def TEST8mi : Ii8 <0xF6, MRM0m,                            // flags = [mem8] & imm8
-        (outs), (ins i8mem:$src1, i8imm:$src2),
-        "test{b}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (and (loadi8 addr:$src1), imm:$src2),
-                0))]>;
-def TEST16mi : Ii16<0xF7, MRM0m,                           // flags = [mem16] & imm16
-        (outs), (ins i16mem:$src1, i16imm:$src2),
-        "test{w}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (and (loadi16 addr:$src1), imm:$src2),
-                0))]>, OpSize;
-def TEST32mi : Ii32<0xF7, MRM0m,                           // flags = [mem32] & imm32
-        (outs), (ins i32mem:$src1, i32imm:$src2),
-        "test{l}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (and (loadi32 addr:$src1), imm:$src2),
-                0))]>;
-} // Defs = [EFLAGS]
-
 // Condition code ops, incl. set if equal/not equal/...
 let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in
@@ -3374,305 +925,10 @@ def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>;  // flags = AH
 let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
 def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>;  // AH = flags
 
-let Uses = [EFLAGS] in {
-// Use sbb to materialize carry bit.
-let Defs = [EFLAGS], isCodeGenOnly = 1 in {
-// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
-// However, Pat<> can't replicate the destination reg into the inputs of the
-// result.
-// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
-// X86CodeEmitter.
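Before the SETB_C defs that follow, a hedged C sketch of the sbb idiom they encode: "sbb r, r" computes r - r - CF, which is 0 when the carry flag is clear and all-ones when it is set. The function name and the lowering in the comment are illustrative only, not taken from this patch.

    /* Materializing CF as a mask: typically "cmp b, a" then "sbb eax, eax",
       yielding 0 or ~0 without a branch. */
    unsigned borrow_mask(unsigned a, unsigned b) { return a < b ? ~0u : 0u; }
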
-def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
-        [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
-        [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
-        OpSize;
-def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
-        [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-} // isCodeGenOnly
-
-def SETEr : I<0x94, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "sete\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_E, EFLAGS))]>,
-        TB;                        // GR8 = ==
-def SETEm : I<0x94, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "sete\t$dst",
-        [(store (X86setcc X86_COND_E, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = ==
-
-def SETNEr : I<0x95, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "setne\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_NE, EFLAGS))]>,
-        TB;                        // GR8 = !=
-def SETNEm : I<0x95, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "setne\t$dst",
-        [(store (X86setcc X86_COND_NE, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = !=
-
-def SETLr : I<0x9C, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "setl\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_L, EFLAGS))]>,
-        TB;                        // GR8 = <  signed
-def SETLm : I<0x9C, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "setl\t$dst",
-        [(store (X86setcc X86_COND_L, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = <  signed
-
-def SETGEr : I<0x9D, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "setge\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_GE, EFLAGS))]>,
-        TB;                        // GR8 = >= signed
-def SETGEm : I<0x9D, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "setge\t$dst",
-        [(store (X86setcc X86_COND_GE, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = >= signed
-
-def SETLEr : I<0x9E, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "setle\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_LE, EFLAGS))]>,
-        TB;                        // GR8 = <= signed
-def SETLEm : I<0x9E, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "setle\t$dst",
-        [(store (X86setcc X86_COND_LE, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = <= signed
-
-def SETGr : I<0x9F, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "setg\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_G, EFLAGS))]>,
-        TB;                        // GR8 = >  signed
-def SETGm : I<0x9F, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "setg\t$dst",
-        [(store (X86setcc X86_COND_G, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = >  signed
-
-def SETBr : I<0x92, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "setb\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_B, EFLAGS))]>,
-        TB;                        // GR8 = <  unsign
-def SETBm : I<0x92, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "setb\t$dst",
-        [(store (X86setcc X86_COND_B, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = <  unsign
-
-def SETAEr : I<0x93, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "setae\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_AE, EFLAGS))]>,
-        TB;                        // GR8 = >= unsign
-def SETAEm : I<0x93, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "setae\t$dst",
-        [(store (X86setcc X86_COND_AE, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = >= unsign
-
-def SETBEr : I<0x96, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "setbe\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_BE, EFLAGS))]>,
-        TB;                        // GR8 = <= unsign
-def SETBEm : I<0x96, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "setbe\t$dst",
-        [(store (X86setcc X86_COND_BE, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = <= unsign
-
-def SETAr : I<0x97, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "seta\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_A, EFLAGS))]>,
-        TB;                        // GR8 = > signed
-def SETAm : I<0x97, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "seta\t$dst",
-        [(store (X86setcc X86_COND_A, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = > signed
-
-def SETSr : I<0x98, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "sets\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_S, EFLAGS))]>,
-        TB;                        // GR8 = <sign bit>
-def SETSm : I<0x98, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "sets\t$dst",
-        [(store (X86setcc X86_COND_S, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = <sign bit>
-def SETNSr : I<0x99, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "setns\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_NS, EFLAGS))]>,
-        TB;                        // GR8 = !<sign bit>
-def SETNSm : I<0x99, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "setns\t$dst",
-        [(store (X86setcc X86_COND_NS, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = !<sign bit>
-
-def SETPr : I<0x9A, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "setp\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_P, EFLAGS))]>,
-        TB;                        // GR8 = parity
-def SETPm : I<0x9A, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "setp\t$dst",
-        [(store (X86setcc X86_COND_P, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = parity
-def SETNPr : I<0x9B, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "setnp\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_NP, EFLAGS))]>,
-        TB;                        // GR8 = not parity
-def SETNPm : I<0x9B, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "setnp\t$dst",
-        [(store (X86setcc X86_COND_NP, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = not parity
-
-def SETOr : I<0x90, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "seto\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_O, EFLAGS))]>,
-        TB;                        // GR8 = overflow
-def SETOm : I<0x90, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "seto\t$dst",
-        [(store (X86setcc X86_COND_O, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = overflow
-def SETNOr : I<0x91, MRM0r,
-        (outs GR8 :$dst), (ins),
-        "setno\t$dst",
-        [(set GR8:$dst, (X86setcc X86_COND_NO, EFLAGS))]>,
-        TB;                        // GR8 = not overflow
-def SETNOm : I<0x91, MRM0m,
-        (outs), (ins i8mem:$dst),
-        "setno\t$dst",
-        [(store (X86setcc X86_COND_NO, EFLAGS), addr:$dst)]>,
-        TB;                        // [mem8] = not overflow
-} // Uses = [EFLAGS]
-
-
-// Integer comparisons
-let Defs = [EFLAGS] in {
-def CMP8i8 : Ii8<0x3C, RawFrm, (outs), (ins i8imm:$src),
-        "cmp{b}\t{$src, %al|%al, $src}", []>;
-def CMP16i16 : Ii16<0x3D, RawFrm, (outs), (ins i16imm:$src),
-        "cmp{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
-def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src),
-        "cmp{l}\t{$src, %eax|%eax, $src}", []>;
-
-def CMP8rr : I<0x38, MRMDestReg,
-        (outs), (ins GR8 :$src1, GR8 :$src2),
-        "cmp{b}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp GR8:$src1, GR8:$src2))]>;
-def CMP16rr : I<0x39, MRMDestReg,
-        (outs), (ins GR16:$src1, GR16:$src2),
-        "cmp{w}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp GR16:$src1, GR16:$src2))]>, OpSize;
-def CMP32rr : I<0x39, MRMDestReg,
-        (outs), (ins GR32:$src1, GR32:$src2),
-        "cmp{l}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp GR32:$src1, GR32:$src2))]>;
-def CMP8mr : I<0x38, MRMDestMem,
-        (outs), (ins i8mem :$src1, GR8 :$src2),
-        "cmp{b}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (loadi8 addr:$src1), GR8:$src2))]>;
-def CMP16mr : I<0x39, MRMDestMem,
-        (outs), (ins i16mem:$src1, GR16:$src2),
-        "cmp{w}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (loadi16 addr:$src1), GR16:$src2))]>,
-        OpSize;
-def CMP32mr : I<0x39, MRMDestMem,
-        (outs), (ins i32mem:$src1, GR32:$src2),
-        "cmp{l}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (loadi32 addr:$src1), GR32:$src2))]>;
-def CMP8rm : I<0x3A, MRMSrcMem,
-        (outs), (ins GR8 :$src1, i8mem :$src2),
-        "cmp{b}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp GR8:$src1, (loadi8 addr:$src2)))]>;
-def CMP16rm : I<0x3B, MRMSrcMem,
-        (outs), (ins GR16:$src1, i16mem:$src2),
-        "cmp{w}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp GR16:$src1, (loadi16 addr:$src2)))]>,
-        OpSize;
-def CMP32rm : I<0x3B, MRMSrcMem,
-        (outs), (ins GR32:$src1, i32mem:$src2),
-        "cmp{l}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp GR32:$src1, (loadi32 addr:$src2)))]>;
-
-// These are alternate spellings for use by the disassembler, we mark them as
-// code gen only to ensure they aren't matched by the assembler.
-let isCodeGenOnly = 1 in {
-  def CMP8rr_alt : I<0x3A, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2),
-          "cmp{b}\t{$src2, $src1|$src1, $src2}", []>;
-  def CMP16rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
-          "cmp{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize;
-  def CMP32rr_alt : I<0x3B, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
-          "cmp{l}\t{$src2, $src1|$src1, $src2}", []>;
-}
-def CMP8ri : Ii8<0x80, MRM7r,
-        (outs), (ins GR8:$src1, i8imm:$src2),
-        "cmp{b}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp GR8:$src1, imm:$src2))]>;
-def CMP16ri : Ii16<0x81, MRM7r,
-        (outs), (ins GR16:$src1, i16imm:$src2),
-        "cmp{w}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp GR16:$src1, imm:$src2))]>, OpSize;
-def CMP32ri : Ii32<0x81, MRM7r,
-        (outs), (ins GR32:$src1, i32imm:$src2),
-        "cmp{l}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp GR32:$src1, imm:$src2))]>;
-def CMP8mi : Ii8 <0x80, MRM7m,
-        (outs), (ins i8mem :$src1, i8imm :$src2),
-        "cmp{b}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (loadi8 addr:$src1), imm:$src2))]>;
-def CMP16mi : Ii16<0x81, MRM7m,
-        (outs), (ins i16mem:$src1, i16imm:$src2),
-        "cmp{w}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (loadi16 addr:$src1), imm:$src2))]>,
-        OpSize;
-def CMP32mi : Ii32<0x81, MRM7m,
-        (outs), (ins i32mem:$src1, i32imm:$src2),
-        "cmp{l}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (loadi32 addr:$src1), imm:$src2))]>;
-def CMP16ri8 : Ii8<0x83, MRM7r,
-        (outs), (ins GR16:$src1, i16i8imm:$src2),
-        "cmp{w}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp GR16:$src1, i16immSExt8:$src2))]>,
-        OpSize;
-def CMP16mi8 : Ii8<0x83, MRM7m,
-        (outs), (ins i16mem:$src1, i16i8imm:$src2),
-        "cmp{w}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (loadi16 addr:$src1),
-                i16immSExt8:$src2))]>, OpSize;
-def CMP32mi8 : Ii8<0x83, MRM7m,
-        (outs), (ins i32mem:$src1, i32i8imm:$src2),
-        "cmp{l}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp (loadi32 addr:$src1),
-                i32immSExt8:$src2))]>;
-def CMP32ri8 : Ii8<0x83, MRM7r,
-        (outs), (ins GR32:$src1, i32i8imm:$src2),
-        "cmp{l}\t{$src2, $src1|$src1, $src2}",
-        [(set EFLAGS, (X86cmp GR32:$src1, i32immSExt8:$src2))]>;
-} // Defs = [EFLAGS]
+//===----------------------------------------------------------------------===//
+// Bit tests instructions: BT, BTS, BTR, BTC.
 
-// Bit tests.
-// TODO: BTC, BTR, and BTS
 let Defs = [EFLAGS] in {
 def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                "bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -3680,6 +936,9 @@ def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
 def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                "bt{l}\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB;
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+               "bt{q}\t{$src2, $src1|$src1, $src2}",
+               [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
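A hedged C sketch of the distinction the comment below draws: with a memory operand, bt addresses a bit string, so the register index is not reduced modulo the operand width the way it is for the register+register form. The helper name is illustrative, not from this patch.

    /* What "bt mem, reg" computes for 32-bit operands: the index may
       select a bit well past the first dword of the addressed word. */
    int bit_string_test(const unsigned *words, unsigned long idx) {
        return (words[idx / 32] >> (idx % 32)) & 1;
    }
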
 
 // Unlike with the register+register form, the memory+register form of the
 // bt instruction does not ignore the high bits of the index. From ISel's
@@ -3687,17 +946,23 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
 // only for now.
 def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
-               "bt{w}\t{$src2, $src1|$src1, $src2}", 
+               "bt{w}\t{$src2, $src1|$src1, $src2}",
 //               [(X86bt (loadi16 addr:$src1), GR16:$src2),
 //                (implicit EFLAGS)]
                []
                >, OpSize, TB, Requires<[FastBTMem]>;
 def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
-               "bt{l}\t{$src2, $src1|$src1, $src2}", 
+               "bt{l}\t{$src2, $src1|$src1, $src2}",
 //               [(X86bt (loadi32 addr:$src1), GR32:$src2),
 //                (implicit EFLAGS)]
                []
                >, TB, Requires<[FastBTMem]>;
+def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+               "bt{q}\t{$src2, $src1|$src1, $src2}",
+//               [(X86bt (loadi64 addr:$src1), GR64:$src2),
+//                (implicit EFLAGS)]
+               []
+               >, TB;
 
 def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                 "bt{w}\t{$src2, $src1|$src1, $src2}",
@@ -3706,6 +971,10 @@ def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
 def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                 "bt{l}\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB;
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                "bt{q}\t{$src2, $src1|$src1, $src2}",
+                [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+
 // Note that these instructions don't need FastBTMem because that
 // only applies when the other operand is in a register. When it's
 // an immediate, bt is still fast.
@@ -3717,307 +986,129 @@ def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                 "bt{l}\t{$src2, $src1|$src1, $src2}",
                 [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2))
                 ]>, TB;
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                "bt{q}\t{$src2, $src1|$src1, $src2}",
+                [(set EFLAGS, (X86bt (loadi64 addr:$src1),
+                                     i64immSExt8:$src2))]>, TB;
+
 def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                 "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                 "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+                "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                 "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                 "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                 "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                 "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                 "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                 "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 
 def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                 "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                 "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+                "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                 "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                 "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                 "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                 "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                 "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                 "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 
 def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
                 "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
                 "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+                "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                 "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                 "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+                "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2),
                 "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2),
                 "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+                "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
                 "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB;
 def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
                 "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+                "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
 } // Defs = [EFLAGS]
 
-// Sign/Zero extenders
-// Use movsbl intead of movsbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update. Actual movsbw included for the disassembler.
-def MOVSX16rr8W : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
-        "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rm8W : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
-        "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
-        "", [(set GR16:$dst, (sext GR8:$src))]>, TB;
-def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
-        "", [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
-def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
-        "movs{bl|x}\t{$src, $dst|$dst, $src}",
-        [(set GR32:$dst, (sext GR8:$src))]>, TB;
-def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
-        "movs{bl|x}\t{$src, $dst|$dst, $src}",
-        [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
-def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
-        "movs{wl|x}\t{$src, $dst|$dst, $src}",
-        [(set GR32:$dst, (sext GR16:$src))]>, TB;
-def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
-        "movs{wl|x}\t{$src, $dst|$dst, $src}",
-        [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
-
-// Use movzbl intead of movzbw; we don't care about the high 16 bits
-// of the register here. This has a smaller encoding and avoids a
-// partial-register update. Actual movzbw included for the disassembler.
-def MOVZX16rr8W : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
-        "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVZX16rm8W : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
-        "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
-def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
-        "", [(set GR16:$dst, (zext GR8:$src))]>, TB;
-def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
-        "", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
-def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
-        "movz{bl|x}\t{$src, $dst|$dst, $src}",
-        [(set GR32:$dst, (zext GR8:$src))]>, TB;
-def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
-        "movz{bl|x}\t{$src, $dst|$dst, $src}",
-        [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
-def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
-        "movz{wl|x}\t{$src, $dst|$dst, $src}",
-        [(set GR32:$dst, (zext GR16:$src))]>, TB;
-def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
-        "movz{wl|x}\t{$src, $dst|$dst, $src}",
-        [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
-
-// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
-// except that they use GR32_NOREX for the output operand register class
-// instead of GR32. This allows them to operate on h registers on x86-64.
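A brief illustration of why the defs below restrict the destination class: a REX prefix (required to encode r8d-r15d) re-purposes the encodings of AH/BH/CH/DH as SPL/BPL/SIL/DIL, so an instruction whose source may be a high-byte register must not be forced into a REX-encoded destination. Hedged C sketch; the lowering in the comment is a typical one, not from this patch.

    /* Extracting bits 8-15 may be lowered to "movzbl %ah, <r32>"; that
       form cannot carry a REX prefix, hence the GR32_NOREX outputs. */
    unsigned second_byte(unsigned x) { return (x >> 8) & 0xff; }
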
-def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
-        (outs GR32_NOREX:$dst), (ins GR8:$src),
-        "movz{bl|x}\t{$src, $dst|$dst, $src}  # NOREX",
-        []>, TB;
-let mayLoad = 1 in
-def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
-        (outs GR32_NOREX:$dst), (ins i8mem:$src),
-        "movz{bl|x}\t{$src, $dst|$dst, $src}  # NOREX",
-        []>, TB;
-
-let neverHasSideEffects = 1 in {
-  let Defs = [AX], Uses = [AL] in
-  def CBW : I<0x98, RawFrm, (outs), (ins),
-          "{cbtw|cbw}", []>, OpSize;   // AX = signext(AL)
-  let Defs = [EAX], Uses = [AX] in
-  def CWDE : I<0x98, RawFrm, (outs), (ins),
-          "{cwtl|cwde}", []>;          // EAX = signext(AX)
-
-  let Defs = [AX,DX], Uses = [AX] in
-  def CWD : I<0x99, RawFrm, (outs), (ins),
-          "{cwtd|cwd}", []>, OpSize;   // DX:AX = signext(AX)
-  let Defs = [EAX,EDX], Uses = [EAX] in
-  def CDQ : I<0x99, RawFrm, (outs), (ins),
-          "{cltd|cdq}", []>;           // EDX:EAX = signext(EAX)
-}
-
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
-
-// Alias instructions that map movr0 to xor.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-// FIXME: Set encoding to pseudo.
-let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
-    isCodeGenOnly = 1 in {
-def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
-        [(set GR8:$dst, 0)]>;
-
-// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
-// encoding and avoids a partial-register update sometimes, but doing so
-// at isel time interferes with rematerialization in the current register
-// allocator. For now, this is rewritten when the instruction is lowered
-// to an MCInst.
-def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
-        "",
-        [(set GR16:$dst, 0)]>, OpSize;
-
-// FIXME: Set encoding to pseudo.
-def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
-        [(set GR32:$dst, 0)]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Thread Local Storage Instructions
-//
-
-// ELF TLS Support
-// All calls clobber the non-callee saved registers. ESP is marked as
-// a use to prevent stack-pointer assignments that appear immediately
-// before calls from potentially appearing dead.
-let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
-            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
-            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
-            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-    Uses = [ESP] in
-def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
-        "leal\t$sym, %eax; "
-        "call\t___tls_get_addr@PLT",
-        [(X86tlsaddr tls32addr:$sym)]>,
-        Requires<[In32BitMode]>;
-
-// Darwin TLS Support
-// For i386, the address of the thunk is passed on the stack, on return the
-// address of the variable is in %eax. %ecx is trashed during the function
-// call. All other registers are preserved.
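For context on both TLS lowerings here: the address of a thread-local variable is produced by a runtime call. A hedged C sketch of the kind of access involved; the i386 ELF sequence in the comment mirrors the asm string of TLS_addr32 above, while the Darwin path calls a per-variable thunk as the following def describes.

    /* Taking the address of a __thread variable; on i386 ELF this is
       roughly "leal tls_counter@TLSGD(,%ebx,1), %eax" followed by
       "call ___tls_get_addr@PLT". */
    __thread int tls_counter;
    int *tls_counter_addr(void) { return &tls_counter; }
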
-let Defs = [EAX, ECX], - Uses = [ESP], - usesCustomInserter = 1 in -def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym), - "# TLSCall_32", - [(X86TLSCall addr:$sym)]>, - Requires<[In32BitMode]>; - -let AddedComplexity = 5, isCodeGenOnly = 1 in -def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "movl\t%gs:$src, $dst", - [(set GR32:$dst, (gsload addr:$src))]>, SegGS; - -let AddedComplexity = 5, isCodeGenOnly = 1 in -def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "movl\t%fs:$src, $dst", - [(set GR32:$dst, (fsload addr:$src))]>, SegFS; - -//===----------------------------------------------------------------------===// -// EH Pseudo Instructions -// -let isTerminator = 1, isReturn = 1, isBarrier = 1, - hasCtrlDep = 1, isCodeGenOnly = 1 in { -def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), - "ret\t#eh_return, addr: $addr", - [(X86ehret GR32:$addr)]>; - -} //===----------------------------------------------------------------------===// // Atomic support // -// Memory barriers - -// TODO: Get this to fold the constant into the instruction. -def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), - "lock\n\t" - "or{l}\t{$zero, $dst|$dst, $zero}", - []>, Requires<[In32BitMode]>, LOCK; - -let hasSideEffects = 1 in { -def Int_MemBarrier : I<0, Pseudo, (outs), (ins), - "#MEMBARRIER", - [(X86MemBarrier)]>, Requires<[HasSSE2]>; -} // Atomic swap. These are just normal xchg instructions. But since a memory // operand is referenced, the atomicity is ensured. let Constraints = "$val = $dst" in { -def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst), - (ins GR32:$val, i32mem:$ptr), - "xchg{l}\t{$val, $ptr|$ptr, $val}", - [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>; -def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst), - (ins GR16:$val, i16mem:$ptr), - "xchg{w}\t{$val, $ptr|$ptr, $val}", - [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>, - OpSize; def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), - "xchg{b}\t{$val, $ptr|$ptr, $val}", + "xchg{b}\t{$val, $ptr|$ptr, $val}", [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>; +def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),(ins GR16:$val, i16mem:$ptr), + "xchg{w}\t{$val, $ptr|$ptr, $val}", + [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>, + OpSize; +def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),(ins GR32:$val, i32mem:$ptr), + "xchg{l}\t{$val, $ptr|$ptr, $val}", + [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>; +def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),(ins GR64:$val,i64mem:$ptr), + "xchg{q}\t{$val, $ptr|$ptr, $val}", + [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>; -def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src), - "xchg{l}\t{$val, $src|$src, $val}", []>; -def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src), - "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize; def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src), "xchg{b}\t{$val, $src|$src, $val}", []>; +def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src), + "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize; +def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src), + "xchg{l}\t{$val, $src|$src, $val}", []>; +def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src), + "xchg{q}\t{$val, $src|$src, $val}", []>; } def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src), 
"xchg{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src), "xchg{l}\t{$src, %eax|%eax, $src}", []>; +def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), + "xchg{q}\t{$src, %rax|%rax, $src}", []>; -// Atomic compare and swap. -let Defs = [EAX, EFLAGS], Uses = [EAX] in { -def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), - "lock\n\t" - "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK; -} -let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { -def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), - "lock\n\t" - "cmpxchg8b\t$ptr", - [(X86cas8 addr:$ptr)]>, TB, LOCK; -} - -let Defs = [AX, EFLAGS], Uses = [AX] in { -def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap), - "lock\n\t" - "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK; -} -let Defs = [AL, EFLAGS], Uses = [AL] in { -def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap), - "lock\n\t" - "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK; -} -// Atomic exchange and add -let Constraints = "$val = $dst", Defs = [EFLAGS] in { -def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr), - "lock\n\t" - "xadd{l}\t{$val, $ptr|$ptr, $val}", - [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>, - TB, LOCK; -def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr), - "lock\n\t" - "xadd{w}\t{$val, $ptr|$ptr, $val}", - [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>, - TB, OpSize, LOCK; -def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), - "lock\n\t" - "xadd{b}\t{$val, $ptr|$ptr, $val}", - [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>, - TB, LOCK; -} def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB; @@ -4025,6 +1116,8 @@ def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; +def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), + "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; let mayLoad = 1, mayStore = 1 in { def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), @@ -4033,6 +1126,9 @@ def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; +def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; + } def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), @@ -4041,6 +1137,8 @@ def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), + "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; let mayLoad = 1, mayStore = 1 in { def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), @@ -4049,284 +1147,29 @@ def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), 
"cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), "cmpxchg8b\t$dst", []>, TB; -// Optimized codegen when the non-memory output is not used. -// FIXME: Use normal add / sub instructions and add lock prefix dynamically. -let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { -def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), - "lock\n\t" - "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "lock\n\t" - "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; -def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "lock\n\t" - "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2), - "lock\n\t" - "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2), - "lock\n\t" - "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2), - "lock\n\t" - "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2), - "lock\n\t" - "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; -def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2), - "lock\n\t" - "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; - -def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), - "lock\n\t" - "inc{b}\t$dst", []>, LOCK; -def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), - "lock\n\t" - "inc{w}\t$dst", []>, OpSize, LOCK; -def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), - "lock\n\t" - "inc{l}\t$dst", []>, LOCK; - -def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2), - "lock\n\t" - "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - "lock\n\t" - "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; -def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - "lock\n\t" - "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), - "lock\n\t" - "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), - "lock\n\t" - "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; -def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), - "lock\n\t" - "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; -def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), - "lock\n\t" - "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; -def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2), - "lock\n\t" - "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; - -def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), - "lock\n\t" - "dec{b}\t$dst", []>, LOCK; -def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), - "lock\n\t" - "dec{w}\t$dst", []>, OpSize, LOCK; -def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins 
i32mem:$dst), - "lock\n\t" - "dec{l}\t$dst", []>, LOCK; -} +let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in +def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), + "cmpxchg16b\t$dst", []>, TB; -// Atomic exchange, and, or, xor -let Constraints = "$val = $dst", Defs = [EFLAGS], - usesCustomInserter = 1 in { -def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMAND32 PSEUDO!", - [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>; -def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMOR32 PSEUDO!", - [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>; -def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMXOR32 PSEUDO!", - [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>; -def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMNAND32 PSEUDO!", - [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>; -def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), - "#ATOMMIN32 PSEUDO!", - [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>; -def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMMAX32 PSEUDO!", - [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>; -def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMUMIN32 PSEUDO!", - [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>; -def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMUMAX32 PSEUDO!", - [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>; - -def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMAND16 PSEUDO!", - [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>; -def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMOR16 PSEUDO!", - [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>; -def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMXOR16 PSEUDO!", - [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>; -def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMNAND16 PSEUDO!", - [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>; -def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), - "#ATOMMIN16 PSEUDO!", - [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>; -def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMMAX16 PSEUDO!", - [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>; -def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMUMIN16 PSEUDO!", - [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>; -def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMUMAX16 PSEUDO!", - [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>; - -def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMAND8 PSEUDO!", - [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>; -def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMOR8 PSEUDO!", - [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>; -def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMXOR8 PSEUDO!", - [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>; -def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMNAND8 
PSEUDO!", - [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>; -} -let Constraints = "$val1 = $dst1, $val2 = $dst2", - Defs = [EFLAGS, EAX, EBX, ECX, EDX], - Uses = [EAX, EBX, ECX, EDX], - mayLoad = 1, mayStore = 1, - usesCustomInserter = 1 in { -def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMAND6432 PSEUDO!", []>; -def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMOR6432 PSEUDO!", []>; -def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMXOR6432 PSEUDO!", []>; -def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMNAND6432 PSEUDO!", []>; -def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMADD6432 PSEUDO!", []>; -def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMSUB6432 PSEUDO!", []>; -def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMSWAP6432 PSEUDO!", []>; -} -// Segmentation support instructions. - -def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), - "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; - -// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo. -def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), - "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; -def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; - -def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), - "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; -def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; - -def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; - -def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), - "str{w}\t{$dst}", []>, TB; -def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins), - "str{w}\t{$dst}", []>, TB; -def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), - "ltr{w}\t{$src}", []>, TB; -def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), - "ltr{w}\t{$src}", []>, TB; - -def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), - "push{w}\t%fs", []>, OpSize, TB; -def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), - "push{l}\t%fs", []>, TB; -def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), - "push{w}\t%gs", []>, OpSize, TB; -def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), - "push{l}\t%gs", []>, TB; - -def POPFS16 : I<0xa1, RawFrm, (outs), (ins), - "pop{w}\t%fs", []>, OpSize, TB; -def POPFS32 : I<0xa1, RawFrm, (outs), (ins), - "pop{l}\t%fs", []>, TB; -def POPGS16 : I<0xa9, RawFrm, (outs), (ins), - "pop{w}\t%gs", []>, OpSize, TB; -def POPGS32 : I<0xa9, RawFrm, (outs), (ins), - "pop{l}\t%gs", []>, TB; - -def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - 
"lds{l}\t{$src, $dst|$dst, $src}", []>; -def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "lss{l}\t{$src, $dst|$dst, $src}", []>, TB; -def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize; -def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "les{l}\t{$src, $dst|$dst, $src}", []>; -def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB; -def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; -def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB; - -def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), - "verr\t$seg", []>, TB; -def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), - "verr\t$seg", []>, TB; -def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), - "verw\t$seg", []>, TB; -def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), - "verw\t$seg", []>, TB; - -// Descriptor-table support instructions - -def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), - "sgdt\t$dst", []>, TB; -def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), - "sidt\t$dst", []>, TB; -def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins), - "sldt{w}\t$dst", []>, TB; -def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins), - "sldt{w}\t$dst", []>, TB; -def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), - "lgdt\t$src", []>, TB; -def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), - "lidt\t$src", []>, TB; -def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), - "lldt{w}\t$src", []>, TB; -def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), - "lldt{w}\t$src", []>, TB; - // Lock instruction prefix def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>; +// Rex64 instruction prefix +def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>; + +// Data16 instruction prefix +def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>; + // Repeat string operation instruction prefixes // These uses the DF flag in the EFLAGS register to inc or dec ECX let Defs = [ECX], Uses = [ECX,EFLAGS] in { @@ -4336,35 +1179,19 @@ def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>; def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>; } -// Segment override instruction prefixes -def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>; -def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>; -def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>; -def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>; -def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>; -def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; // String manipulation instructions - def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>; def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize; def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", []>; +def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>; def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", []>; def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", []>, OpSize; def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", []>; -// CPU flow control 
instructions - -def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>; -def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB; - -// FPU control instructions - -def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB; // Flag instructions - def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>; def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>; def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>; @@ -4376,620 +1203,423 @@ def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", []>; def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB; // Table lookup instructions - def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>; -// Specialized register support - -def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB; -def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB; -def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB; - -def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), - "smsw{w}\t$dst", []>, OpSize, TB; -def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins), - "smsw{l}\t$dst", []>, TB; -// For memory operands, there is only a 16-bit form -def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins), - "smsw{w}\t$dst", []>, TB; - -def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src), - "lmsw{w}\t$src", []>, TB; -def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src), - "lmsw{w}\t$src", []>, TB; - -def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB; - -// Cache instructions - -def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; -def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; - -// VMX instructions - -// 66 0F 38 80 -def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8; -// 66 0F 38 81 -def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8; -// 0F 01 C1 -def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; -def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), - "vmclear\t$vmcs", []>, OpSize, TB; -// 0F 01 C2 -def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; -// 0F 01 C3 -def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; -def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), - "vmptrld\t$vmcs", []>, TB; -def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins), - "vmptrst\t$vmcs", []>, TB; -def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src), - "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB; -def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB; -def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src), - "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB; -def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), - "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB; -def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB; -def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB; -def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; -def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; -// 0F 01 C4 -def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; -def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), - "vmxon\t{$vmxon}", []>, XS; +// ASCII Adjust After Addition +// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS +def AAA : I<0x37, RawFrm, (outs), 
(ins), "aaa", []>, Requires<[In32BitMode]>; -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns -//===----------------------------------------------------------------------===// +// ASCII Adjust AX Before Division +// sets AL, AH and EFLAGS and uses AL and AH +def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src), + "aad\t$src", []>, Requires<[In32BitMode]>; -// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable -def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>; -def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>; -def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>; -def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>; -def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>; -def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>; - -def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)), - (ADD32ri GR32:$src1, tconstpool:$src2)>; -def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)), - (ADD32ri GR32:$src1, tjumptable:$src2)>; -def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)), - (ADD32ri GR32:$src1, tglobaladdr:$src2)>; -def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)), - (ADD32ri GR32:$src1, texternalsym:$src2)>; -def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)), - (ADD32ri GR32:$src1, tblockaddress:$src2)>; - -def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst), - (MOV32mi addr:$dst, tglobaladdr:$src)>; -def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst), - (MOV32mi addr:$dst, texternalsym:$src)>; -def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst), - (MOV32mi addr:$dst, tblockaddress:$src)>; - -// Calls -// tailcall stuff -def : Pat<(X86tcret GR32_TC:$dst, imm:$off), - (TCRETURNri GR32_TC:$dst, imm:$off)>, - Requires<[In32BitMode]>; - -// FIXME: This is disabled for 32-bit PIC mode because the global base -// register which is part of the address mode may be assigned a -// callee-saved register. -def : Pat<(X86tcret (load addr:$dst), imm:$off), - (TCRETURNmi addr:$dst, imm:$off)>, - Requires<[In32BitMode, IsNotPIC]>; - -def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), - (TCRETURNdi texternalsym:$dst, imm:$off)>, - Requires<[In32BitMode]>; - -def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), - (TCRETURNdi texternalsym:$dst, imm:$off)>, - Requires<[In32BitMode]>; - -// Normal calls, with various flavors of addresses. -def : Pat<(X86call (i32 tglobaladdr:$dst)), - (CALLpcrel32 tglobaladdr:$dst)>; -def : Pat<(X86call (i32 texternalsym:$dst)), - (CALLpcrel32 texternalsym:$dst)>; -def : Pat<(X86call (i32 imm:$dst)), - (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>; - -// X86 specific add which produces a flag. 
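// (Roughly: addc is the low half of a multi-word addition, producing both a
// sum and an outgoing carry in EFLAGS, which the adde/ADC patterns defined
// elsewhere in this file consume.  On i386 a 64-bit add is thus split into
// an ADD32rr for the low words followed by an ADC32rr for the high words.)
// The flag-producing selections follow: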
-def : Pat<(addc GR32:$src1, GR32:$src2), - (ADD32rr GR32:$src1, GR32:$src2)>; -def : Pat<(addc GR32:$src1, (load addr:$src2)), - (ADD32rm GR32:$src1, addr:$src2)>; -def : Pat<(addc GR32:$src1, imm:$src2), - (ADD32ri GR32:$src1, imm:$src2)>; -def : Pat<(addc GR32:$src1, i32immSExt8:$src2), - (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; - -def : Pat<(subc GR32:$src1, GR32:$src2), - (SUB32rr GR32:$src1, GR32:$src2)>; -def : Pat<(subc GR32:$src1, (load addr:$src2)), - (SUB32rm GR32:$src1, addr:$src2)>; -def : Pat<(subc GR32:$src1, imm:$src2), - (SUB32ri GR32:$src1, imm:$src2)>; -def : Pat<(subc GR32:$src1, i32immSExt8:$src2), - (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; - -// Comparisons. - -// TEST R,R is smaller than CMP R,0 -def : Pat<(X86cmp GR8:$src1, 0), - (TEST8rr GR8:$src1, GR8:$src1)>; -def : Pat<(X86cmp GR16:$src1, 0), - (TEST16rr GR16:$src1, GR16:$src1)>; -def : Pat<(X86cmp GR32:$src1, 0), - (TEST32rr GR32:$src1, GR32:$src1)>; - -// Conditional moves with folded loads with operands swapped and conditions -// inverted. -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS), - (CMOVAE16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS), - (CMOVAE32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS), - (CMOVB16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS), - (CMOVB32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS), - (CMOVNE16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS), - (CMOVNE32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS), - (CMOVE16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS), - (CMOVE32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS), - (CMOVA16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS), - (CMOVA32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS), - (CMOVBE16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS), - (CMOVBE32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS), - (CMOVGE16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS), - (CMOVGE32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS), - (CMOVL16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS), - (CMOVL32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS), - (CMOVG16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS), - (CMOVG32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS), - (CMOVLE16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS), - (CMOVLE32rm GR32:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS), - (CMOVNP16rm GR16:$src2, addr:$src1)>; -def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS), - (CMOVNP32rm 
GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS),
-          (CMOVP16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS),
-          (CMOVP32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS),
-          (CMOVNS16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS),
-          (CMOVNS32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS),
-          (CMOVS16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS),
-          (CMOVS32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS),
-          (CMOVNO16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS),
-          (CMOVNO32rm GR32:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS),
-          (CMOVO16rm GR16:$src2, addr:$src1)>;
-def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS),
-          (CMOVO32rm GR32:$src2, addr:$src1)>;
-
-// zextload bool -> zextload byte
-def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-
-// extload bool -> extload byte
-def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
-def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
-
-// anyext. Define these to do an explicit zero-extend to
-// avoid partial-register updates.
-def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
-def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
-
-// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
-def : Pat<(i32 (anyext GR16:$src)),
-          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
+// ASCII Adjust AX After Multiply
+// sets AL, AH and EFLAGS and uses AL
+def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
+                 "aam\t$src", []>, Requires<[In32BitMode]>;
+// ASCII Adjust AL After Subtraction
+// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>, Requires<[In32BitMode]>;
-//===----------------------------------------------------------------------===//
-// Some peepholes
-//===----------------------------------------------------------------------===//
-
-// Odd encoding trick: -128 fits into an 8-bit immediate field while
-// +128 doesn't, so in this special case use a sub instead of an add.
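// A worked size comparison (AT&T syntax; byte counts are for the short
// EAX-destination encodings):
//   addl $128, %eax      # 05 80 00 00 00   -- 5 bytes, 32-bit immediate
//   subl $-128, %eax     # 83 e8 80         -- 3 bytes, sign-extended imm8
// Both leave EAX + 128 in the register; only -128 is representable in the
// 8-bit immediate field.  The patterns below perform that rewrite: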
-def : Pat<(add GR16:$src1, 128), - (SUB16ri8 GR16:$src1, -128)>; -def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst), - (SUB16mi8 addr:$dst, -128)>; -def : Pat<(add GR32:$src1, 128), - (SUB32ri8 GR32:$src1, -128)>; -def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst), - (SUB32mi8 addr:$dst, -128)>; - -// r & (2^16-1) ==> movz -def : Pat<(and GR32:$src1, 0xffff), - (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>; -// r & (2^8-1) ==> movz -def : Pat<(and GR32:$src1, 0xff), - (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1, - GR32_ABCD)), - sub_8bit))>, - Requires<[In32BitMode]>; -// r & (2^8-1) ==> movz -def : Pat<(and GR16:$src1, 0xff), - (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1, - GR16_ABCD)), - sub_8bit))>, - Requires<[In32BitMode]>; - -// sext_inreg patterns -def : Pat<(sext_inreg GR32:$src, i16), - (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>; -def : Pat<(sext_inreg GR32:$src, i8), - (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, - GR32_ABCD)), - sub_8bit))>, - Requires<[In32BitMode]>; -def : Pat<(sext_inreg GR16:$src, i8), - (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, - GR16_ABCD)), - sub_8bit))>, - Requires<[In32BitMode]>; - -// trunc patterns -def : Pat<(i16 (trunc GR32:$src)), - (EXTRACT_SUBREG GR32:$src, sub_16bit)>; -def : Pat<(i8 (trunc GR32:$src)), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - sub_8bit)>, - Requires<[In32BitMode]>; -def : Pat<(i8 (trunc GR16:$src)), - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit)>, - Requires<[In32BitMode]>; - -// h-register tricks -def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi)>, - Requires<[In32BitMode]>; -def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - sub_8bit_hi)>, - Requires<[In32BitMode]>; -def : Pat<(srl GR16:$src, (i8 8)), - (EXTRACT_SUBREG - (MOVZX32rr8 - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - sub_8bit_hi)), - sub_16bit)>, - Requires<[In32BitMode]>; -def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), - (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, - GR16_ABCD)), - sub_8bit_hi))>, - Requires<[In32BitMode]>; -def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), - (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, - GR16_ABCD)), - sub_8bit_hi))>, - Requires<[In32BitMode]>; -def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), - (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, - GR32_ABCD)), - sub_8bit_hi))>, - Requires<[In32BitMode]>; -def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), - (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, - GR32_ABCD)), - sub_8bit_hi))>, - Requires<[In32BitMode]>; - -// (shl x, 1) ==> (add x, x) -def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; -def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; -def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; - -// (shl x (and y, 31)) ==> (shl x, y) -def : Pat<(shl GR8:$src1, (and CL, 31)), - (SHL8rCL GR8:$src1)>; -def : Pat<(shl GR16:$src1, (and CL, 31)), - (SHL16rCL GR16:$src1)>; -def : Pat<(shl GR32:$src1, (and CL, 31)), - (SHL32rCL GR32:$src1)>; -def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), - (SHL8mCL addr:$dst)>; -def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), - (SHL16mCL 
addr:$dst)>; -def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), - (SHL32mCL addr:$dst)>; - -def : Pat<(srl GR8:$src1, (and CL, 31)), - (SHR8rCL GR8:$src1)>; -def : Pat<(srl GR16:$src1, (and CL, 31)), - (SHR16rCL GR16:$src1)>; -def : Pat<(srl GR32:$src1, (and CL, 31)), - (SHR32rCL GR32:$src1)>; -def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), - (SHR8mCL addr:$dst)>; -def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), - (SHR16mCL addr:$dst)>; -def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), - (SHR32mCL addr:$dst)>; - -def : Pat<(sra GR8:$src1, (and CL, 31)), - (SAR8rCL GR8:$src1)>; -def : Pat<(sra GR16:$src1, (and CL, 31)), - (SAR16rCL GR16:$src1)>; -def : Pat<(sra GR32:$src1, (and CL, 31)), - (SAR32rCL GR32:$src1)>; -def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst), - (SAR8mCL addr:$dst)>; -def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), - (SAR16mCL addr:$dst)>; -def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), - (SAR32mCL addr:$dst)>; - -// (anyext (setcc_carry)) -> (setcc_carry) -def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), - (SETB_C16r)>; -def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), - (SETB_C32r)>; -def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))), - (SETB_C32r)>; - -// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. -let AddedComplexity = 5 in { // Try this before the selecting to OR -def : Pat<(or_is_add GR16:$src1, imm:$src2), - (ADD16ri GR16:$src1, imm:$src2)>; -def : Pat<(or_is_add GR32:$src1, imm:$src2), - (ADD32ri GR32:$src1, imm:$src2)>; -def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2), - (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2), - (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; -def : Pat<(or_is_add GR16:$src1, GR16:$src2), - (ADD16rr GR16:$src1, GR16:$src2)>; -def : Pat<(or_is_add GR32:$src1, GR32:$src2), - (ADD32rr GR32:$src1, GR32:$src2)>; -} // AddedComplexity +// Decimal Adjust AL after Addition +// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS +def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>, Requires<[In32BitMode]>; -//===----------------------------------------------------------------------===// -// EFLAGS-defining Patterns -//===----------------------------------------------------------------------===// +// Decimal Adjust AL after Subtraction +// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS +def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>, Requires<[In32BitMode]>; -// add reg, reg -def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>; -def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>; - -// add reg, mem -def : Pat<(add GR8:$src1, (loadi8 addr:$src2)), - (ADD8rm GR8:$src1, addr:$src2)>; -def : Pat<(add GR16:$src1, (loadi16 addr:$src2)), - (ADD16rm GR16:$src1, addr:$src2)>; -def : Pat<(add GR32:$src1, (loadi32 addr:$src2)), - (ADD32rm GR32:$src1, addr:$src2)>; - -// add reg, imm -def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>; -def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>; -def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>; -def : Pat<(add GR16:$src1, i16immSExt8:$src2), - (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(add GR32:$src1, i32immSExt8:$src2), - 
(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; - -// sub reg, reg -def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>; -def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>; - -// sub reg, mem -def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), - (SUB8rm GR8:$src1, addr:$src2)>; -def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), - (SUB16rm GR16:$src1, addr:$src2)>; -def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), - (SUB32rm GR32:$src1, addr:$src2)>; - -// sub reg, imm -def : Pat<(sub GR8:$src1, imm:$src2), - (SUB8ri GR8:$src1, imm:$src2)>; -def : Pat<(sub GR16:$src1, imm:$src2), - (SUB16ri GR16:$src1, imm:$src2)>; -def : Pat<(sub GR32:$src1, imm:$src2), - (SUB32ri GR32:$src1, imm:$src2)>; -def : Pat<(sub GR16:$src1, i16immSExt8:$src2), - (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(sub GR32:$src1, i32immSExt8:$src2), - (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; - -// mul reg, reg -def : Pat<(mul GR16:$src1, GR16:$src2), - (IMUL16rr GR16:$src1, GR16:$src2)>; -def : Pat<(mul GR32:$src1, GR32:$src2), - (IMUL32rr GR32:$src1, GR32:$src2)>; - -// mul reg, mem -def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), - (IMUL16rm GR16:$src1, addr:$src2)>; -def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), - (IMUL32rm GR32:$src1, addr:$src2)>; - -// mul reg, imm -def : Pat<(mul GR16:$src1, imm:$src2), - (IMUL16rri GR16:$src1, imm:$src2)>; -def : Pat<(mul GR32:$src1, imm:$src2), - (IMUL32rri GR32:$src1, imm:$src2)>; -def : Pat<(mul GR16:$src1, i16immSExt8:$src2), - (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(mul GR32:$src1, i32immSExt8:$src2), - (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>; - -// reg = mul mem, imm -def : Pat<(mul (loadi16 addr:$src1), imm:$src2), - (IMUL16rmi addr:$src1, imm:$src2)>; -def : Pat<(mul (loadi32 addr:$src1), imm:$src2), - (IMUL32rmi addr:$src1, imm:$src2)>; -def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2), - (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>; -def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2), - (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>; - -// Optimize multiply by 2 with EFLAGS result. -let AddedComplexity = 2 in { -def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>; -def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>; -} +// Check Array Index Against Bounds +def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "bound\t{$src, $dst|$dst, $src}", []>, OpSize, + Requires<[In32BitMode]>; +def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "bound\t{$src, $dst|$dst, $src}", []>, + Requires<[In32BitMode]>; -// Patterns for nodes that do not produce flags, for instructions that do. - -// Increment reg. -def : Pat<(add GR8:$src1 , 1), (INC8r GR8:$src1)>; -def : Pat<(add GR16:$src1, 1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>; -def : Pat<(add GR32:$src1, 1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>; - -// Decrement reg. -def : Pat<(add GR8:$src1 , -1), (DEC8r GR8:$src1)>; -def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>; -def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>; - -// or reg/reg. 
-def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>; -def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>; - -// or reg/mem -def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), - (OR8rm GR8:$src1, addr:$src2)>; -def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), - (OR16rm GR16:$src1, addr:$src2)>; -def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), - (OR32rm GR32:$src1, addr:$src2)>; - -// or reg/imm -def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>; -def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>; -def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>; -def : Pat<(or GR16:$src1, i16immSExt8:$src2), - (OR16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(or GR32:$src1, i32immSExt8:$src2), - (OR32ri8 GR32:$src1, i32immSExt8:$src2)>; - -// xor reg/reg -def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>; -def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>; - -// xor reg/mem -def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), - (XOR8rm GR8:$src1, addr:$src2)>; -def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), - (XOR16rm GR16:$src1, addr:$src2)>; -def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), - (XOR32rm GR32:$src1, addr:$src2)>; - -// xor reg/imm -def : Pat<(xor GR8:$src1, imm:$src2), - (XOR8ri GR8:$src1, imm:$src2)>; -def : Pat<(xor GR16:$src1, imm:$src2), - (XOR16ri GR16:$src1, imm:$src2)>; -def : Pat<(xor GR32:$src1, imm:$src2), - (XOR32ri GR32:$src1, imm:$src2)>; -def : Pat<(xor GR16:$src1, i16immSExt8:$src2), - (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(xor GR32:$src1, i32immSExt8:$src2), - (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; - -// and reg/reg -def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>; -def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>; - -// and reg/mem -def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), - (AND8rm GR8:$src1, addr:$src2)>; -def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), - (AND16rm GR16:$src1, addr:$src2)>; -def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), - (AND32rm GR32:$src1, addr:$src2)>; - -// and reg/imm -def : Pat<(and GR8:$src1, imm:$src2), - (AND8ri GR8:$src1, imm:$src2)>; -def : Pat<(and GR16:$src1, imm:$src2), - (AND16ri GR16:$src1, imm:$src2)>; -def : Pat<(and GR32:$src1, imm:$src2), - (AND32ri GR32:$src1, imm:$src2)>; -def : Pat<(and GR16:$src1, i16immSExt8:$src2), - (AND16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(and GR32:$src1, i32immSExt8:$src2), - (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; +// Adjust RPL Field of Segment Selector +def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst), + "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>; +def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst), + "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>; //===----------------------------------------------------------------------===// -// Floating Point Stack Support +// Subsystems. 
//===----------------------------------------------------------------------===// -include "X86InstrFPStack.td" - -//===----------------------------------------------------------------------===// -// X86-64 Support -//===----------------------------------------------------------------------===// +include "X86InstrArithmetic.td" +include "X86InstrCMovSetCC.td" +include "X86InstrExtension.td" +include "X86InstrControl.td" +include "X86InstrShiftRotate.td" -include "X86Instr64bit.td" +// X87 Floating Point Stack. +include "X86InstrFPStack.td" -//===----------------------------------------------------------------------===// // SIMD support (SSE, MMX and AVX) -//===----------------------------------------------------------------------===// - include "X86InstrFragmentsSIMD.td" -//===----------------------------------------------------------------------===// // FMA - Fused Multiply-Add support (requires FMA) -//===----------------------------------------------------------------------===// - include "X86InstrFMA.td" +// SSE, MMX and 3DNow! vector support. +include "X86InstrSSE.td" +include "X86InstrMMX.td" +include "X86Instr3DNow.td" + +include "X86InstrVMX.td" + +// System instructions. +include "X86InstrSystem.td" + +// Compiler Pseudo Instructions and Pat Patterns +include "X86InstrCompiler.td" + //===----------------------------------------------------------------------===// -// XMM Floating point support (requires SSE / SSE2) +// Assembler Mnemonic Aliases //===----------------------------------------------------------------------===// -include "X86InstrSSE.td" +def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>; +def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>; + +def : MnemonicAlias<"cbw", "cbtw">; +def : MnemonicAlias<"cwd", "cwtd">; +def : MnemonicAlias<"cdq", "cltd">; +def : MnemonicAlias<"cwde", "cwtl">; +def : MnemonicAlias<"cdqe", "cltq">; + +// lret maps to lretl, it is not ambiguous with lretq. +def : MnemonicAlias<"lret", "lretl">; + +def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>; +def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>; + +def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>; +def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>; +def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>; +def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>; +def : MnemonicAlias<"popfd", "popfl">; + +// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in +// all modes. However: "push (addr)" and "push $42" should default to +// pushl/pushq depending on the current mode. 
Similar for "pop %bx" +def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>; +def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>; +def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>; +def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>; +def : MnemonicAlias<"pushfd", "pushfl">; + +def : MnemonicAlias<"repe", "rep">; +def : MnemonicAlias<"repz", "rep">; +def : MnemonicAlias<"repnz", "repne">; + +def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>; +def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>; + +def : MnemonicAlias<"salb", "shlb">; +def : MnemonicAlias<"salw", "shlw">; +def : MnemonicAlias<"sall", "shll">; +def : MnemonicAlias<"salq", "shlq">; + +def : MnemonicAlias<"smovb", "movsb">; +def : MnemonicAlias<"smovw", "movsw">; +def : MnemonicAlias<"smovl", "movsl">; +def : MnemonicAlias<"smovq", "movsq">; + +def : MnemonicAlias<"ud2a", "ud2">; +def : MnemonicAlias<"verrw", "verr">; + +// System instruction aliases. +def : MnemonicAlias<"iret", "iretl">; +def : MnemonicAlias<"sysret", "sysretl">; + +def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>; +def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>; +def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>; +def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>; +def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>; +def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>; +def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>; +def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>; + + +// Floating point stack aliases. +def : MnemonicAlias<"fcmovz", "fcmove">; +def : MnemonicAlias<"fcmova", "fcmovnbe">; +def : MnemonicAlias<"fcmovnae", "fcmovb">; +def : MnemonicAlias<"fcmovna", "fcmovbe">; +def : MnemonicAlias<"fcmovae", "fcmovnb">; +def : MnemonicAlias<"fcomip", "fcompi">; +def : MnemonicAlias<"fildq", "fildll">; +def : MnemonicAlias<"fldcww", "fldcw">; +def : MnemonicAlias<"fnstcww", "fnstcw">; +def : MnemonicAlias<"fnstsww", "fnstsw">; +def : MnemonicAlias<"fucomip", "fucompi">; +def : MnemonicAlias<"fwait", "wait">; + + +class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond> + : MnemonicAlias<!strconcat(Prefix, OldCond, Suffix), + !strconcat(Prefix, NewCond, Suffix)>; + +/// IntegerCondCodeMnemonicAlias - This multiclass defines a bunch of +/// MnemonicAlias's that canonicalize the condition code in a mnemonic, for +/// example "setz" -> "sete". 
+multiclass IntegerCondCodeMnemonicAlias<string Prefix, string Suffix> {
+  def C   : CondCodeAlias<Prefix, Suffix, "c",   "b">;  // setc   -> setb
+  def Z   : CondCodeAlias<Prefix, Suffix, "z" ,  "e">;  // setz   -> sete
+  def NA  : CondCodeAlias<Prefix, Suffix, "na",  "be">; // setna  -> setbe
+  def NB  : CondCodeAlias<Prefix, Suffix, "nb",  "ae">; // setnb  -> setae
+  def NC  : CondCodeAlias<Prefix, Suffix, "nc",  "ae">; // setnc  -> setae
+  def NG  : CondCodeAlias<Prefix, Suffix, "ng",  "le">; // setng  -> setle
+  def NL  : CondCodeAlias<Prefix, Suffix, "nl",  "ge">; // setnl  -> setge
+  def NZ  : CondCodeAlias<Prefix, Suffix, "nz",  "ne">; // setnz  -> setne
+  def PE  : CondCodeAlias<Prefix, Suffix, "pe",  "p">;  // setpe  -> setp
+  def PO  : CondCodeAlias<Prefix, Suffix, "po",  "np">; // setpo  -> setnp
+
+  def NAE : CondCodeAlias<Prefix, Suffix, "nae", "b">;  // setnae -> setb
+  def NBE : CondCodeAlias<Prefix, Suffix, "nbe", "a">;  // setnbe -> seta
+  def NGE : CondCodeAlias<Prefix, Suffix, "nge", "l">;  // setnge -> setl
+  def NLE : CondCodeAlias<Prefix, Suffix, "nle", "g">;  // setnle -> setg
+}
+
+// Aliases for set<CC>
+defm : IntegerCondCodeMnemonicAlias<"set", "">;
+// Aliases for j<CC>
+defm : IntegerCondCodeMnemonicAlias<"j", "">;
+// Aliases for cmov<CC>{w,l,q}
+defm : IntegerCondCodeMnemonicAlias<"cmov", "w">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "l">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "q">;
+
 //===----------------------------------------------------------------------===//
-// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
+// Assembler Instruction Aliases
 //===----------------------------------------------------------------------===//
-include "X86InstrMMX.td"
+// aad/aam default to base 10 if no operand is specified.
+def : InstAlias<"aad", (AAD8i8 10)>;
+def : InstAlias<"aam", (AAM8i8 10)>;
+
+// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
+def : InstAlias<"bt $imm, $mem", (BT32mi8 i32mem:$mem, i32i8imm:$imm)>;
+
+// clr aliases.
+def : InstAlias<"clrb $reg", (XOR8rr  GR8 :$reg, GR8 :$reg)>;
+def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>;
+def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>;
+def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>;
+
+// div and idiv aliases for explicit A register.
+def : InstAlias<"divb $src, %al",  (DIV8r  GR8 :$src)>;
+def : InstAlias<"divw $src, %ax",  (DIV16r GR16:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32r GR32:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64r GR64:$src)>;
+def : InstAlias<"divb $src, %al",  (DIV8m  i8mem :$src)>;
+def : InstAlias<"divw $src, %ax",  (DIV16m i16mem:$src)>;
+def : InstAlias<"divl $src, %eax", (DIV32m i32mem:$src)>;
+def : InstAlias<"divq $src, %rax", (DIV64m i64mem:$src)>;
+def : InstAlias<"idivb $src, %al",  (IDIV8r  GR8 :$src)>;
+def : InstAlias<"idivw $src, %ax",  (IDIV16r GR16:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32r GR32:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64r GR64:$src)>;
+def : InstAlias<"idivb $src, %al",  (IDIV8m  i8mem :$src)>;
+def : InstAlias<"idivw $src, %ax",  (IDIV16m i16mem:$src)>;
+def : InstAlias<"idivl $src, %eax", (IDIV32m i32mem:$src)>;
+def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>;
+
+
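// A worked expansion (illustrative, not in the source): each defm above
// stamps out one MnemonicAlias per CondCodeAlias member by concatenating
// Prefix, the condition-code spelling, and Suffix.  The NZ member of
// IntegerCondCodeMnemonicAlias<"cmov", "l">, for instance, is equivalent to
//
//   def : MnemonicAlias<"cmovnzl", "cmovnel">;
//
// so the assembler rewrites "cmovnzl" to "cmovnel" before instruction
// matching.

+// Various unary fpstack operations default to operating on ST1.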
+// For example, "fxch" -> "fxch %st(1)" +def : InstAlias<"faddp", (ADD_FPrST0 ST1)>; +def : InstAlias<"fsubp", (SUBR_FPrST0 ST1)>; +def : InstAlias<"fsubrp", (SUB_FPrST0 ST1)>; +def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>; +def : InstAlias<"fdivp", (DIVR_FPrST0 ST1)>; +def : InstAlias<"fdivrp", (DIV_FPrST0 ST1)>; +def : InstAlias<"fxch", (XCH_F ST1)>; +def : InstAlias<"fcomi", (COM_FIr ST1)>; +def : InstAlias<"fcompi", (COM_FIPr ST1)>; +def : InstAlias<"fucom", (UCOM_Fr ST1)>; +def : InstAlias<"fucomp", (UCOM_FPr ST1)>; +def : InstAlias<"fucomi", (UCOM_FIr ST1)>; +def : InstAlias<"fucompi", (UCOM_FIPr ST1)>; + +// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op. +// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate +// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with +// gas. +multiclass FpUnaryAlias<string Mnemonic, Instruction Inst> { + def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"), (Inst RST:$op)>; + def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"), (Inst ST0)>; +} + +defm : FpUnaryAlias<"fadd", ADD_FST0r>; +defm : FpUnaryAlias<"faddp", ADD_FPrST0>; +defm : FpUnaryAlias<"fsub", SUB_FST0r>; +defm : FpUnaryAlias<"fsubp", SUBR_FPrST0>; +defm : FpUnaryAlias<"fsubr", SUBR_FST0r>; +defm : FpUnaryAlias<"fsubrp", SUB_FPrST0>; +defm : FpUnaryAlias<"fmul", MUL_FST0r>; +defm : FpUnaryAlias<"fmulp", MUL_FPrST0>; +defm : FpUnaryAlias<"fdiv", DIV_FST0r>; +defm : FpUnaryAlias<"fdivp", DIVR_FPrST0>; +defm : FpUnaryAlias<"fdivr", DIVR_FST0r>; +defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>; +defm : FpUnaryAlias<"fcomi", COM_FIr>; +defm : FpUnaryAlias<"fucomi", UCOM_FIr>; +defm : FpUnaryAlias<"fcompi", COM_FIPr>; +defm : FpUnaryAlias<"fucompi", UCOM_FIPr>; + + +// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they +// commute. We also allow fdiv[r]p/fsubrp even though they don't commute, +// solely because gas supports it. +def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op)>; +def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>; +def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>; +def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>; +def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>; + +// We accept "fnstsw %eax" even though it only writes %ax. +def : InstAlias<"fnstsw %eax", (FNSTSW8r)>; +def : InstAlias<"fnstsw %al" , (FNSTSW8r)>; +def : InstAlias<"fnstsw" , (FNSTSW8r)>; + +// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but +// this is compatible with what GAS does. +def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"lcall *$dst", (FARCALL32m opaque48mem:$dst)>; +def : InstAlias<"ljmp *$dst", (FARJMP32m opaque48mem:$dst)>; + +// "imul <imm>, B" is an alias for "imul <imm>, B, B". 
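// Concretely, this makes the assembler accept a two-operand spelling such as
//   imull $37, %ecx
// and match it as the three-operand instruction
//   imull $37, %ecx, %ecx
// The aliases below spell this out for each operand width and immediate size: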
+def : InstAlias<"imulw $imm, $r", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm)>; +def : InstAlias<"imulw $imm, $r", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm)>; +def : InstAlias<"imull $imm, $r", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm)>; +def : InstAlias<"imull $imm, $r", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm)>; +def : InstAlias<"imulq $imm, $r",(IMUL64rri32 GR64:$r, GR64:$r,i64i32imm:$imm)>; +def : InstAlias<"imulq $imm, $r", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm)>; + +// inb %dx -> inb %al, %dx +def : InstAlias<"inb %dx", (IN8rr)>; +def : InstAlias<"inw %dx", (IN16rr)>; +def : InstAlias<"inl %dx", (IN32rr)>; +def : InstAlias<"inb $port", (IN8ri i8imm:$port)>; +def : InstAlias<"inw $port", (IN16ri i8imm:$port)>; +def : InstAlias<"inl $port", (IN32ri i8imm:$port)>; + + +// jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp +def : InstAlias<"call $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"jmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"callw $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>; +def : InstAlias<"jmpw $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>; +def : InstAlias<"calll $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>; +def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>; + +// Force mov without a suffix with a segment and mem to prefer the 'l' form of +// the move. All segment/mem forms are equivalent, this has the shortest +// encoding. +def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem)>; +def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>; + +// Match 'movq <largeimm>, <reg>' as an alias for movabsq. +def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>; + +// Match 'movq GR64, MMX' as an alias for movd. +def : InstAlias<"movq $src, $dst", (MMX_MOVD64to64rr VR64:$dst, GR64:$src)>; +def : InstAlias<"movq $src, $dst", (MMX_MOVD64from64rr GR64:$dst, VR64:$src)>; + +// movsd with no operands (as opposed to the SSE scalar move of a double) is an +// alias for movsl. (as in rep; movsd) +def : InstAlias<"movsd", (MOVSD)>; + +// movsx aliases +def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src)>; + +// movzx aliases +def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src)>; +def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src)>; +def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src)>; +def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src)>; +def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src)>; +def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src)>; +// Note: No GR32->GR64 movzx form. 
+ +// outb %dx -> outb %al, %dx +def : InstAlias<"outb %dx", (OUT8rr)>; +def : InstAlias<"outw %dx", (OUT16rr)>; +def : InstAlias<"outl %dx", (OUT32rr)>; +def : InstAlias<"outb $port", (OUT8ir i8imm:$port)>; +def : InstAlias<"outw $port", (OUT16ir i8imm:$port)>; +def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>; + +// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same +// effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity +// errors, since its encoding is the most compact. +def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>; + +// shld/shrd op,op -> shld op, op, 1 +def : InstAlias<"shldw $r1, $r2", (SHLD16rri8 GR16:$r1, GR16:$r2, 1)>; +def : InstAlias<"shldl $r1, $r2", (SHLD32rri8 GR32:$r1, GR32:$r2, 1)>; +def : InstAlias<"shldq $r1, $r2", (SHLD64rri8 GR64:$r1, GR64:$r2, 1)>; +def : InstAlias<"shrdw $r1, $r2", (SHRD16rri8 GR16:$r1, GR16:$r2, 1)>; +def : InstAlias<"shrdl $r1, $r2", (SHRD32rri8 GR32:$r1, GR32:$r2, 1)>; +def : InstAlias<"shrdq $r1, $r2", (SHRD64rri8 GR64:$r1, GR64:$r2, 1)>; + +def : InstAlias<"shldw $mem, $reg", (SHLD16mri8 i16mem:$mem, GR16:$reg, 1)>; +def : InstAlias<"shldl $mem, $reg", (SHLD32mri8 i32mem:$mem, GR32:$reg, 1)>; +def : InstAlias<"shldq $mem, $reg", (SHLD64mri8 i64mem:$mem, GR64:$reg, 1)>; +def : InstAlias<"shrdw $mem, $reg", (SHRD16mri8 i16mem:$mem, GR16:$reg, 1)>; +def : InstAlias<"shrdl $mem, $reg", (SHRD32mri8 i32mem:$mem, GR32:$reg, 1)>; +def : InstAlias<"shrdq $mem, $reg", (SHRD64mri8 i64mem:$mem, GR64:$reg, 1)>; + +/* FIXME: This is disabled because the asm matcher is currently incapable of + * matching a fixed immediate like $1. +// "shl X, $1" is an alias for "shl X". +multiclass ShiftRotateByOneAlias<string Mnemonic, string Opc> { + def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "8r1")) GR8:$op)>; + def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "16r1")) GR16:$op)>; + def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "32r1")) GR32:$op)>; + def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "64r1")) GR64:$op)>; + def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "8m1")) i8mem:$op)>; + def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "16m1")) i16mem:$op)>; + def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "32m1")) i32mem:$op)>; + def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"), + (!cast<Instruction>(!strconcat(Opc, "64m1")) i64mem:$op)>; +} + +defm : ShiftRotateByOneAlias<"rcl", "RCL">; +defm : ShiftRotateByOneAlias<"rcr", "RCR">; +defm : ShiftRotateByOneAlias<"rol", "ROL">; +defm : ShiftRotateByOneAlias<"ror", "ROR">; +FIXME */ + +// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms. +def : InstAlias<"testb $val, $mem", (TEST8rm GR8 :$val, i8mem :$mem)>; +def : InstAlias<"testw $val, $mem", (TEST16rm GR16:$val, i16mem:$mem)>; +def : InstAlias<"testl $val, $mem", (TEST32rm GR32:$val, i32mem:$mem)>; +def : InstAlias<"testq $val, $mem", (TEST64rm GR64:$val, i64mem:$mem)>; + +// xchg: We accept "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as synonyms. 
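+// For example, "xchgl %ecx, (%rdx)" and "xchgl (%rdx), %ecx" assemble
+// identically; xchg with a memory operand has no meaningful direction.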
+def : InstAlias<"xchgb $mem, $val", (XCHG8rm GR8 :$val, i8mem :$mem)>; +def : InstAlias<"xchgw $mem, $val", (XCHG16rm GR16:$val, i16mem:$mem)>; +def : InstAlias<"xchgl $mem, $val", (XCHG32rm GR32:$val, i32mem:$mem)>; +def : InstAlias<"xchgq $mem, $val", (XCHG64rm GR64:$val, i64mem:$mem)>; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 11d4179..bb2165a 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -1,4 +1,4 @@ -//====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===// +//====- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -11,6 +11,9 @@ // and properties of the instructions which are needed for code generation, // machine code emission, and analysis. // +// All instructions that use MMX should be in this file, even if they also use +// SSE. +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -18,58 +21,23 @@ //===----------------------------------------------------------------------===// let Constraints = "$src1 = $dst" in { - // MMXI_binop_rm - Simple MMX binary operator. - multiclass MMXI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit Commutable = 0> { - def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (OpVT (OpNode VR64:$src1, VR64:$src2)))]> { - let isCommutable = Commutable; - } - def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (OpVT (OpNode VR64:$src1, - (bitconvert - (load_mmx addr:$src2)))))]>; - } - + // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic. + // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp. multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, bit Commutable = 0> { - def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), + def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]> { let isCommutable = Commutable; } - def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), + def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId VR64:$src1, (bitconvert (load_mmx addr:$src2))))]>; } - // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64. - // - // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew - // to collapse (bitconvert VT to VT) into its operand. 
- // - multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode, - bit Commutable = 0> { - def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> { - let isCommutable = Commutable; - } - def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, - (OpNode VR64:$src1,(load_mmx addr:$src2)))]>; - } - multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, string OpcodeStr, Intrinsic IntId, Intrinsic IntId2> { @@ -89,14 +57,75 @@ let Constraints = "$src1 = $dst" in { } } +/// Unary MMX instructions requiring SSSE3. +multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, + Intrinsic IntId64> { + def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR64:$dst, (IntId64 VR64:$src))]>; + + def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR64:$dst, + (IntId64 (bitconvert (memopmmx addr:$src))))]>; +} + +/// Binary MMX instructions requiring SSSE3. +let ImmT = NoImm, Constraints = "$src1 = $dst" in { +multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, + Intrinsic IntId64> { + let isCommutable = 0 in + def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>; + def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + [(set VR64:$dst, + (IntId64 VR64:$src1, + (bitconvert (memopmmx addr:$src2))))]>; +} +} + +/// PALIGN MMX instructions (require SSSE3). 
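+/// For example, "palignr $3, %mm1, %mm0" concatenates %mm0:%mm1, shifts the
+/// 128-bit result right by 3 bytes, and keeps the low 64 bits in %mm0
+/// (register choice here is only for illustration).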
+multiclass ssse3_palign_mm<string asm, Intrinsic IntId> { + def R64irr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>; + def R64irm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR64:$dst, (IntId VR64:$src1, + (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>; +} + +multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, + string asm, Domain d> { + def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + [(set DstRC:$dst, (Int SrcRC:$src))], d>; + def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>; +} + +multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, + RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, + PatFrag ld_frag, string asm, Domain d> { + def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2), + asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>; + def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), + (ins DstRC:$src1, x86memop:$src2), asm, + [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>; +} + //===----------------------------------------------------------------------===// -// MMX EMMS & FEMMS Instructions +// MMX EMMS Instruction //===----------------------------------------------------------------------===// def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>; -def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", - [(int_x86_mmx_femms)]>; //===----------------------------------------------------------------------===// // MMX Scalar Instructions @@ -106,12 +135,12 @@ def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v2i32 (scalar_to_vector GR32:$src)))]>; -let canFoldAsLoad = 1, isReMaterializable = 1 in + (x86mmx (scalar_to_vector GR32:$src)))]>; +let canFoldAsLoad = 1 in def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>; + (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>; let mayStore = 1 in def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; @@ -123,42 +152,41 @@ def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; -let neverHasSideEffects = 1 in // These are 64 bit moves, but since the OS X assembler doesn't // recognize a register-register movq, we write them as // movd. 
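+// For illustration, the GR64<->MMX register form is printed as
+// "movd %rax, %mm0" even though it moves all 64 bits.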
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), - "movd\t{$src, $dst|$dst, $src}", []>; + "movd\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, + (bitconvert VR64:$src))]>; def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v1i64 (scalar_to_vector GR64:$src)))]>; - + (bitconvert GR64:$src))]>; let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1 in def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (load_mmx addr:$src))]>; def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", - [(store (v1i64 VR64:$src), addr:$dst)]>; + [(store (x86mmx VR64:$src), addr:$dst)]>; def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v1i64 (bitconvert + (x86mmx (bitconvert (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))))))]>; def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (movl immAllZerosV, - (v2i64 (scalar_to_vector - (i64 (bitconvert (v1i64 VR64:$src)))))))]>; + (v2i64 (scalar_to_vector + (i64 (bitconvert (x86mmx VR64:$src))))))]>; let neverHasSideEffects = 1 in def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), @@ -176,34 +204,40 @@ let AddedComplexity = 15 in def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>; + (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))]>; let AddedComplexity = 20 in def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (v2i32 (X86vzmovl (v2i32 + (x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))))]>; // Arithmetic Instructions - +defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b>; +defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w>; +defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d>; // -- Addition -defm MMX_PADDB : MMXI_binop_rm<0xFC, "paddb", add, v8i8, 1>; -defm MMX_PADDW : MMXI_binop_rm<0xFD, "paddw", add, v4i16, 1>; -defm MMX_PADDD : MMXI_binop_rm<0xFE, "paddd", add, v2i32, 1>; -defm MMX_PADDQ : MMXI_binop_rm<0xD4, "paddq", add, v1i64, 1>; - +defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>; +defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>; +defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>; +defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>; defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>; defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>; defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>; defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>; +defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w>; +defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d>; +defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, 
"phaddsw",int_x86_ssse3_phadd_sw>; + + // -- Subtraction -defm MMX_PSUBB : MMXI_binop_rm<0xF8, "psubb", sub, v8i8>; -defm MMX_PSUBW : MMXI_binop_rm<0xF9, "psubw", sub, v4i16>; -defm MMX_PSUBD : MMXI_binop_rm<0xFA, "psubd", sub, v2i32>; -defm MMX_PSUBQ : MMXI_binop_rm<0xFB, "psubq", sub, v1i64>; +defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>; +defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>; +defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>; +defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>; defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>; defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>; @@ -211,16 +245,25 @@ defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>; defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>; defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>; +defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w>; +defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d>; +defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw>; + // -- Multiplication -defm MMX_PMULLW : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>; +defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>; defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>; defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>; defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>; +let isCommutable = 1 in +defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", + int_x86_ssse3_pmul_hr_sw>; // -- Miscellanea defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>; +defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw", + int_x86_ssse3_pmadd_ub_sw>; defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>; defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>; @@ -232,23 +275,17 @@ defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>; defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>; -// Logical Instructions -defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>; -defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>; -defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>; +defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b>; +defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w>; +defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d>; +let Constraints = "$src1 = $dst" in + defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>; -let Constraints = "$src1 = $dst" in { - def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1), - VR64:$src2)))]>; - def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1), - (load addr:$src2))))]>; -} +// Logical Instructions +defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>; +defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>; +defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>; +defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", 
int_x86_mmx_pandn, 1>; // Shift Instructions defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", @@ -270,12 +307,6 @@ defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_mmx_psra_d, int_x86_mmx_psrai_d>; -// Shift up / down and insert zero's. -def : Pat<(v1i64 (X86vshl VR64:$src, (i8 imm:$amt))), - (MMX_PSLLQri VR64:$src, (GetLo32XForm imm:$amt))>; -def : Pat<(v1i64 (X86vshr VR64:$src, (i8 imm:$amt))), - (MMX_PSRLQri VR64:$src, (GetLo32XForm imm:$amt))>; - // Comparison Instructions defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>; defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>; @@ -285,84 +316,19 @@ defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>; defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>; defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>; -// Conversion Instructions - // -- Unpack Instructions -let Constraints = "$src1 = $dst" in { - // Unpack High Packed Data Instructions - def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v8i8 (mmx_unpckh VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)))))]>; - - def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpckhwd\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpckhwd\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v4i16 (mmx_unpckh VR64:$src1, - (bc_v4i16 (load_mmx addr:$src2)))))]>; - - def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpckhdq\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v2i32 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpckhdq\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v2i32 (mmx_unpckh VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)))))]>; - - // Unpack Low Packed Data Instructions - def MMX_PUNPCKLBWrr : MMXI<0x60, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpcklbw\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v8i8 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKLBWrm : MMXI<0x60, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpcklbw\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v8i8 (mmx_unpckl VR64:$src1, - (bc_v8i8 (load_mmx addr:$src2)))))]>; - - def MMX_PUNPCKLWDrr : MMXI<0x61, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpcklwd\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v4i16 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKLWDrm : MMXI<0x61, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpcklwd\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v4i16 (mmx_unpckl VR64:$src1, - (bc_v4i16 (load_mmx addr:$src2)))))]>; - - def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), - "punpckldq\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v2i32 (mmx_unpckl 
VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), - "punpckldq\t{$src2, $dst|$dst, $src2}", - [(set VR64:$dst, - (v2i32 (mmx_unpckl VR64:$src1, - (bc_v2i32 (load_mmx addr:$src2)))))]>; -} +defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw", + int_x86_mmx_punpckhbw>; +defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd", + int_x86_mmx_punpckhwd>; +defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq", + int_x86_mmx_punpckhdq>; +defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw", + int_x86_mmx_punpcklbw>; +defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd", + int_x86_mmx_punpcklwd>; +defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq", + int_x86_mmx_punpckldq>; // -- Pack Instructions defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>; @@ -370,93 +336,80 @@ defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>; defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>; // -- Shuffle Instructions +defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b>; + def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (v4i16 (mmx_pshufw:$src2 VR64:$src1, (undef))))]>; + (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>; def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (mmx_pshufw:$src2 (bc_v4i16 (load_mmx addr:$src1)), - (undef)))]>; + (int_x86_sse_pshuf_w (load_mmx addr:$src1), + imm:$src2))]>; -// -- Conversion Instructions -let neverHasSideEffects = 1 in { -def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvtpd2pi\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst), - (ins f128mem:$src), - "cvtpd2pi\t{$src, $dst|$dst, $src}", []>; - -def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), - "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), - (ins i64mem:$src), - "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; - -def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), - "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), - (ins i64mem:$src), - "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; - -def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvtps2pi\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), - "cvtps2pi\t{$src, $dst|$dst, $src}", []>; - -def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), - (ins f128mem:$src), - "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; - -def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvttps2pi\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in -def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), - "cvttps2pi\t{$src, $dst|$dst, $src}", []>; -} // end neverHasSideEffects -// Extract / Insert -def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW", - 
SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>, - SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; -def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg, - (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2), +// -- Conversion Instructions +defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi, + f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB; +defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi, + f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; +defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi, + f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB; +defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi, + f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; +defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd, + i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; +let Constraints = "$src1 = $dst" in { + defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128, + int_x86_sse_cvtpi2ps, + i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}", + SSEPackedSingle>, TB; +} + +// Extract / Insert +def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg, + (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1), + [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1, (iPTR imm:$src2)))]>; let Constraints = "$src1 = $dst" in { - def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg, + def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, GR32:$src2,i16i8imm:$src3), + (ins VR64:$src1, GR32:$src2, i32i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), - GR32:$src2,(iPTR imm:$src3))))]>; - def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem, + [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, + GR32:$src2, (iPTR imm:$src3)))]>; + + def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3), + (ins VR64:$src1, i16mem:$src2, i32i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR64:$dst, - (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), - (i32 (anyext (loadi16 addr:$src2))), - (iPTR imm:$src3))))]>; + [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, + (i32 (anyext (loadi16 addr:$src2))), + (iPTR imm:$src3)))]>; } +// Mask creation +def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, + (int_x86_mmx_pmovmskb VR64:$src))]>; + + // MMX to XMM for vector types def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, - [SDTCisVT<0, v2i64>, SDTCisVT<1, v1i64>]>>; + [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>; def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; @@ -464,14 +417,19 @@ def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))), (v2i64 (MOVQI2PQIrm addr:$src))>; -def : Pat<(v2i64 (MMX_X86movq2dq (v1i64 (bitconvert - (v2i32 (scalar_to_vector (loadi32 addr:$src))))))), +def : Pat<(v2i64 (MMX_X86movq2dq + (x86mmx (scalar_to_vector (loadi32 addr:$src))))), (v2i64 (MOVDI2PDIrm addr:$src))>; -// Mask creation -def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), - "pmovmskb\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, 
(int_x86_mmx_pmovmskb VR64:$src))]>; +// Low word of XMM to MMX. +def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1, + [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>; + +def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)), + (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>; + +def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))), + (x86mmx (MMX_MOVQ64rm addr:$src))>; // Misc. let Uses = [EDI] in @@ -483,181 +441,14 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>; -//===----------------------------------------------------------------------===// -// Alias Instructions -//===----------------------------------------------------------------------===// - -// Alias instructions that map zero vector to pxor. -let isReMaterializable = 1, isCodeGenOnly = 1 in { - // FIXME: Change encoding to pseudo. - def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), "", - [(set VR64:$dst, (v2i32 immAllZerosV))]>; - def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), "", - [(set VR64:$dst, (v2i32 immAllOnesV))]>; -} - -let Predicates = [HasMMX] in { - def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>; - def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>; - def : Pat<(v8i8 immAllZerosV), (MMX_V_SET0)>; -} - -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns -//===----------------------------------------------------------------------===// - -// Store 64-bit integer vector values. -def : Pat<(store (v8i8 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; -def : Pat<(store (v4i16 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; -def : Pat<(store (v2i32 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; -def : Pat<(store (v1i64 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; - -// Bit convert. -def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>; -def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>; -def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>; -def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>; -def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>; -def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>; -def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>; -def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>; -def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>; -def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>; - // 64-bit bit convert. 
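+// These select plain GPR/FP <-> MMX moves; e.g. the i64 -> x86mmx direction
+// is emitted as "movd %rax, %mm0" (registers chosen only for illustration).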
-def : Pat<(v1i64 (bitconvert (i64 GR64:$src))), +def : Pat<(x86mmx (bitconvert (i64 GR64:$src))), (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(v2i32 (bitconvert (i64 GR64:$src))), - (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(v4i16 (bitconvert (i64 GR64:$src))), - (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(v8i8 (bitconvert (i64 GR64:$src))), - (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(i64 (bitconvert (v1i64 VR64:$src))), - (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(i64 (bitconvert (v2i32 VR64:$src))), +def : Pat<(i64 (bitconvert (x86mmx VR64:$src))), (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(i64 (bitconvert (v4i16 VR64:$src))), - (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(i64 (bitconvert (v8i8 VR64:$src))), - (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(f64 (bitconvert (v1i64 VR64:$src))), - (MMX_MOVQ2FR64rr VR64:$src)>; -def : Pat<(f64 (bitconvert (v2i32 VR64:$src))), - (MMX_MOVQ2FR64rr VR64:$src)>; -def : Pat<(f64 (bitconvert (v4i16 VR64:$src))), +def : Pat<(f64 (bitconvert (x86mmx VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; -def : Pat<(f64 (bitconvert (v8i8 VR64:$src))), - (MMX_MOVQ2FR64rr VR64:$src)>; -def : Pat<(v1i64 (bitconvert (f64 FR64:$src))), - (MMX_MOVFR642Qrr FR64:$src)>; -def : Pat<(v2i32 (bitconvert (f64 FR64:$src))), - (MMX_MOVFR642Qrr FR64:$src)>; -def : Pat<(v4i16 (bitconvert (f64 FR64:$src))), +def : Pat<(x86mmx (bitconvert (f64 FR64:$src))), (MMX_MOVFR642Qrr FR64:$src)>; -def : Pat<(v8i8 (bitconvert (f64 FR64:$src))), - (MMX_MOVFR642Qrr FR64:$src)>; - -let AddedComplexity = 20 in { - def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))), - (MMX_MOVZDI2PDIrm addr:$src)>; -} - -// Clear top half. -let AddedComplexity = 15 in { - def : Pat<(v2i32 (X86vzmovl VR64:$src)), - (MMX_PUNPCKLDQrr VR64:$src, (v2i32 (MMX_V_SET0)))>; -} - -// Patterns to perform canonical versions of vector shuffling. -let AddedComplexity = 10 in { - def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))), - (MMX_PUNPCKLBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (mmx_unpckl_undef VR64:$src, (undef))), - (MMX_PUNPCKLWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (mmx_unpckl_undef VR64:$src, (undef))), - (MMX_PUNPCKLDQrr VR64:$src, VR64:$src)>; -} -let AddedComplexity = 10 in { - def : Pat<(v8i8 (mmx_unpckh_undef VR64:$src, (undef))), - (MMX_PUNPCKHBWrr VR64:$src, VR64:$src)>; - def : Pat<(v4i16 (mmx_unpckh_undef VR64:$src, (undef))), - (MMX_PUNPCKHWDrr VR64:$src, VR64:$src)>; - def : Pat<(v2i32 (mmx_unpckh_undef VR64:$src, (undef))), - (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>; -} -// Some special case PANDN patterns. -// FIXME: Get rid of these. -def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))), - VR64:$src2)), - (MMX_PANDNrr VR64:$src1, VR64:$src2)>; -def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))), - (load addr:$src2))), - (MMX_PANDNrm VR64:$src1, addr:$src2)>; - -// Move MMX to lower 64-bit of XMM -def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v4i16 VR64:$src))))), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v2i32 VR64:$src))))), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; -def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v1i64 VR64:$src))))), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; - -// Move lower 64-bit of XMM to MMX. 
-def : Pat<(v2i32 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v2i32 (MMX_MOVDQ2Qrr VR128:$src))>; -def : Pat<(v4i16 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v4i16 (MMX_MOVDQ2Qrr VR128:$src))>; -def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))), - (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>; - -// Patterns for vector comparisons -def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, VR64:$src2)), - (MMX_PCMPEQBrr VR64:$src1, VR64:$src2)>; -def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPEQBrm VR64:$src1, addr:$src2)>; -def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, VR64:$src2)), - (MMX_PCMPEQWrr VR64:$src1, VR64:$src2)>; -def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPEQWrm VR64:$src1, addr:$src2)>; -def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, VR64:$src2)), - (MMX_PCMPEQDrr VR64:$src1, VR64:$src2)>; -def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPEQDrm VR64:$src1, addr:$src2)>; - -def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, VR64:$src2)), - (MMX_PCMPGTBrr VR64:$src1, VR64:$src2)>; -def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPGTBrm VR64:$src1, addr:$src2)>; -def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, VR64:$src2)), - (MMX_PCMPGTWrr VR64:$src1, VR64:$src2)>; -def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPGTWrm VR64:$src1, addr:$src2)>; -def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, VR64:$src2)), - (MMX_PCMPGTDrr VR64:$src1, VR64:$src2)>; -def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, (bitconvert (load_mmx addr:$src2)))), - (MMX_PCMPGTDrm VR64:$src1, addr:$src2)>; - -// CMOV* - Used to implement the SELECT DAG operation. Expanded after -// instruction selection into a branch sequence. -let Uses = [EFLAGS], usesCustomInserter = 1 in { - def CMOV_V1I64 : I<0, Pseudo, - (outs VR64:$dst), (ins VR64:$t, VR64:$f, i8imm:$cond), - "#CMOV_V1I64 PSEUDO!", - [(set VR64:$dst, - (v1i64 (X86cmov VR64:$t, VR64:$f, imm:$cond, - EFLAGS)))]>; -} diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index f5466f8..b912949 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -15,43 +15,6 @@ //===----------------------------------------------------------------------===// -// SSE scalar FP Instructions -//===----------------------------------------------------------------------===// - -// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded after -// instruction selection into a branch sequence. 
-let Uses = [EFLAGS], usesCustomInserter = 1 in { - def CMOV_FR32 : I<0, Pseudo, - (outs FR32:$dst), (ins FR32:$t, FR32:$f, i8imm:$cond), - "#CMOV_FR32 PSEUDO!", - [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond, - EFLAGS))]>; - def CMOV_FR64 : I<0, Pseudo, - (outs FR64:$dst), (ins FR64:$t, FR64:$f, i8imm:$cond), - "#CMOV_FR64 PSEUDO!", - [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond, - EFLAGS))]>; - def CMOV_V4F32 : I<0, Pseudo, - (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond), - "#CMOV_V4F32 PSEUDO!", - [(set VR128:$dst, - (v4f32 (X86cmov VR128:$t, VR128:$f, imm:$cond, - EFLAGS)))]>; - def CMOV_V2F64 : I<0, Pseudo, - (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond), - "#CMOV_V2F64 PSEUDO!", - [(set VR128:$dst, - (v2f64 (X86cmov VR128:$t, VR128:$f, imm:$cond, - EFLAGS)))]>; - def CMOV_V2I64 : I<0, Pseudo, - (outs VR128:$dst), (ins VR128:$t, VR128:$f, i8imm:$cond), - "#CMOV_V2I64 PSEUDO!", - [(set VR128:$dst, - (v2i64 (X86cmov VR128:$t, VR128:$f, imm:$cond, - EFLAGS)))]>; -} - -//===----------------------------------------------------------------------===// // SSE 1 & 2 Instructions Classes //===----------------------------------------------------------------------===// @@ -82,17 +45,15 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", - !strconcat(SSEVer, !strconcat("_", - !strconcat(OpcodeStr, FPSizeStr)))) + [(set RC:$dst, (!cast<Intrinsic>( + !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) RC:$src1, RC:$src2))]>; def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", - !strconcat(SSEVer, !strconcat("_", - !strconcat(OpcodeStr, FPSizeStr)))) + [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse", + SSEVer, "_", OpcodeStr, FPSizeStr)) RC:$src1, mem_cpat:$src2))]>; } @@ -142,17 +103,15 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_", - !strconcat(SSEVer, !strconcat("_", - !strconcat(OpcodeStr, FPSizeStr)))) + [(set RC:$dst, (!cast<Intrinsic>( + !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr)) RC:$src1, RC:$src2))], d>; def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_", - !strconcat(SSEVer, !strconcat("_", - !strconcat(OpcodeStr, FPSizeStr)))) + [(set RC:$dst, (!cast<Intrinsic>( + !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr)) RC:$src1, (mem_frag addr:$src2)))], d>; } @@ -221,6 +180,12 @@ def : Pat<(v4f32 (scalar_to_vector FR32:$src)), // Implicitly promote a 64-bit scalar to a vector. def : Pat<(v2f64 (scalar_to_vector FR64:$src)), (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; +// Implicitly promote a 32-bit scalar to a vector. +def : Pat<(v8f32 (scalar_to_vector FR32:$src)), + (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; +// Implicitly promote a 64-bit scalar to a vector. 
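+// (The scalar becomes element 0 of an otherwise undefined YMM register via
+// INSERT_SUBREG, mirroring the 128-bit patterns above.)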
+def : Pat<(v4f64 (scalar_to_vector FR64:$src)), + (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; let AddedComplexity = 20 in { // MOVSSrm zeros the high parts of the register; represent this @@ -403,7 +368,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, string asm_opr> { def PSrm : PI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), - !strconcat(!strconcat(base_opc,"s"), asm_opr), + !strconcat(base_opc, "s", asm_opr), [(set RC:$dst, (mov_frag RC:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], @@ -411,7 +376,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC, def PDrm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, f64mem:$src2), - !strconcat(!strconcat(base_opc,"d"), asm_opr), + !strconcat(base_opc, "d", asm_opr), [(set RC:$dst, (v2f64 (mov_frag RC:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], SSEPackedDouble>, TB, OpSize; @@ -598,14 +563,6 @@ defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, // Conversion Instructions Intrinsics - Match intrinsics which expect MM // and/or XMM operand(s). -multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, - Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, - string asm, Domain d> { - def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (Int SrcRC:$src))], d>; - def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>; -} multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, @@ -618,16 +575,6 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>; } -multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, - RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, - PatFrag ld_frag, string asm, Domain d> { - def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2), - asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>; - def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), - (ins DstRC:$src1, x86memop:$src2), asm, - [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>; -} - multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, bit Is2Addr = 1> { @@ -669,13 +616,11 @@ defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, f32mem, load, "cvtss2si">, XS; defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si{q}">, XS, REX_W; -defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si">, XD; -defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, - f128mem, load, "cvtsd2si">, XD, REX_W; +defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, + f128mem, load, "cvtsd2si{l}">, XD; +defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, + f128mem, load, "cvtsd2si{q}">, XD, REX_W; -defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD, - REX_W; let isAsmParserOnly = 1 in { defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, @@ -705,29 +650,6 @@ let Constraints = "$src1 = $dst" in { "cvtsi2sd">, XD, REX_W; } -// Instructions below don't have an AVX form. 
-defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi, - f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}", - SSEPackedSingle>, TB; -defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi, - f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}", - SSEPackedDouble>, TB, OpSize; -defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi, - f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}", - SSEPackedSingle>, TB; -defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi, - f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}", - SSEPackedDouble>, TB, OpSize; -defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd, - i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}", - SSEPackedDouble>, TB, OpSize; -let Constraints = "$src1 = $dst" in { - defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128, - int_x86_sse_cvtpi2ps, - i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}", - SSEPackedSingle>, TB; -} - /// SSE 1 Only // Aliases for intrinsics @@ -738,10 +660,10 @@ defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse_cvttss2si64, f32mem, load, "cvttss2si">, XS, VEX, VEX_W; defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttss2si">, XD, VEX; + f128mem, load, "cvttsd2si">, XD, VEX; defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, f128mem, load, - "cvttss2si">, XD, VEX, VEX_W; + "cvttsd2si">, XD, VEX, VEX_W; } defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, f32mem, load, "cvttss2si">, XS; @@ -749,10 +671,10 @@ defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse_cvttss2si64, f32mem, load, "cvttss2si{q}">, XS, REX_W; defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttss2si">, XD; + f128mem, load, "cvttsd2si">, XD; defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, f128mem, load, - "cvttss2si{q}">, XD, REX_W; + "cvttsd2si{q}">, XD, REX_W; let isAsmParserOnly = 1, Pattern = []<dag> in { defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, @@ -790,6 +712,9 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V; } +def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, + Requires<[HasAVX]>; + def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround FR64:$src))]>; @@ -817,6 +742,9 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>; } +def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>, + Requires<[HasAVX]>; + def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fextend FR32:$src))]>, XS, @@ -973,9 +901,13 @@ def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; } def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", []>; + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq VR128:$src))]>; def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, 
$src}", []>; + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq (memop addr:$src)))]>; let isAsmParserOnly = 1 in { @@ -990,16 +922,6 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (memop addr:$src)))]>, XS, VEX, Requires<[HasAVX]>; } -def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))]>, - XS, Requires<[HasSSE2]>; -def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttps2dq - (memop addr:$src)))]>, - XS, Requires<[HasSSE2]>; let isAsmParserOnly = 1 in { def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), @@ -1013,13 +935,13 @@ def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memop addr:$src)))]>, VEX; } -def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>; -def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memop addr:$src)))]>; +def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>; +def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memop addr:$src)))]>; let isAsmParserOnly = 1 in { // The assembler can recognize rr 256-bit instructions by seeing a ymm @@ -1469,9 +1391,11 @@ let AddedComplexity = 10 in { /// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, Domain d> { - def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [(set GR32:$dst, (Int RC:$src))], d>; + def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W; } // Mask creation @@ -1522,6 +1446,12 @@ def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>, TB, OpSize; +def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", + [(set FR32:$dst, fp32imm0)]>, + Requires<[HasAVX]>, TB, OpSize, VEX_4V; +def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", + [(set FR64:$dst, fpimm0)]>, + Requires<[HasAVX]>, TB, OpSize, VEX_4V; } // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. 
Upper @@ -1654,19 +1584,13 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; let isCommutable = 0 in defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [ // single r+r - [(set VR128:$dst, (v2i64 (and (xor VR128:$src1, - (bc_v2i64 (v4i32 immAllOnesV))), - VR128:$src2)))], + [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))], // double r+r - [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), - (bc_v2i64 (v2f64 VR128:$src2))))], + [], // single r+m - [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)), - (bc_v2i64 (v4i32 immAllOnesV))), - (memopv2i64 addr:$src2))))], + [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))], // double r+m - [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), - (memopv2i64 addr:$src2)))]]>; + []]>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Arithmetic Instructions @@ -2170,7 +2094,7 @@ def : Pat<(X86SFence), (SFENCE)>; // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. // FIXME: Change encoding to pseudo! This is blocked right now by the x86 -// JIT implementatioan, it does not expand the instructions below like +// JIT implementation, it does not expand the instructions below like // X86MCInstLower does. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isCodeGenOnly = 1 in { @@ -2277,6 +2201,10 @@ let neverHasSideEffects = 1 in def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", []>; +def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", + []>, XS, Requires<[HasSSE2]>; + let canFoldAsLoad = 1, mayLoad = 1 in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", @@ -2606,15 +2534,11 @@ let ExeDomain = SSEPackedInt in { } def PANDNrr : PDI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - VR128:$src2)))]>; + "pandn\t{$src2, $dst|$dst, $src2}", []>; def PANDNrm : PDI<0xDF, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - (memopv2i64 addr:$src2))))]>; + "pandn\t{$src2, $dst|$dst, $src2}", []>; } } // Constraints = "$src1 = $dst" @@ -3009,6 +2933,13 @@ def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>; +def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 (scalar_to_vector GR64:$src)))]>; +def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert GR64:$src))]>; // Move Int Doubleword to Single Scalar @@ -3051,6 +2982,21 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)]>; +def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), + (iPTR 0)))]>; +def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), + 
"movq\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>; + +def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (bitconvert FR64:$src))]>; +def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), + "movq\t{$src, $dst|$dst, $src}", + [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; + // Move Scalar Single to Double Int let isAsmParserOnly = 1 in { def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), @@ -3532,18 +3478,6 @@ let Constraints = "$src1 = $dst" in { // SSSE3 - Packed Absolute Instructions //===---------------------------------------------------------------------===// -/// SS3I_unop_rm_int_mm - Simple SSSE3 unary whose type can be v*{i8,i16,i32}. -multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, - PatFrag mem_frag64, Intrinsic IntId64> { - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, (IntId64 VR64:$src))]>; - - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, - (IntId64 (bitconvert (mem_frag64 addr:$src))))]>; -} /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, @@ -3572,19 +3506,11 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in { } defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8, - int_x86_ssse3_pabs_b_128>, - SS3I_unop_rm_int_mm<0x1C, "pabsb", memopv8i8, - int_x86_ssse3_pabs_b>; - + int_x86_ssse3_pabs_b_128>; defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16, - int_x86_ssse3_pabs_w_128>, - SS3I_unop_rm_int_mm<0x1D, "pabsw", memopv4i16, - int_x86_ssse3_pabs_w>; - + int_x86_ssse3_pabs_w_128>; defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32, - int_x86_ssse3_pabs_d_128>, - SS3I_unop_rm_int_mm<0x1E, "pabsd", memopv2i32, - int_x86_ssse3_pabs_d>; + int_x86_ssse3_pabs_d_128>; //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions @@ -3611,20 +3537,6 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, (IntId128 VR128:$src1, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, - PatFrag mem_frag64, Intrinsic IntId64> { - let isCommutable = 1 in - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>; - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, - (IntId64 VR64:$src1, - (bitconvert (memopv8i8 addr:$src2))))]>; -} let isAsmParserOnly = 1, Predicates = [HasAVX] in { let isCommutable = 0 in { @@ -3659,54 +3571,30 @@ defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16, let ImmT = NoImm, Constraints = "$src1 = $dst" in { let isCommutable = 0 in { defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16, - int_x86_ssse3_phadd_w_128>, - SS3I_binop_rm_int_mm<0x01, "phaddw", memopv4i16, - int_x86_ssse3_phadd_w>; + int_x86_ssse3_phadd_w_128>; defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32, - int_x86_ssse3_phadd_d_128>, - SS3I_binop_rm_int_mm<0x02, "phaddd", memopv2i32, - int_x86_ssse3_phadd_d>; + 
int_x86_ssse3_phadd_d_128>; defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16, - int_x86_ssse3_phadd_sw_128>, - SS3I_binop_rm_int_mm<0x03, "phaddsw", memopv4i16, - int_x86_ssse3_phadd_sw>; + int_x86_ssse3_phadd_sw_128>; defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16, - int_x86_ssse3_phsub_w_128>, - SS3I_binop_rm_int_mm<0x05, "phsubw", memopv4i16, - int_x86_ssse3_phsub_w>; + int_x86_ssse3_phsub_w_128>; defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32, - int_x86_ssse3_phsub_d_128>, - SS3I_binop_rm_int_mm<0x06, "phsubd", memopv2i32, - int_x86_ssse3_phsub_d>; + int_x86_ssse3_phsub_d_128>; defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16, - int_x86_ssse3_phsub_sw_128>, - SS3I_binop_rm_int_mm<0x07, "phsubsw", memopv4i16, - int_x86_ssse3_phsub_sw>; + int_x86_ssse3_phsub_sw_128>; defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8, - int_x86_ssse3_pmadd_ub_sw_128>, - SS3I_binop_rm_int_mm<0x04, "pmaddubsw", memopv8i8, - int_x86_ssse3_pmadd_ub_sw>; - defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, - int_x86_ssse3_pshuf_b_128>, - SS3I_binop_rm_int_mm<0x00, "pshufb", memopv8i8, - int_x86_ssse3_pshuf_b>; + int_x86_ssse3_pmadd_ub_sw_128>; + defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8, + int_x86_ssse3_pshuf_b_128>; defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8, - int_x86_ssse3_psign_b_128>, - SS3I_binop_rm_int_mm<0x08, "psignb", memopv8i8, - int_x86_ssse3_psign_b>; + int_x86_ssse3_psign_b_128>; defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16, - int_x86_ssse3_psign_w_128>, - SS3I_binop_rm_int_mm<0x09, "psignw", memopv4i16, - int_x86_ssse3_psign_w>; + int_x86_ssse3_psign_w_128>; defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32, - int_x86_ssse3_psign_d_128>, - SS3I_binop_rm_int_mm<0x0A, "psignd", memopv2i32, - int_x86_ssse3_psign_d>; + int_x86_ssse3_psign_d_128>; } defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16, - int_x86_ssse3_pmul_hr_sw_128>, - SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", memopv4i16, - int_x86_ssse3_pmul_hr_sw>; + int_x86_ssse3_pmul_hr_sw_128>; } def : Pat<(X86pshufb VR128:$src, VR128:$mask), @@ -3714,19 +3602,17 @@ def : Pat<(X86pshufb VR128:$src, VR128:$mask), def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>; +def : Pat<(X86psignb VR128:$src1, VR128:$src2), + (PSIGNBrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>; +def : Pat<(X86psignw VR128:$src1, VR128:$src2), + (PSIGNWrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>; +def : Pat<(X86psignd VR128:$src1, VR128:$src2), + (PSIGNDrr128 VR128:$src1, VR128:$src2)>, Requires<[HasSSSE3]>; + //===---------------------------------------------------------------------===// // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// -multiclass ssse3_palign_mm<string asm> { - def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2, i8imm:$src3), - !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; - def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2, i8imm:$src3), - !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; -} - multiclass ssse3_palign<string asm, bit Is2Addr = 1> { def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), @@ -3747,28 +3633,9 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> { let isAsmParserOnly = 1, 
Predicates = [HasAVX] in defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; let Constraints = "$src1 = $dst" in - defm PALIGN : ssse3_palign<"palignr">, - ssse3_palign_mm<"palignr">; + defm PALIGN : ssse3_palign<"palignr">; let AddedComplexity = 5 in { - -def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)), - (PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)), - (PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)), - (PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; -def : Pat<(v8i8 (palign:$src3 VR64:$src1, VR64:$src2)), - (PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; - def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)), (PALIGNR128rr VR128:$src2, VR128:$src1, (SHUFFLE_get_palign_imm VR128:$src3))>, @@ -3792,10 +3659,27 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)), //===---------------------------------------------------------------------===// // Thread synchronization -def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", - [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; -def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", - [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; +let usesCustomInserter = 1 in { +def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), + [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>; +def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2), + [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>; +} + +let Uses = [EAX, ECX, EDX] in +def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB, + Requires<[HasSSE3]>; +let Uses = [ECX, EAX] in +def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB, + Requires<[HasSSE3]>; + +def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; +def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>; + +def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>, + Requires<[In32BitMode]>; +def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>, + Requires<[In64BitMode]>; //===---------------------------------------------------------------------===// // Non-Instruction Patterns @@ -3811,7 +3695,7 @@ let Predicates = [HasSSE2] in (CVTSS2SDrm addr:$src)>; // bit_convert -let Predicates = [HasSSE2] in { +let Predicates = [HasXMMInt] in { def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; @@ -3844,6 +3728,10 @@ let Predicates = [HasSSE2] in { def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; } +let Predicates = [HasAVX] in { + def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>; +} + // Move scalar to XMM zero-extended // movd to XMM register zero-extends let AddedComplexity = 15 in { @@ -4017,36 +3905,11 @@ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>; -// Some special case pandn patterns. 
-def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))), - VR128:$src2)), - (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))), - VR128:$src2)), - (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))), - VR128:$src2)), - (PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; - -def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))), - (memop addr:$src2))), - (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))), - (memop addr:$src2))), - (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))), - (memop addr:$src2))), - (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; - // vector -> vector casts def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), - (Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))), - (Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>; -def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))), - (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>; + (CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>; // Use movaps / movups for SSE integer load / store (one byte shorter). let Predicates = [HasSSE1] in { @@ -4504,7 +4367,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, Intrinsic V4F32Int, Intrinsic V2F64Int> { // Intrinsic operation, reg. // Vector intrinsic operation, reg - def PSr_Int : SS4AIi8<opcps, MRMSrcReg, + def PSr : SS4AIi8<opcps, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -4512,7 +4375,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, OpSize; // Vector intrinsic operation, mem - def PSm_Int : Ii8<opcps, MRMSrcMem, + def PSm : Ii8<opcps, MRMSrcMem, (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -4522,7 +4385,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, Requires<[HasSSE41]>; // Vector intrinsic operation, reg - def PDr_Int : SS4AIi8<opcpd, MRMSrcReg, + def PDr : SS4AIi8<opcpd, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -4530,7 +4393,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, OpSize; // Vector intrinsic operation, mem - def PDm_Int : SS4AIi8<opcpd, MRMSrcMem, + def PDm : SS4AIi8<opcpd, MRMSrcMem, (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -4543,28 +4406,28 @@ multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd, RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> { // Intrinsic operation, reg. 
// Vector intrinsic operation, reg - def PSr : SS4AIi8<opcps, MRMSrcReg, + def PSr_AVX : SS4AIi8<opcps, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, OpSize; // Vector intrinsic operation, mem - def PSm : Ii8<opcps, MRMSrcMem, + def PSm_AVX : Ii8<opcps, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, TA, OpSize, Requires<[HasSSE41]>; // Vector intrinsic operation, reg - def PDr : SS4AIi8<opcpd, MRMSrcReg, + def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, OpSize; // Vector intrinsic operation, mem - def PDm : SS4AIi8<opcpd, MRMSrcMem, + def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -4576,7 +4439,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, Intrinsic F32Int, Intrinsic F64Int, bit Is2Addr = 1> { // Intrinsic operation, reg. - def SSr_Int : SS4AIi8<opcss, MRMSrcReg, + def SSr : SS4AIi8<opcss, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, @@ -4587,7 +4450,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, OpSize; // Intrinsic operation, mem. - def SSm_Int : SS4AIi8<opcss, MRMSrcMem, + def SSm : SS4AIi8<opcss, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, @@ -4599,7 +4462,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, OpSize; // Intrinsic operation, reg. - def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, + def SDr : SS4AIi8<opcsd, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, @@ -4610,7 +4473,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, OpSize; // Intrinsic operation, mem. - def SDm_Int : SS4AIi8<opcsd, MRMSrcMem, + def SDm : SS4AIi8<opcsd, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, @@ -4625,28 +4488,28 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd, string OpcodeStr> { // Intrinsic operation, reg. - def SSr : SS4AIi8<opcss, MRMSrcReg, + def SSr_AVX : SS4AIi8<opcss, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, OpSize; // Intrinsic operation, mem. - def SSm : SS4AIi8<opcss, MRMSrcMem, + def SSm_AVX : SS4AIi8<opcss, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, OpSize; // Intrinsic operation, reg. - def SDr : SS4AIi8<opcsd, MRMSrcReg, + def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, OpSize; // Intrinsic operation, mem. 
- def SDm : SS4AIi8<opcsd, MRMSrcMem, + def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), !strconcat(OpcodeStr, "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), @@ -4743,6 +4606,29 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>; // SSE4.1 - Misc Instructions //===----------------------------------------------------------------------===// +def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "popcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS; +def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "popcnt{w}\t{$src, $dst|$dst, $src}", + [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS; + +def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "popcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctpop GR32:$src))]>, XS; +def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "popcnt{l}\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS; + +def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "popcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctpop GR64:$src))]>, XS; +def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "popcnt{q}\t{$src, $dst|$dst, $src}", + [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS; + + + // SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr, Intrinsic IntId128> { @@ -4981,6 +4867,9 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; +def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0), + (PBLENDVBrr0 VR128:$src1, VR128:$src2)>; + let isAsmParserOnly = 1, Predicates = [HasAVX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", @@ -5032,12 +4921,12 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), // Packed Compare Implicit Length Strings, Return Mask multiclass pseudo_pcmpistrm<string asm> { - def REG : Ii8<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "rr PSEUDO"), + def REG : PseudoI<(outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, imm:$src3))]>; - def MEM : Ii8<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "rm PSEUDO"), + def MEM : PseudoI<(outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, (load addr:$src2), imm:$src3))]>; } @@ -5068,12 +4957,12 @@ let Defs = [XMM0, EFLAGS] in { // Packed Compare Explicit Length Strings, Return Mask multiclass pseudo_pcmpestrm<string asm> { - def REG : Ii8<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "rr PSEUDO"), + def REG : PseudoI<(outs VR128:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>; - def MEM : Ii8<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "rm PSEUDO"), + def MEM : PseudoI<(outs VR128:$dst), + (ins 
VR128:$src1, i128mem:$src3, i8imm:$src5), [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>; } @@ -5555,6 +5444,23 @@ def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; + def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), @@ -5562,6 +5468,23 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4f32 (VEXTRACTF128rr + (v8f32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2f64 (VEXTRACTF128rr + (v4f64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4i32 (VEXTRACTF128rr + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2i64 (VEXTRACTF128rr + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; + def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), (VBROADCASTF128 addr:$src)>; @@ -5673,19 +5596,14 @@ def : Pat<(X86Movddup (memopv2f64 addr:$src)), def : Pat<(X86Movddup (memopv2f64 addr:$src)), (MOVDDUPrm addr:$src)>; -def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))), - (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))), - (MOVDDUPrm addr:$src)>; - -def : Pat<(X86Movddup (memopv2i64 addr:$src)), +def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (memopv2i64 addr:$src)), +def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), (MOVDDUPrm addr:$src)>; -def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))), +def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; -def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))), +def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), (MOVDDUPrm addr:$src)>; def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))), @@ -5700,6 +5618,7 @@ def : Pat<(X86Movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src))))), (MOVDDUPrm addr:$src)>; + // Shuffle with UNPCKLPS def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; @@ -5724,9 +5643,9 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, 
VR128:$src2)), // Shuffle with UNPCKLPD def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), - (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; + (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), - (UNPCKLPSrm VR128:$src1, addr:$src2)>; + (UNPCKLPDrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; @@ -5735,9 +5654,9 @@ def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), // Shuffle with UNPCKHPD def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), - (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; + (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), - (UNPCKLPSrm VR128:$src1, addr:$src2)>; + (UNPCKHPDrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; @@ -5812,10 +5731,18 @@ def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; +// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the problem +// is during lowering, where it's not possible to recognize the load fold cause +// it has two uses through a bitcast. One use disappears at isel time and the +// fold opportunity reappears. +def : Pat<(v2f64 (X86Movddup VR128:$src)), + (UNPCKLPDrr VR128:$src, VR128:$src)>; + // Shuffle with MOVLHPD def : Pat<(v2f64 (X86Movlhpd VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))), (MOVHPDrm VR128:$src1, addr:$src2)>; + // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem // is during lowering, where it's not possible to recognize the load fold cause // it has two uses through a bitcast. 
One use disappears at isel time and the @@ -5878,31 +5805,18 @@ def : Pat<(X86Movsldup (memopv4f32 addr:$src)), (MOVSLDUPrm addr:$src)>; // Shuffle with PSHUFHW -def : Pat<(v8i16 (X86PShufhwLd addr:$src, (i8 imm:$imm))), - (PSHUFHWmi addr:$src, imm:$imm)>; def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))), (PSHUFHWri VR128:$src, imm:$imm)>; def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))), (PSHUFHWmi addr:$src, imm:$imm)>; // Shuffle with PSHUFLW -def : Pat<(v8i16 (X86PShuflwLd addr:$src, (i8 imm:$imm))), - (PSHUFLWmi addr:$src, imm:$imm)>; def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))), (PSHUFLWri VR128:$src, imm:$imm)>; def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))), (PSHUFLWmi addr:$src, imm:$imm)>; // Shuffle with PALIGN -def : Pat<(v1i64 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), - (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; -def : Pat<(v2i32 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), - (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; -def : Pat<(v4i16 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), - (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; -def : Pat<(v8i8 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))), - (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>; - def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), @@ -5920,6 +5834,15 @@ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), def : Pat<(X86Movlps VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), (MOVLPSrm VR128:$src1, addr:$src2)>; +// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem +// is during lowering, where it's not possible to recognize the load fold cause +// it has two uses through a bitcast. One use disappears at isel time and the +// fold opportunity reappears. +def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>; + +def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>; // Shuffle with MOVLPD def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td new file mode 100644 index 0000000..8278568 --- /dev/null +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -0,0 +1,746 @@ +//===- X86InstrShiftRotate.td - Shift and Rotate Instrs ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the shift and rotate instructions. +// +//===----------------------------------------------------------------------===// + +// FIXME: Someone needs to smear multipattern goodness all over this file. 
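For readers cross-checking the shift definitions that follow against the ISA: the CL-count forms rely on the hardware masking the shift count to the operand width (5 bits for 8/16/32-bit operands, 6 bits for 64-bit). A minimal C++ sketch of that semantic — the shl32/shl64 helper names are illustrative, not part of this patch:

  #include <cstdint>
  #include <cstdio>

  // x86 SHL with a CL count masks the count: count & 31 for 8/16/32-bit
  // operands, count & 63 for 64-bit. Masking in C++ also sidesteps the
  // undefined behavior of shifting by >= the bit width.
  uint32_t shl32(uint32_t x, uint8_t cl) { return x << (cl & 31); }
  uint64_t shl64(uint64_t x, uint8_t cl) { return x << (cl & 63); }

  int main() {
    printf("%u\n", shl32(1, 33));                        // 2: count 33 masks to 1
    printf("%llu\n", (unsigned long long)shl64(1, 33));  // 8589934592: full shift
    return 0;
  }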
+ +let Defs = [EFLAGS] in { + +let Constraints = "$src1 = $dst" in { +let Uses = [CL] in { +def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1), + "shl{b}\t{%cl, $dst|$dst, CL}", + [(set GR8:$dst, (shl GR8:$src1, CL))]>; +def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1), + "shl{w}\t{%cl, $dst|$dst, CL}", + [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize; +def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1), + "shl{l}\t{%cl, $dst|$dst, CL}", + [(set GR32:$dst, (shl GR32:$src1, CL))]>; +def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), + "shl{q}\t{%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (shl GR64:$src1, CL))]>; +} // Uses = [CL] + +def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), + "shl{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>; + +let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. +def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "shl{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize; +def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "shl{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>; +def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), + (ins GR64:$src1, i8imm:$src2), + "shl{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>; + +// NOTE: We don't include patterns for shifts of a register by one, because +// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one). +def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1), + "shl{b}\t$dst", []>; +def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1), + "shl{w}\t$dst", []>, OpSize; +def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), + "shl{l}\t$dst", []>; +def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), + "shl{q}\t$dst", []>; +} // isConvertibleToThreeAddress = 1 +} // Constraints = "$src = $dst" + + +// FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern +// using CL? 
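The NOTE above — that the SHL*r1 forms carry no pattern because 'add reg,reg' is cheaper — rests on the identity x << 1 == x + x in two's-complement arithmetic, wraparound included. A quick self-contained check (purely illustrative, not part of the patch):

  #include <cassert>
  #include <cstdint>
  #include <initializer_list>

  // A left shift by one and a self-addition produce the same bits, even on
  // overflow, which is why codegen prefers the ADD encoding.
  int main() {
    for (uint32_t x : {0u, 1u, 0x7fffffffu, 0xffffffffu})
      assert((x << 1) == x + x);
    return 0;
  }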
+let Uses = [CL] in { +def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst), + "shl{b}\t{%cl, $dst|$dst, CL}", + [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>; +def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst), + "shl{w}\t{%cl, $dst|$dst, CL}", + [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; +def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst), + "shl{l}\t{%cl, $dst|$dst, CL}", + [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>; +def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst), + "shl{q}\t{%cl, $dst|$dst, %CL}", + [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>; +} +def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src), + "shl{b}\t{$src, $dst|$dst, $src}", + [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src), + "shl{w}\t{$src, $dst|$dst, $src}", + [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + OpSize; +def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src), + "shl{l}\t{$src, $dst|$dst, $src}", + [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src), + "shl{q}\t{$src, $dst|$dst, $src}", + [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + +// Shift by 1 +def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst), + "shl{b}\t$dst", + [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; +def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst), + "shl{w}\t$dst", + [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + OpSize; +def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst), + "shl{l}\t$dst", + [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst), + "shl{q}\t$dst", + [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +let Constraints = "$src1 = $dst" in { +let Uses = [CL] in { +def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1), + "shr{b}\t{%cl, $dst|$dst, CL}", + [(set GR8:$dst, (srl GR8:$src1, CL))]>; +def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1), + "shr{w}\t{%cl, $dst|$dst, CL}", + [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize; +def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1), + "shr{l}\t{%cl, $dst|$dst, CL}", + [(set GR32:$dst, (srl GR32:$src1, CL))]>; +def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1), + "shr{q}\t{%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (srl GR64:$src1, CL))]>; +} + +def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "shr{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>; +def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "shr{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize; +def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "shr{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>; +def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), + "shr{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>; + +// Shift right by 1 +def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1), + "shr{b}\t$dst", + [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>; +def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1), + "shr{w}\t$dst", + [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize; +def SHR32r1 
: I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), + "shr{l}\t$dst", + [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>; +def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), + "shr{q}\t$dst", + [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>; +} // Constraints = "$src = $dst" + + +let Uses = [CL] in { +def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst), + "shr{b}\t{%cl, $dst|$dst, CL}", + [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>; +def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst), + "shr{w}\t{%cl, $dst|$dst, CL}", + [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>, + OpSize; +def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst), + "shr{l}\t{%cl, $dst|$dst, CL}", + [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>; +def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst), + "shr{q}\t{%cl, $dst|$dst, %CL}", + [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>; +} +def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src), + "shr{b}\t{$src, $dst|$dst, $src}", + [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src), + "shr{w}\t{$src, $dst|$dst, $src}", + [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + OpSize; +def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src), + "shr{l}\t{$src, $dst|$dst, $src}", + [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src), + "shr{q}\t{$src, $dst|$dst, $src}", + [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + +// Shift by 1 +def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst), + "shr{b}\t$dst", + [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; +def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst), + "shr{w}\t$dst", + [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize; +def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst), + "shr{l}\t$dst", + [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst), + "shr{q}\t$dst", + [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +let Constraints = "$src1 = $dst" in { +let Uses = [CL] in { +def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), + "sar{b}\t{%cl, $dst|$dst, CL}", + [(set GR8:$dst, (sra GR8:$src1, CL))]>; +def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1), + "sar{w}\t{%cl, $dst|$dst, CL}", + [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize; +def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1), + "sar{l}\t{%cl, $dst|$dst, CL}", + [(set GR32:$dst, (sra GR32:$src1, CL))]>; +def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1), + "sar{q}\t{%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (sra GR64:$src1, CL))]>; +} + +def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), + "sar{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>; +def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "sar{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>, + OpSize; +def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "sar{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>; +def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), + (ins GR64:$src1, i8imm:$src2), + "sar{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>; + +// 
Shift by 1 +def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), + "sar{b}\t$dst", + [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>; +def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1), + "sar{w}\t$dst", + [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize; +def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1), + "sar{l}\t$dst", + [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>; +def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), + "sar{q}\t$dst", + [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>; +} // Constraints = "$src = $dst" + + +let Uses = [CL] in { +def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst), + "sar{b}\t{%cl, $dst|$dst, CL}", + [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>; +def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst), + "sar{w}\t{%cl, $dst|$dst, CL}", + [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; +def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), + "sar{l}\t{%cl, $dst|$dst, CL}", + [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>; +def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), + "sar{q}\t{%cl, $dst|$dst, %CL}", + [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>; +} +def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src), + "sar{b}\t{$src, $dst|$dst, $src}", + [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src), + "sar{w}\t{$src, $dst|$dst, $src}", + [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + OpSize; +def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src), + "sar{l}\t{$src, $dst|$dst, $src}", + [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src), + "sar{q}\t{$src, $dst|$dst, $src}", + [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + +// Shift by 1 +def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst), + "sar{b}\t$dst", + [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; +def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst), + "sar{w}\t$dst", + [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + OpSize; +def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst), + "sar{l}\t$dst", + [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), + "sar{q}\t$dst", + [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +//===----------------------------------------------------------------------===// +// Rotate instructions +//===----------------------------------------------------------------------===// + +let Constraints = "$src1 = $dst" in { +def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), + "rcl{b}\t$dst", []>; +def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in +def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; + +def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1), + "rcl{w}\t$dst", []>, OpSize; +def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +let Uses = [CL] in +def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; + +def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1), + "rcl{l}\t$dst", []>; +def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, 
i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in +def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; + + +def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1), + "rcl{q}\t$dst", []>; +def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in +def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; + + +def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1), + "rcr{b}\t$dst", []>; +def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in +def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; + +def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1), + "rcr{w}\t$dst", []>, OpSize; +def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +let Uses = [CL] in +def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; + +def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1), + "rcr{l}\t$dst", []>; +def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), + "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in +def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), + "rcr{l}\t{%cl, $dst|$dst, CL}", []>; + +def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1), + "rcr{q}\t$dst", []>; +def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in +def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; + +} // Constraints = "$src = $dst" + +def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t$dst", []>; +def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t$dst", []>, OpSize; +def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t$dst", []>; +def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t$dst", []>; +def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; + +def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t$dst", []>; +def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t$dst", []>, OpSize; +def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t$dst", []>; +def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), + "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t$dst", []>; +def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + +let Uses = [CL] in { +def RCL8mCL : 
I<0xD2, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; +def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; +def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; + +def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; +def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{%cl, $dst|$dst, CL}", []>; +def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; +} + +let Constraints = "$src1 = $dst" in { +// FIXME: provide shorter instructions when imm8 == 1 +let Uses = [CL] in { +def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), + "rol{b}\t{%cl, $dst|$dst, CL}", + [(set GR8:$dst, (rotl GR8:$src1, CL))]>; +def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1), + "rol{w}\t{%cl, $dst|$dst, CL}", + [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize; +def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1), + "rol{l}\t{%cl, $dst|$dst, CL}", + [(set GR32:$dst, (rotl GR32:$src1, CL))]>; +def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1), + "rol{q}\t{%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (rotl GR64:$src1, CL))]>; +} + +def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), + "rol{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>; +def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "rol{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>, + OpSize; +def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "rol{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>; +def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), + (ins GR64:$src1, i8imm:$src2), + "rol{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>; + +// Rotate by 1 +def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), + "rol{b}\t$dst", + [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>; +def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1), + "rol{w}\t$dst", + [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize; +def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1), + "rol{l}\t$dst", + [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>; +def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), + "rol{q}\t$dst", + [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>; +} // Constraints = "$src = $dst" + +let Uses = [CL] in { +def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), + "rol{b}\t{%cl, $dst|$dst, CL}", + [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>; +def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst), + "rol{w}\t{%cl, $dst|$dst, CL}", + [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; +def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst), + "rol{l}\t{%cl, $dst|$dst, CL}", + [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>; +def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), + "rol{q}\t{%cl, $dst|$dst, %CL}", + [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>; +} +def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1), + "rol{b}\t{$src1, $dst|$dst, $src1}", + [(store (rotl 
(loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>; +def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1), + "rol{w}\t{$src1, $dst|$dst, $src1}", + [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)]>, + OpSize; +def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1), + "rol{l}\t{$src1, $dst|$dst, $src1}", + [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)]>; +def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1), + "rol{q}\t{$src1, $dst|$dst, $src1}", + [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)]>; + +// Rotate by 1 +def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst), + "rol{b}\t$dst", + [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; +def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst), + "rol{w}\t$dst", + [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + OpSize; +def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst), + "rol{l}\t$dst", + [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), + "rol{q}\t$dst", + [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + +let Constraints = "$src1 = $dst" in { +let Uses = [CL] in { +def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), + "ror{b}\t{%cl, $dst|$dst, CL}", + [(set GR8:$dst, (rotr GR8:$src1, CL))]>; +def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1), + "ror{w}\t{%cl, $dst|$dst, CL}", + [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize; +def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1), + "ror{l}\t{%cl, $dst|$dst, CL}", + [(set GR32:$dst, (rotr GR32:$src1, CL))]>; +def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1), + "ror{q}\t{%cl, $dst|$dst, %CL}", + [(set GR64:$dst, (rotr GR64:$src1, CL))]>; +} + +def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), + "ror{b}\t{$src2, $dst|$dst, $src2}", + [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>; +def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), + "ror{w}\t{$src2, $dst|$dst, $src2}", + [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>, + OpSize; +def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), + "ror{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>; +def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), + (ins GR64:$src1, i8imm:$src2), + "ror{q}\t{$src2, $dst|$dst, $src2}", + [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>; + +// Rotate by 1 +def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), + "ror{b}\t$dst", + [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>; +def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1), + "ror{w}\t$dst", + [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize; +def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1), + "ror{l}\t$dst", + [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>; +def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), + "ror{q}\t$dst", + [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; +} // Constraints = "$src = $dst" + +let Uses = [CL] in { +def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst), + "ror{b}\t{%cl, $dst|$dst, CL}", + [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>; +def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst), + "ror{w}\t{%cl, $dst|$dst, CL}", + [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize; +def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), + "ror{l}\t{%cl, $dst|$dst, CL}", + 
[(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>; +def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), + "ror{q}\t{%cl, $dst|$dst, %CL}", + [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>; +} +def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src), + "ror{b}\t{$src, $dst|$dst, $src}", + [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src), + "ror{w}\t{$src, $dst|$dst, $src}", + [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>, + OpSize; +def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src), + "ror{l}\t{$src, $dst|$dst, $src}", + [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src), + "ror{q}\t{$src, $dst|$dst, $src}", + [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>; + +// Rotate by 1 +def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst), + "ror{b}\t$dst", + [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>; +def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst), + "ror{w}\t$dst", + [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>, + OpSize; +def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst), + "ror{l}\t$dst", + [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; +def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), + "ror{q}\t$dst", + [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; + + +//===----------------------------------------------------------------------===// +// Double shift instructions (generalizations of rotate) +//===----------------------------------------------------------------------===// + +let Constraints = "$src1 = $dst" in { + +let Uses = [CL] in { +def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), + "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>, + TB, OpSize; +def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2), + "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>, + TB, OpSize; +def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB; +def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB; +def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>, + TB; +def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>, + TB; +} + +let isCommutable = 1 in { // These instructions commute to each other. 
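The isCommutable flag above reflects a real identity: shifting $src1 left by n while pulling bits in from $src2 equals shifting $src2 right by (width - n) while pulling bits in from $src1, so the two-address pass may trade an SHLD for an SHRD. A reference sketch of the semantics, assuming counts in 1..31 (the shld32/shrd32 names are illustrative):

  #include <cstdint>
  #include <cstdio>

  // SHLD: shift dst left by n, filling the vacated low bits from the high
  // bits of src (X86shld in the patterns). SHRD is the mirror image.
  uint32_t shld32(uint32_t dst, uint32_t src, unsigned n) {  // n in 1..31
    return (dst << n) | (src >> (32 - n));
  }
  uint32_t shrd32(uint32_t dst, uint32_t src, unsigned n) {  // n in 1..31
    return (dst >> n) | (src << (32 - n));
  }

  int main() {
    uint32_t a = 0x12345678, b = 0x9abcdef0;
    // The commute: shld(a, b, n) == shrd(b, a, 32 - n).
    printf("%08x\n", shld32(a, b, 8));   // 3456789a
    printf("%08x\n", shrd32(b, a, 24));  // 3456789a
    return 0;
  }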
+def SHLD16rri8 : Ii8<0xA4, MRMDestReg, + (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2, i8imm:$src3), + "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, + (i8 imm:$src3)))]>, + TB, OpSize; +def SHRD16rri8 : Ii8<0xAC, MRMDestReg, + (outs GR16:$dst), + (ins GR16:$src1, GR16:$src2, i8imm:$src3), + "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, + (i8 imm:$src3)))]>, + TB, OpSize; +def SHLD32rri8 : Ii8<0xA4, MRMDestReg, + (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2, i8imm:$src3), + "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, + (i8 imm:$src3)))]>, + TB; +def SHRD32rri8 : Ii8<0xAC, MRMDestReg, + (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2, i8imm:$src3), + "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, + (i8 imm:$src3)))]>, + TB; +def SHLD64rri8 : RIi8<0xA4, MRMDestReg, + (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2, i8imm:$src3), + "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, + (i8 imm:$src3)))]>, + TB; +def SHRD64rri8 : RIi8<0xAC, MRMDestReg, + (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2, i8imm:$src3), + "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, + (i8 imm:$src3)))]>, + TB; +} +} // Constraints = "$src = $dst" + +let Uses = [CL] in { +def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL), + addr:$dst)]>, TB, OpSize; +def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL), + addr:$dst)]>, TB, OpSize; + +def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL), + addr:$dst)]>, TB; +def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", + [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL), + addr:$dst)]>, TB; + +def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL), + addr:$dst)]>, TB; +def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}", + [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL), + addr:$dst)]>, TB; +} + +def SHLD16mri8 : Ii8<0xA4, MRMDestMem, + (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), + "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shld (loadi16 addr:$dst), GR16:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB, OpSize; +def SHRD16mri8 : Ii8<0xAC, MRMDestMem, + (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), + "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB, OpSize; + +def SHLD32mri8 : Ii8<0xA4, MRMDestMem, + (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), + "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shld (loadi32 addr:$dst), GR32:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB; +def SHRD32mri8 : Ii8<0xAC, MRMDestMem, + (outs), (ins 
i32mem:$dst, GR32:$src2, i8imm:$src3), + "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB; + +def SHLD64mri8 : RIi8<0xA4, MRMDestMem, + (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), + "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shld (loadi64 addr:$dst), GR64:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB; +def SHRD64mri8 : RIi8<0xAC, MRMDestMem, + (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), + "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", + [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, + (i8 imm:$src3)), addr:$dst)]>, + TB; + +} // Defs = [EFLAGS] + diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td new file mode 100644 index 0000000..1a58ba0 --- /dev/null +++ b/lib/Target/X86/X86InstrSystem.td @@ -0,0 +1,390 @@ +//===- X86InstrSystem.td - System Instructions -------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the X86 instructions that are generally used in +// privileged modes. These are not typically used by the compiler, but are +// supported for the assembler and disassembler. +// +//===----------------------------------------------------------------------===// + +let Defs = [RAX, RDX] in + def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB; + +let Defs = [RAX, RCX, RDX] in + def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; + +// CPU flow control instructions + +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in { + def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; + def UD2B : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB; +} + +def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>; +def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB; + +// Interrupt and SysCall Instructions. +let Uses = [EFLAGS] in + def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; +def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", + [(int_x86_int (i8 3))]>; +def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", + [(int_x86_int imm:$trap)]>; + +def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB; +def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB; +def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB, + Requires<[In64BitMode]>; + +def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB; + +def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB, + Requires<[In32BitMode]>; +def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB, + Requires<[In64BitMode]>; + +def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize; +def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>; +def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>, + Requires<[In64BitMode]>; + + +//===----------------------------------------------------------------------===// +// Input/Output Instructions. 
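
// The fixed-register in/out forms below exist chiefly for the assembler and
// disassembler; compiled code reaches port I/O through inline asm. A sketch
// of the usual freestanding wrappers (illustrative, not part of the imported
// sources):
//
//   #include <cstdint>
//   inline void outb(uint16_t port, uint8_t val) {
//     asm volatile("outb %0, %1" : : "a"(val), "Nd"(port));
//   }
//   inline uint8_t inb(uint16_t port) {
//     uint8_t val;
//     asm volatile("inb %1, %0" : "=a"(val) : "Nd"(port));
//     return val;
//   }
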
+// +let Defs = [AL], Uses = [DX] in +def IN8rr : I<0xEC, RawFrm, (outs), (ins), + "in{b}\t{%dx, %al|%AL, %DX}", []>; +let Defs = [AX], Uses = [DX] in +def IN16rr : I<0xED, RawFrm, (outs), (ins), + "in{w}\t{%dx, %ax|%AX, %DX}", []>, OpSize; +let Defs = [EAX], Uses = [DX] in +def IN32rr : I<0xED, RawFrm, (outs), (ins), + "in{l}\t{%dx, %eax|%EAX, %DX}", []>; + +let Defs = [AL] in +def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port), + "in{b}\t{$port, %al|%AL, $port}", []>; +let Defs = [AX] in +def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), + "in{w}\t{$port, %ax|%AX, $port}", []>, OpSize; +let Defs = [EAX] in +def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), + "in{l}\t{$port, %eax|%EAX, $port}", []>; + +let Uses = [DX, AL] in +def OUT8rr : I<0xEE, RawFrm, (outs), (ins), + "out{b}\t{%al, %dx|%DX, %AL}", []>; +let Uses = [DX, AX] in +def OUT16rr : I<0xEF, RawFrm, (outs), (ins), + "out{w}\t{%ax, %dx|%DX, %AX}", []>, OpSize; +let Uses = [DX, EAX] in +def OUT32rr : I<0xEF, RawFrm, (outs), (ins), + "out{l}\t{%eax, %dx|%DX, %EAX}", []>; + +let Uses = [AL] in +def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port), + "out{b}\t{%al, $port|$port, %AL}", []>; +let Uses = [AX] in +def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), + "out{w}\t{%ax, $port|$port, %AX}", []>, OpSize; +let Uses = [EAX] in +def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), + "out{l}\t{%eax, $port|$port, %EAX}", []>; + +def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>; +def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>, OpSize; +def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", []>; + +//===----------------------------------------------------------------------===// +// Moves to and from debug registers + +def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + +//===----------------------------------------------------------------------===// +// Moves to and from control registers + +def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + +//===----------------------------------------------------------------------===// +// Segment override instruction prefixes + +def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>; +def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>; +def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>; +def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>; +def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>; +def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; + + +//===----------------------------------------------------------------------===// +// Moves to and from segment registers. 
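
// (Segment selectors are 16 bits regardless of operand size; the 32/64-bit
// register forms below differ only in the operand-size/REX prefix, with the
// selector zero-extended on recent CPUs, and the wider memory forms still
// store only the 16-bit selector. E.g. "movl %fs, %eax" and "movw %fs, %ax"
// read the same value.)
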
+//
+
+def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
+                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
+def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
+                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+//===----------------------------------------------------------------------===//
+// Segmentation support instructions.
+
+def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
+
+def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+                "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+                "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+
+// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
+def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+                "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+                "lar{l}\t{$src, $dst|$dst, $src}", []>, TB;
+// i16mem operand in LAR64rm and GR32 operand in LAR64rr is not a typo.
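
// (Not typos because LAR inspects the descriptor named by a 16-bit segment
// selector: the memory forms therefore always read an i16mem, and the
// register forms only use the low 16 bits of their source -- which is why
// LAR64rr below pairs a GR64 destination with a GR32 source.)
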
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; +def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), + "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; +def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; +def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; +def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; + +def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), + "str{w}\t{$dst}", []>, TB; +def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins), + "str{w}\t{$dst}", []>, TB; +def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), + "ltr{w}\t{$src}", []>, TB; +def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), + "ltr{w}\t{$src}", []>, TB; + +def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), + "push{w}\t%cs", []>, Requires<[In32BitMode]>, OpSize; +def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins), + "push{l}\t%cs", []>, Requires<[In32BitMode]>; +def PUSHSS16 : I<0x16, RawFrm, (outs), (ins), + "push{w}\t%ss", []>, Requires<[In32BitMode]>, OpSize; +def PUSHSS32 : I<0x16, RawFrm, (outs), (ins), + "push{l}\t%ss", []>, Requires<[In32BitMode]>; +def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins), + "push{w}\t%ds", []>, Requires<[In32BitMode]>, OpSize; +def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins), + "push{l}\t%ds", []>, Requires<[In32BitMode]>; +def PUSHES16 : I<0x06, RawFrm, (outs), (ins), + "push{w}\t%es", []>, Requires<[In32BitMode]>, OpSize; +def PUSHES32 : I<0x06, RawFrm, (outs), (ins), + "push{l}\t%es", []>, Requires<[In32BitMode]>; + +def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), + "push{w}\t%fs", []>, OpSize, TB; +def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), + "push{l}\t%fs", []>, TB, Requires<[In32BitMode]>; +def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), + "push{w}\t%gs", []>, OpSize, TB; +def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), + "push{l}\t%gs", []>, TB, Requires<[In32BitMode]>; + +def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), + "push{q}\t%fs", []>, TB; +def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), + "push{q}\t%gs", []>, TB; + +// No "pop cs" instruction. 
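
// (Historical aside: opcode 0x0F *was* "pop cs" on the 8086; it was
// reclaimed as the two-byte opcode escape from the 286 on, and CS can only
// be reloaded together with IP via far jumps, calls, and returns.)
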
+def POPSS16 : I<0x17, RawFrm, (outs), (ins), + "pop{w}\t%ss", []>, OpSize, Requires<[In32BitMode]>; +def POPSS32 : I<0x17, RawFrm, (outs), (ins), + "pop{l}\t%ss", []> , Requires<[In32BitMode]>; + +def POPDS16 : I<0x1F, RawFrm, (outs), (ins), + "pop{w}\t%ds", []>, OpSize, Requires<[In32BitMode]>; +def POPDS32 : I<0x1F, RawFrm, (outs), (ins), + "pop{l}\t%ds", []> , Requires<[In32BitMode]>; + +def POPES16 : I<0x07, RawFrm, (outs), (ins), + "pop{w}\t%es", []>, OpSize, Requires<[In32BitMode]>; +def POPES32 : I<0x07, RawFrm, (outs), (ins), + "pop{l}\t%es", []> , Requires<[In32BitMode]>; + +def POPFS16 : I<0xa1, RawFrm, (outs), (ins), + "pop{w}\t%fs", []>, OpSize, TB; +def POPFS32 : I<0xa1, RawFrm, (outs), (ins), + "pop{l}\t%fs", []>, TB , Requires<[In32BitMode]>; +def POPFS64 : I<0xa1, RawFrm, (outs), (ins), + "pop{q}\t%fs", []>, TB; + +def POPGS16 : I<0xa9, RawFrm, (outs), (ins), + "pop{w}\t%gs", []>, OpSize, TB; +def POPGS32 : I<0xa9, RawFrm, (outs), (ins), + "pop{l}\t%gs", []>, TB , Requires<[In32BitMode]>; +def POPGS64 : I<0xa9, RawFrm, (outs), (ins), + "pop{q}\t%gs", []>, TB; + + +def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "lds{l}\t{$src, $dst|$dst, $src}", []>; + +def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "lss{l}\t{$src, $dst|$dst, $src}", []>, TB; +def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), + "lss{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "les{l}\t{$src, $dst|$dst, $src}", []>; + +def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB; +def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), + "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB; + +def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), + "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), + "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB; + +def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), + "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB; + + +def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), + "verr\t$seg", []>, TB; +def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), + "verr\t$seg", []>, TB; +def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), + "verw\t$seg", []>, TB; +def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), + "verw\t$seg", []>, TB; + +//===----------------------------------------------------------------------===// +// Descriptor-table support instructions + +def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), + "sgdtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>; +def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), + "sgdt\t$dst", []>, TB; +def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), + "sidtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>; +def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), 
+ "sidt\t$dst", []>, TB; +def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins), + "sldt{w}\t$dst", []>, TB, OpSize; +def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins), + "sldt{w}\t$dst", []>, TB; +def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins), + "sldt{l}\t$dst", []>, TB; + +// LLDT is not interpreted specially in 64-bit mode because there is no sign +// extension. +def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins), + "sldt{q}\t$dst", []>, TB; +def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), + "sldt{q}\t$dst", []>, TB; + +def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), + "lgdtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>; +def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), + "lgdt\t$src", []>, TB; +def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), + "lidtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>; +def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), + "lidt\t$src", []>, TB; +def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), + "lldt{w}\t$src", []>, TB; +def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), + "lldt{w}\t$src", []>, TB; + +//===----------------------------------------------------------------------===// +// Specialized register support +def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB; +def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB; +def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB; + +def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), + "smsw{w}\t$dst", []>, OpSize, TB; +def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins), + "smsw{l}\t$dst", []>, TB; +// no m form encodable; use SMSW16m +def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins), + "smsw{q}\t$dst", []>, TB; + +// For memory operands, there is only a 16-bit form +def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins), + "smsw{w}\t$dst", []>, TB; + +def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src), + "lmsw{w}\t$src", []>, TB; +def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src), + "lmsw{w}\t$src", []>, TB; + +def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB; + +//===----------------------------------------------------------------------===// +// Cache instructions +def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; +def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; + diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td new file mode 100644 index 0000000..daf61e4 --- /dev/null +++ b/lib/Target/X86/X86InstrVMX.td @@ -0,0 +1,54 @@ +//===- X86InstrVMX.td - VMX Instruction Set Extension ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the instructions that make up the Intel VMX instruction +// set. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// VMX instructions + +// 66 0F 38 80 +def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8; +// 66 0F 38 81 +def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8; +// 0F 01 C1 +def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; +def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), + "vmclear\t$vmcs", []>, OpSize, TB; +// 0F 01 C2 +def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; +// 0F 01 C3 +def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; +def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), + "vmptrld\t$vmcs", []>, TB; +def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins), + "vmptrst\t$vmcs", []>, TB; +def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src), + "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB; +def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), + "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB; +def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src), + "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB; +def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), + "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB; +def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB; +def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), + "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB; +def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; +def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; +// 0F 01 C4 +def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; +def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), + "vmxon\t{$vmxon}", []>, XS; + diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 6f0a8d9..3f88fa6 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -19,7 +19,7 @@ #include "llvm/Function.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/System/Valgrind.h" +#include "llvm/Support/Valgrind.h" #include <cstdlib> #include <cstring> using namespace llvm; @@ -127,9 +127,17 @@ extern "C" { "movaps %xmm6, 96(%rsp)\n" "movaps %xmm7, 112(%rsp)\n" // JIT callee +#ifdef _WIN64 + "subq $32, %rsp\n" + "movq %rbp, %rcx\n" // Pass prev frame and return address + "movq 8(%rbp), %rdx\n" + "call " ASMPREFIX "X86CompilationCallback2\n" + "addq $32, %rsp\n" +#else "movq %rbp, %rdi\n" // Pass prev frame and return address "movq 8(%rbp), %rsi\n" "call " ASMPREFIX "X86CompilationCallback2\n" +#endif // Restore all XMM arg registers "movaps 112(%rsp), %xmm7\n" "movaps 96(%rsp), %xmm6\n" @@ -333,11 +341,11 @@ extern "C" { extern "C" { #if !(defined (X86_64_JIT) && defined(_MSC_VER)) // the following function is called only from this translation unit, - // unless we are under 64bit Windows with MSC, where there is + // unless we are under 64bit Windows with MSC, where there is // no support for inline assembly static #endif -void ATTRIBUTE_USED +void LLVM_ATTRIBUTE_USED X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; assert(*RetAddrLoc == RetAddr && @@ -462,7 +470,7 @@ 
TargetJITInfo::StubLayout X86JITInfo::getStubLayout() { void *X86JITInfo::emitFunctionStub(const Function* F, void *Target, JITCodeEmitter &JCE) { - // Note, we cast to intptr_t here to silence a -pedantic warning that + // Note, we cast to intptr_t here to silence a -pedantic warning that // complains about casting a function pointer to a normal pointer. #if defined (X86_32_JIT) && !defined (_MSC_VER) bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback && diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index 36badb4..6686214 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ELF.h" using namespace llvm; enum AsmWriterFlavorTy { @@ -68,7 +69,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { DwarfUsesInlineInfoSection = true; // Exceptions handling - ExceptionsType = ExceptionHandling::Dwarf; + ExceptionsType = ExceptionHandling::DwarfTable; } X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { @@ -88,8 +89,8 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { SupportsDebugInformation = true; // Exceptions handling - ExceptionsType = ExceptionHandling::Dwarf; - + ExceptionsType = ExceptionHandling::DwarfTable; + // OpenBSD has buggy support for .quad in 32-bit mode, just split into two // .words. if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86) @@ -98,13 +99,15 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { const MCSection *X86ELFMCAsmInfo:: getNonexecutableStackSection(MCContext &Ctx) const { - return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, - 0, SectionKind::getMetadata(), false); + return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, + 0, SectionKind::getMetadata()); } X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { - if (Triple.getArch() == Triple::x86_64) + if (Triple.getArch() == Triple::x86_64) { GlobalPrefix = ""; + PrivateGlobalPrefix = ".L"; + } AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 9564fe0..e6dc74e 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86-emitter" +#define DEBUG_TYPE "mccodeemitter" #include "X86.h" #include "X86InstrInfo.h" #include "X86FixupKinds.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -37,27 +38,6 @@ public: ~X86MCCodeEmitter() {} - unsigned getNumFixupKinds() const { - return 5; - } - - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - const static MCFixupKindInfo Infos[] = { - { "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, - { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel }, - { "reloc_pcrel_2byte", 0, 2 * 8, MCFixupKindInfo::FKF_IsPCRel }, - { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, - { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel } - }; - - if (Kind < FirstTargetFixupKind) - return MCCodeEmitter::getFixupKindInfo(Kind); - - assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && - "Invalid kind!"); - return Infos[Kind - 
FirstTargetFixupKind];
-  }
-
  static unsigned GetX86RegNum(const MCOperand &MO) {
    return X86RegisterInfo::getX86RegNum(MO.getReg());
  }
@@ -170,41 +150,77 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
   unsigned Size = X86II::getSizeOfImm(TSFlags);
   bool isPCRel = X86II::isImmPCRel(TSFlags);
 
-  switch (Size) {
-  default: assert(0 && "Unknown immediate size");
-  case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1;
-  case 2: return isPCRel ? MCFixupKind(X86::reloc_pcrel_2byte) : FK_Data_2;
-  case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4;
-  case 8: assert(!isPCRel); return FK_Data_8;
-  }
+  return MCFixup::getKindForSize(Size, isPCRel);
+}
+
+/// Is32BitMemOperand - Return true if the specified instruction with a memory
+/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
+/// memory operand. Op specifies the operand # of the memoperand.
+static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
+  const MCOperand &BaseReg  = MI.getOperand(Op+X86::AddrBaseReg);
+  const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+  if ((BaseReg.getReg() != 0 && X86::GR32RegClass.contains(BaseReg.getReg())) ||
+      (IndexReg.getReg() != 0 && X86::GR32RegClass.contains(IndexReg.getReg())))
+    return true;
+  return false;
 }
 
+/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
+/// expression starts with _GLOBAL_OFFSET_TABLE_. This is needed to support
+/// PIC on ELF i386 as that symbol is magic. We check only the simple cases
+/// that are known to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the
+/// start of a binary expression.
+static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+  if (Expr->getKind() == MCExpr::Binary) {
+    const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
+    Expr = BE->getLHS();
+  }
+
+  if (Expr->getKind() != MCExpr::SymbolRef)
+    return false;
+
+  const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+  const MCSymbol &S = Ref->getSymbol();
+  return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+}
 
 void X86MCCodeEmitter::
 EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
               unsigned &CurByte, raw_ostream &OS,
               SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
-  // If this is a simple integer displacement that doesn't require a relocation,
-  // emit it now.
+  const MCExpr *Expr = NULL;
   if (DispOp.isImm()) {
-    // FIXME: is this right for pc-rel encoding?? Probably need to emit this as
-    // a fixup if so.
-    EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
-    return;
+    // If this is a simple integer displacement that doesn't require a relocation,
+    // emit it now.
+    if (FixupKind != FK_PCRel_1 &&
+        FixupKind != FK_PCRel_2 &&
+        FixupKind != FK_PCRel_4) {
+      EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS);
+      return;
+    }
+    Expr = MCConstantExpr::Create(DispOp.getImm(), Ctx);
+  } else {
+    Expr = DispOp.getExpr();
   }
 
   // If we have an immoffset, add it to the expression.
-  const MCExpr *Expr = DispOp.getExpr();
+  if (FixupKind == FK_Data_4 && StartsWithGlobalOffsetTable(Expr)) {
+    assert(ImmOffset == 0);
+
+    FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+    ImmOffset = CurByte;
+  }
 
   // If the fixup is pc-relative, we need to bias the value to be relative to
   // the start of the field, not the end of the field.
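
// Concretely (illustrative arithmetic, not from the patch): for a 5-byte
// "call rel32" at address A, the rel32 field sits at A+1 and the processor
// resolves the target as T = A + 5 + rel32, i.e. rel32 = T - (A+1) - 4.
// The fixup is recorded at the field offset A+1, hence the extra -4 below:
//
//   int64_t Rel32For(uint64_t FieldAddr, uint64_t Target) {
//     return (int64_t)(Target - (FieldAddr + 4));  // 4 = size of the field
//   }
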
- if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || + if (FixupKind == FK_PCRel_4 || FixupKind == MCFixupKind(X86::reloc_riprel_4byte) || FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load)) ImmOffset -= 4; - if (FixupKind == MCFixupKind(X86::reloc_pcrel_2byte)) + if (FixupKind == FK_PCRel_2) ImmOffset -= 2; - if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) + if (FixupKind == FK_PCRel_1) ImmOffset -= 1; if (ImmOffset) @@ -221,10 +237,10 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const{ - const MCOperand &Disp = MI.getOperand(Op+3); - const MCOperand &Base = MI.getOperand(Op); - const MCOperand &Scale = MI.getOperand(Op+1); - const MCOperand &IndexReg = MI.getOperand(Op+2); + const MCOperand &Disp = MI.getOperand(Op+X86::AddrDisp); + const MCOperand &Base = MI.getOperand(Op+X86::AddrBaseReg); + const MCOperand &Scale = MI.getOperand(Op+X86::AddrScaleAmt); + const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); unsigned BaseReg = Base.getReg(); // Handle %rip relative addressing. @@ -238,8 +254,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // movq loads are handled with a special relocation form which allows the // linker to eliminate some loads for GOT references which end up in the // same linkage unit. - if (MI.getOpcode() == X86::MOV64rm || - MI.getOpcode() == X86::MOV64rm_TC) + if (MI.getOpcode() == X86::MOV64rm) FixupKind = X86::reloc_riprel_4byte_movq_load; // rip-relative addressing is actually relative to the *next* instruction. @@ -295,7 +310,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Otherwise, emit the most general non-SIB encoding: [REG+disp32] EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS, + Fixups); return; } @@ -355,7 +371,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, if (ForceDisp8) EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); else if (ForceDisp32 || Disp.getImm() != 0) - EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS, + Fixups); } /// EmitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix @@ -708,14 +725,15 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if ((TSFlags & X86II::Op0Mask) == X86II::REP) EmitByte(0xF3, CurByte, OS); + // Emit the address size opcode prefix as needed. + if ((TSFlags & X86II::AdSize) || + (MemOperand != -1 && Is64BitMode && Is32BitMemOperand(MI, MemOperand))) + EmitByte(0x67, CurByte, OS); + // Emit the operand size opcode prefix as needed. if (TSFlags & X86II::OpSize) EmitByte(0x66, CurByte, OS); - // Emit the address size opcode prefix as needed. - if (TSFlags & X86II::AdSize) - EmitByte(0x67, CurByte, OS); - bool Need0FPrefix = false; switch (TSFlags & X86II::Op0Mask) { default: assert(0 && "Invalid prefix!"); @@ -806,6 +824,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if ((TSFlags >> 32) & X86II::VEX_4V) HasVEX_4V = true; + // Determine where the memory operand starts, if present. 
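
// Example of the new address-size rule above: in 64-bit mode
// "movb (%eax), %al" must encode as 67 8A 00 (0x67, then the opcode), while
// "movb (%rax), %al" is plain 8A 00 -- so the emitter now also checks the
// base/index register class (Is32BitMemOperand) instead of relying only on
// the AdSize flag.
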
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); if (MemoryOperand != -1) MemoryOperand += CurOp; @@ -815,7 +834,12 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, else EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS); + unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags); + + if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode) + BaseOpcode = 0x0F; // Weird 3DNow! encoding. + unsigned SrcRegNum = 0; switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: @@ -828,6 +852,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(BaseOpcode, CurByte, OS); break; + case X86II::RawFrmImm8: + EmitByte(BaseOpcode, CurByte, OS); + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); + EmitImmediate(MI.getOperand(CurOp++), 1, FK_Data_1, CurByte, OS, Fixups); + break; case X86II::RawFrmImm16: EmitByte(BaseOpcode, CurByte, OS); EmitImmediate(MI.getOperand(CurOp++), @@ -963,12 +994,24 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, RegNum |= GetX86RegNum(MO) << 4; EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS, Fixups); - } else + } else { + unsigned FixupKind; + // FIXME: Is there a better way to know that we need a signed relocation? + if (MI.getOpcode() == X86::MOV64ri32 || + MI.getOpcode() == X86::MOV64mi32 || + MI.getOpcode() == X86::PUSH64i32) + FixupKind = X86::reloc_signed_4byte; + else + FixupKind = getImmFixupKind(TSFlags); EmitImmediate(MI.getOperand(CurOp++), - X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind), CurByte, OS, Fixups); + } } + if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode) + EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS); + #ifndef NDEBUG // FIXME: Verify. diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 8c4620f..cbe6db2 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "InstPrinter/X86ATTInstPrinter.h" #include "X86MCInstLower.h" #include "X86AsmPrinter.h" #include "X86COFFMachineModuleInfo.h" @@ -38,11 +39,6 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { } -MCSymbol *X86MCInstLower::GetPICBaseSymbol() const { - return static_cast<const X86TargetLowering*>(TM.getTargetLowering())-> - getPICBaseSymbol(&MF, Ctx); -} - /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol /// operand to an MCSymbol. MCSymbol *X86MCInstLower:: @@ -154,7 +150,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); // Subtract the pic base. Expr = MCBinaryExpr::CreateSub(Expr, - MCSymbolRefExpr::Create(GetPICBaseSymbol(), + MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx), Ctx); break; @@ -173,7 +169,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, Expr = MCSymbolRefExpr::Create(Sym, Ctx); // Subtract the pic base. 
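
// (The ReSimplify label lets a case that only swaps the opcode -- e.g. the
// ADD..._DB pseudos below, which turn into OR -- jump back and run the
// switch again, so the rewritten instruction can pick up further
// simplifications such as the shorter EAX-specific encodings.)
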
Expr = MCBinaryExpr::CreateSub(Expr, - MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx), + MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx), Ctx); if (MO.isJTI() && MAI.hasSetDirective()) { // If .set directive is supported, use it to reduce the number of @@ -326,8 +322,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { MO.getMBB()->getSymbol(), Ctx)); break; case MachineOperand::MO_GlobalAddress: - MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); - break; case MachineOperand::MO_ExternalSymbol: MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); break; @@ -347,6 +341,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } // Handle a few special cases to eliminate operand modifiers. +ReSimplify: switch (OutMI.getOpcode()) { case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand. lower_lea64_32mem(&OutMI, 1); @@ -377,11 +372,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break; case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break; case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; - case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break; - case X86::MMX_V_SETALLONES: - LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break; case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; + case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break; case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; @@ -417,6 +411,13 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; } + case X86::EH_RETURN: + case X86::EH_RETURN64: { + OutMI = MCInst(); + OutMI.setOpcode(X86::RET); + break; + } + // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions. case X86::TAILJMPr: case X86::TAILJMPd: @@ -436,6 +437,19 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; } + // These are pseudo-ops for OR to help with the OR->ADD transformation. We do + // this with an ugly goto in case the resultant OR uses EAX and needs the + // short form. + case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify; + case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify; + case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify; + case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify; + case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify; + case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify; + case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify; + case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify; + case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify; + // The assembler backend wants to see branches in their small form and relax // them to their large form. The JIT can only handle the large form because // it does not do relaxation. 
For now, translate the large form to the @@ -513,6 +527,66 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } } +static void LowerTlsAddr(MCStreamer &OutStreamer, + X86MCInstLower &MCInstLowering, + const MachineInstr &MI) { + bool is64Bits = MI.getOpcode() == X86::TLS_addr64; + MCContext &context = OutStreamer.getContext(); + + if (is64Bits) { + MCInst prefix; + prefix.setOpcode(X86::DATA16_PREFIX); + OutStreamer.EmitInstruction(prefix); + } + MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)); + const MCSymbolRefExpr *symRef = + MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context); + + MCInst LEA; + if (is64Bits) { + LEA.setOpcode(X86::LEA64r); + LEA.addOperand(MCOperand::CreateReg(X86::RDI)); // dest + LEA.addOperand(MCOperand::CreateReg(X86::RIP)); // base + LEA.addOperand(MCOperand::CreateImm(1)); // scale + LEA.addOperand(MCOperand::CreateReg(0)); // index + LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp + LEA.addOperand(MCOperand::CreateReg(0)); // seg + } else { + LEA.setOpcode(X86::LEA32r); + LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest + LEA.addOperand(MCOperand::CreateReg(0)); // base + LEA.addOperand(MCOperand::CreateImm(1)); // scale + LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // index + LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp + LEA.addOperand(MCOperand::CreateReg(0)); // seg + } + OutStreamer.EmitInstruction(LEA); + + if (is64Bits) { + MCInst prefix; + prefix.setOpcode(X86::DATA16_PREFIX); + OutStreamer.EmitInstruction(prefix); + prefix.setOpcode(X86::DATA16_PREFIX); + OutStreamer.EmitInstruction(prefix); + prefix.setOpcode(X86::REX64_PREFIX); + OutStreamer.EmitInstruction(prefix); + } + + MCInst call; + if (is64Bits) + call.setOpcode(X86::CALL64pcrel32); + else + call.setOpcode(X86::CALLpcrel32); + StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr"; + MCSymbol *tlsGetAddr = context.GetOrCreateSymbol(name); + const MCSymbolRefExpr *tlsRef = + MCSymbolRefExpr::Create(tlsGetAddr, + MCSymbolRefExpr::VK_PLT, + context); + + call.addOperand(MCOperand::CreateExpr(tlsRef)); + OutStreamer.EmitInstruction(call); +} void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(Mang, *MF, *this); @@ -532,13 +606,26 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER")); return; + + case X86::EH_RETURN: + case X86::EH_RETURN64: { + // Lower these as normal, but add some comments. + unsigned Reg = MI->getOperand(0).getReg(); + OutStreamer.AddComment(StringRef("eh_return, addr: %") + + X86ATTInstPrinter::getRegisterName(Reg)); + break; + } case X86::TAILJMPr: case X86::TAILJMPd: case X86::TAILJMPd64: // Lower these as normal, but add some comments. OutStreamer.AddComment("TAILCALL"); break; - + + case X86::TLS_addr32: + case X86::TLS_addr64: + return LowerTlsAddr(OutStreamer, MCInstLowering, *MI); + case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which @@ -548,7 +635,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // popl %esi // Emit the call. - MCSymbol *PICBase = MCInstLowering.GetPICBaseSymbol(); + MCSymbol *PICBase = MF->getPICBaseSymbol(); TmpInst.setOpcode(X86::CALLpcrel32); // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. 
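
// The LowerTlsAddr helper above emits the canonical ELF general-dynamic TLS
// sequence: a data16-padded "leaq sym@TLSGD(%rip), %rdi" followed by a
// data16/data16/rex64-padded "call __tls_get_addr@PLT"; the padding fixes
// the sequence at 16 bytes so the linker can relax it to the initial-exec
// or local-exec forms. Source that provokes it (sketch; build with -fPIC
// for x86-64 ELF):
//
//   thread_local int counter;                    // C++11; GNU __thread also works
//   int *addr_of_counter() { return &counter; }
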
@@ -586,7 +673,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); const MCExpr *PICBase = - MCSymbolRefExpr::Create(MCInstLowering.GetPICBaseSymbol(), OutContext); + MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext); DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext); DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h index 539b09b..0210072 100644 --- a/lib/Target/X86/X86MCInstLower.h +++ b/lib/Target/X86/X86MCInstLower.h @@ -40,8 +40,6 @@ public: void Lower(const MachineInstr *MI, MCInst &OutMI) const; - MCSymbol *GetPICBaseSymbol() const; - MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; diff --git a/lib/Target/X86/X86MachObjectWriter.cpp b/lib/Target/X86/X86MachObjectWriter.cpp new file mode 100644 index 0000000..8f3dd32 --- /dev/null +++ b/lib/Target/X86/X86MachObjectWriter.cpp @@ -0,0 +1,32 @@ +//===-- X86MachObjectWriter.cpp - X86 Mach-O Writer -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "llvm/MC/MCMachObjectWriter.h" +using namespace llvm; + +namespace { +class X86MachObjectWriter : public MCMachObjectTargetWriter { +public: + X86MachObjectWriter(bool Is64Bit, uint32_t CPUType, + uint32_t CPUSubtype) + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, + /*UseAggressiveSymbolFolding=*/Is64Bit) {} +}; +} + +MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype) { + return createMachObjectWriter(new X86MachObjectWriter(Is64Bit, + CPUType, + CPUSubtype), + OS, /*IsLittleEndian=*/true); +} diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index fedd49e..2f6bd88 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -31,7 +31,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -41,7 +41,7 @@ #include "llvm/Support/CommandLine.h" using namespace llvm; -static cl::opt<bool> +cl::opt<bool> ForceStackAlign("force-align-stack", cl::desc("Force align the stack to the minimum alignment" " needed for the function."), @@ -60,7 +60,7 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); Is64Bit = Subtarget->is64Bit(); IsWin64 = Subtarget->isTargetWin64(); - StackAlign = TM.getFrameInfo()->getStackAlignment(); + StackAlign = TM.getFrameLowering()->getStackAlignment(); if (Is64Bit) { SlotSize = 8; @@ -159,46 +159,21 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::YMM7: case X86::YMM15: case X86::MM7: return 7; - case X86::ES: - return 0; - case X86::CS: - return 1; - case X86::SS: - return 2; - case X86::DS: - return 3; - case X86::FS: - return 4; - case X86::GS: - return 5; - - case X86::CR0: - return 0; - case X86::CR1: - 
return 1; - case X86::CR2: - return 2; - case X86::CR3: - return 3; - case X86::CR4: - return 4; - - case X86::DR0: - return 0; - case X86::DR1: - return 1; - case X86::DR2: - return 2; - case X86::DR3: - return 3; - case X86::DR4: - return 4; - case X86::DR5: - return 5; - case X86::DR6: - return 6; - case X86::DR7: - return 7; + case X86::ES: return 0; + case X86::CS: return 1; + case X86::SS: return 2; + case X86::DS: return 3; + case X86::FS: return 4; + case X86::GS: return 5; + + case X86::CR0: case X86::CR8 : case X86::DR0: return 0; + case X86::CR1: case X86::CR9 : case X86::DR1: return 1; + case X86::CR2: case X86::CR10: case X86::DR2: return 2; + case X86::CR3: case X86::CR11: case X86::DR3: return 3; + case X86::CR4: case X86::CR12: case X86::DR4: return 4; + case X86::CR5: case X86::CR13: case X86::DR5: return 5; + case X86::CR6: case X86::CR14: case X86::DR6: return 6; + case X86::CR7: case X86::CR15: case X86::DR7: return 7; // Pseudo index registers are equivalent to a "none" // scaled index (See Intel Manual 2A, table 2-3) @@ -295,9 +270,14 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, } break; case X86::sub_32bit: - if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) { + if (B == &X86::GR32RegClass) { if (A->getSize() == 8) return A; + } else if (B == &X86::GR32_NOSPRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_NOSPRegClass) + return &X86::GR64_NOSPRegClass; + if (A->getSize() == 8) + return getCommonSubClass(A, &X86::GR64_NOSPRegClass); } else if (B == &X86::GR32_ABCDRegClass) { if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || A == &X86::GR64_NOREXRegClass || @@ -336,10 +316,16 @@ X86RegisterInfo::getPointerRegClass(unsigned Kind) const { if (TM.getSubtarget<X86Subtarget>().is64Bit()) return &X86::GR64RegClass; return &X86::GR32RegClass; - case 1: // Normal GRPs except the stack pointer (for encoding reasons). + case 1: // Normal GPRs except the stack pointer (for encoding reasons). if (TM.getSubtarget<X86Subtarget>().is64Bit()) return &X86::GR64_NOSPRegClass; return &X86::GR32_NOSPRegClass; + case 2: // Available for tailcall (not callee-saved GPRs). + if (TM.getSubtarget<X86Subtarget>().isTargetWin64()) + return &X86::GR64_TCW64RegClass; + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + return &X86::GR64_TCRegClass; + return &X86::GR32_TCRegClass; } } @@ -408,6 +394,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + // Set the stack-pointer register and its aliases as reserved. Reserved.set(X86::RSP); Reserved.set(X86::ESP); @@ -420,7 +408,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::IP); // Set the frame-pointer register and its aliases as reserved if needed. - if (hasFP(MF)) { + if (TFI->hasFP(MF)) { Reserved.set(X86::RBP); Reserved.set(X86::EBP); Reserved.set(X86::BP); @@ -445,21 +433,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// -/// hasFP - Return true if the specified function should have a dedicated frame -/// pointer register. This is true if the function has variable sized allocas -/// or if frame pointer elimination is disabled. 
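
// (Nothing below is lost: hasFP, getFrameIndexOffset, and the other frame
// logic that the following hunks delete moved behind the TargetFrameLowering
// interface -- note the TFI->hasFP(MF) call sites introduced in
// getReservedRegs above and in the hunks that follow.)
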
-bool X86RegisterInfo::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const MachineModuleInfo &MMI = MF.getMMI(); - - return (DisableFramePointerElim(MF) || - needsStackRealignment(MF) || - MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken() || - MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || - MMI.callsUnwindInit()); -} - bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); return (RealignStack && @@ -478,62 +451,25 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { if (0 && requiresRealignment && MFI->hasVarSizedObjects()) report_fatal_error( "Stack realignment in presense of dynamic allocas is not supported"); - + // If we've requested that we force align the stack do so now. if (ForceStackAlign) return canRealignStack(MF); - - return requiresRealignment && canRealignStack(MF); -} -bool X86RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const { - return !MF.getFrameInfo()->hasVarSizedObjects(); + return requiresRealignment && canRealignStack(MF); } bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const { - if (Reg == FramePtr && hasFP(MF)) { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (Reg == FramePtr && TFI->hasFP(MF)) { FrameIdx = MF.getFrameInfo()->getObjectIndexBegin(); return true; } return false; } -int -X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { - const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea(); - uint64_t StackSize = MFI->getStackSize(); - - if (needsStackRealignment(MF)) { - if (FI < 0) { - // Skip the saved EBP. - Offset += SlotSize; - } else { - unsigned Align = MFI->getObjectAlignment(FI); - assert((-(Offset + StackSize)) % Align == 0); - Align = 0; - return Offset + StackSize; - } - // FIXME: Support tail calls - } else { - if (!hasFP(MF)) - return Offset + StackSize; - - // Skip the saved EBP. - Offset += SlotSize; - - // Skip the RETADDR move area - const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); - if (TailCallReturnAddrDelta < 0) - Offset -= TailCallReturnAddrDelta; - } - - return Offset; -} - static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { if (is64Bit) { if (isInt<8>(Imm)) @@ -561,69 +497,70 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { void X86RegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (!hasReservedCallFrame(MF)) { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + bool reseveCallFrame = TFI->hasReservedCallFrame(MF); + int Opcode = I->getOpcode(); + bool isDestroy = Opcode == getCallFrameDestroyOpcode(); + DebugLoc DL = I->getDebugLoc(); + uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; + uint64_t CalleeAmt = isDestroy ? 
I->getOperand(1).getImm() : 0; + I = MBB.erase(I); + + if (!reseveCallFrame) { // If the stack pointer can be changed after prologue, turn the // adjcallstackup instruction into a 'sub ESP, <amt>' and the // adjcallstackdown instruction into 'add ESP, <amt>' // TODO: consider using push / pop instead of sub + store / add - MachineInstr *Old = I; - uint64_t Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; - - MachineInstr *New = 0; - if (Old->getOpcode() == getCallFrameSetupOpcode()) { - New = BuildMI(MF, Old->getDebugLoc(), - TII.get(getSUBriOpcode(Is64Bit, Amount)), - StackPtr) - .addReg(StackPtr) - .addImm(Amount); - } else { - assert(Old->getOpcode() == getCallFrameDestroyOpcode()); - - // Factor out the amount the callee already popped. - uint64_t CalleeAmt = Old->getOperand(1).getImm(); - Amount -= CalleeAmt; - - if (Amount) { - unsigned Opc = getADDriOpcode(Is64Bit, Amount); - New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(Amount); - } - } + if (Amount == 0) + return; + + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; + + MachineInstr *New = 0; + if (Opcode == getCallFrameSetupOpcode()) { + New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)), + StackPtr) + .addReg(StackPtr) + .addImm(Amount); + } else { + assert(Opcode == getCallFrameDestroyOpcode()); - if (New) { - // The EFLAGS implicit def is dead. - New->getOperand(3).setIsDead(); + // Factor out the amount the callee already popped. + Amount -= CalleeAmt; - // Replace the pseudo instruction with a new instruction. - MBB.insert(I, New); + if (Amount) { + unsigned Opc = getADDriOpcode(Is64Bit, Amount); + New = BuildMI(MF, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr).addImm(Amount); } } - } else if (I->getOpcode() == getCallFrameDestroyOpcode()) { - // If we are performing frame pointer elimination and if the callee pops - // something off the stack pointer, add it back. We do this until we have - // more advanced stack pointer tracking ability. - if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { - unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt); - MachineInstr *Old = I; - MachineInstr *New = - BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), - StackPtr) - .addReg(StackPtr) - .addImm(CalleeAmt); + if (New) { // The EFLAGS implicit def is dead. New->getOperand(3).setIsDead(); + + // Replace the pseudo instruction with a new instruction. MBB.insert(I, New); } + + return; } - MBB.erase(I); + if (Opcode == getCallFrameDestroyOpcode() && CalleeAmt) { + // If we are performing frame pointer elimination and if the callee pops + // something off the stack pointer, add it back. We do this until we have + // more advanced stack pointer tracking ability. + unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt); + MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr).addImm(CalleeAmt); + + // The EFLAGS implicit def is dead. 
+ New->getOperand(3).setIsDead(); + MBB.insert(I, New); + } } void @@ -634,6 +571,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned i = 0; MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); while (!MI.getOperand(i).isFI()) { ++i; @@ -650,7 +588,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, else if (AfterFPPop) BasePtr = StackPtr; else - BasePtr = (hasFP(MF) ? FramePtr : StackPtr); + BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr); // This must be part of a four operand memory reference. Replace the // FrameIndex with base register with EBP. Add an offset to the offset. @@ -660,11 +598,10 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FIOffset; if (AfterFPPop) { // Tail call jmp happens after FP is popped. - const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); const MachineFrameInfo *MFI = MF.getFrameInfo(); - FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea(); + FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea(); } else - FIOffset = getFrameIndexOffset(MF, FrameIndex); + FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex); if (MI.getOperand(i+3).isImm()) { // Offset is a 32-bit integer. @@ -677,710 +614,14 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } -void -X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); - - if (TailCallReturnAddrDelta < 0) { - // create RETURNADDR area - // arg - // arg - // RETADDR - // { ... - // RETADDR area - // ... - // } - // [EBP] - MFI->CreateFixedObject(-TailCallReturnAddrDelta, - (-1U*SlotSize)+TailCallReturnAddrDelta, true); - } - - if (hasFP(MF)) { - assert((TailCallReturnAddrDelta <= 0) && - "The Delta should always be zero or negative"); - const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - - // Create a frame entry for the EBP register that must be saved. - int FrameIdx = MFI->CreateFixedObject(SlotSize, - -(int)SlotSize + - TFI.getOffsetOfLocalArea() + - TailCallReturnAddrDelta, - true); - assert(FrameIdx == MFI->getObjectIndexBegin() && - "Slot for EBP register must be last in order to be found!"); - FrameIdx = 0; - } -} - -/// emitSPUpdate - Emit a series of instructions to increment / decrement the -/// stack pointer by a constant value. -static -void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, int64_t NumBytes, bool Is64Bit, - const TargetInstrInfo &TII) { - bool isSub = NumBytes < 0; - uint64_t Offset = isSub ? -NumBytes : NumBytes; - unsigned Opc = isSub ? - getSUBriOpcode(Is64Bit, Offset) : - getADDriOpcode(Is64Bit, Offset); - uint64_t Chunk = (1LL << 31) - 1; - DebugLoc DL = MBB.findDebugLoc(MBBI); - - while (Offset) { - uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr) - .addImm(ThisVal); - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. - Offset -= ThisVal; - } -} - -/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. 
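
// (The idea: two adjacent stack-pointer updates, e.g. "sub esp, 32" followed
// by "sub esp, 8", fold into a single "sub esp, 40", with NumBytes carrying
// the folded amount back to the caller. These helpers move, together with
// the prologue/epilogue code deleted below, into the TargetFrameLowering-
// based X86 implementation.)
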
-static -void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, uint64_t *NumBytes = NULL) { - if (MBBI == MBB.begin()) return; - - MachineBasicBlock::iterator PI = prior(MBBI); - unsigned Opc = PI->getOpcode(); - if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || - Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && - PI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes += PI->getOperand(2).getImm(); - MBB.erase(PI); - } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || - Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && - PI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes -= PI->getOperand(2).getImm(); - MBB.erase(PI); - } -} - -/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator. -static -void mergeSPUpdatesDown(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, uint64_t *NumBytes = NULL) { - // FIXME: THIS ISN'T RUN!!! - return; - - if (MBBI == MBB.end()) return; - - MachineBasicBlock::iterator NI = llvm::next(MBBI); - if (NI == MBB.end()) return; - - unsigned Opc = NI->getOpcode(); - if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || - Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && - NI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes -= NI->getOperand(2).getImm(); - MBB.erase(NI); - MBBI = NI; - } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || - Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && - NI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes += NI->getOperand(2).getImm(); - MBB.erase(NI); - MBBI = NI; - } -} - -/// mergeSPUpdates - Checks the instruction before/after the passed -/// instruction. If it is an ADD/SUB instruction it is deleted argument and the -/// stack adjustment is returned as a positive value for ADD and a negative for -/// SUB. -static int mergeSPUpdates(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, - bool doMergeWithPrevious) { - if ((doMergeWithPrevious && MBBI == MBB.begin()) || - (!doMergeWithPrevious && MBBI == MBB.end())) - return 0; - - MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI; - MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI); - unsigned Opc = PI->getOpcode(); - int Offset = 0; - - if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || - Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && - PI->getOperand(0).getReg() == StackPtr){ - Offset += PI->getOperand(2).getImm(); - MBB.erase(PI); - if (!doMergeWithPrevious) MBBI = NI; - } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || - Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && - PI->getOperand(0).getReg() == StackPtr) { - Offset -= PI->getOperand(2).getImm(); - MBB.erase(PI); - if (!doMergeWithPrevious) MBBI = NI; - } - - return Offset; -} - -void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF, - MCSymbol *Label, - unsigned FramePtr) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo &MMI = MF.getMMI(); - - // Add callee saved registers to move list. - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - if (CSI.empty()) return; - - std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - const TargetData *TD = MF.getTarget().getTargetData(); - bool HasFP = hasFP(MF); - - // Calculate amount of bytes used for return address storing. - int stackGrowth = - (MF.getTarget().getFrameInfo()->getStackGrowthDirection() == - TargetFrameInfo::StackGrowsUp ? 
- TD->getPointerSize() : -TD->getPointerSize()); - - // FIXME: This is dirty hack. The code itself is pretty mess right now. - // It should be rewritten from scratch and generalized sometimes. - - // Determine maximum offset (minumum due to stack growth). - int64_t MaxOffset = 0; - for (std::vector<CalleeSavedInfo>::const_iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) - MaxOffset = std::min(MaxOffset, - MFI->getObjectOffset(I->getFrameIdx())); - - // Calculate offsets. - int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth; - for (std::vector<CalleeSavedInfo>::const_iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) { - int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); - unsigned Reg = I->getReg(); - Offset = MaxOffset - Offset + saveAreaOffset; - - // Don't output a new machine move if we're re-saving the frame - // pointer. This happens when the PrologEpilogInserter has inserted an extra - // "PUSH" of the frame pointer -- the "emitPrologue" method automatically - // generates one when frame pointers are used. If we generate a "machine - // move" for this extra "PUSH", the linker will lose track of the fact that - // the frame pointer should have the value of the first "PUSH" when it's - // trying to unwind. - // - // FIXME: This looks inelegant. It's possibly correct, but it's covering up - // another bug. I.e., one where we generate a prolog like this: - // - // pushl %ebp - // movl %esp, %ebp - // pushl %ebp - // pushl %esi - // ... - // - // The immediate re-push of EBP is unnecessary. At the least, it's an - // optimization bug. EBP can be used as a scratch register in certain - // cases, but probably not when we have a frame pointer. - if (HasFP && FramePtr == Reg) - continue; - - MachineLocation CSDst(MachineLocation::VirtualFP, Offset); - MachineLocation CSSrc(Reg); - Moves.push_back(MachineMove(Label, CSDst, CSSrc)); - } -} - -/// emitPrologue - Push callee-saved registers onto the stack, which -/// automatically adjust the stack pointer. Adjust the stack pointer to allocate -/// space for local variables. Also emit labels used by the exception handler to -/// generate the exception handling frames. -void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const Function *Fn = MF.getFunction(); - const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>(); - MachineModuleInfo &MMI = MF.getMMI(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - bool needsFrameMoves = MMI.hasDebugInfo() || - !Fn->doesNotThrow() || UnwindTablesMandatory; - uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. - uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. - bool HasFP = hasFP(MF); - DebugLoc DL; - - // If we're forcing a stack realignment we can't rely on just the frame - // info, we need to know the ABI stack alignment as well in case we - // have a call out. Otherwise just make sure we have some alignment - we'll - // go with the minimum SlotSize. - if (ForceStackAlign) { - if (MFI->hasCalls()) - MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; - else if (MaxAlign < SlotSize) - MaxAlign = SlotSize; - } - - // Add RETADDR move area to callee saved frame size. 
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); - if (TailCallReturnAddrDelta < 0) - X86FI->setCalleeSavedFrameSize( - X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); - - // If this is x86-64 and the Red Zone is not disabled, if we are a leaf - // function, and use up to 128 bytes of stack space, don't have a frame - // pointer, calls, or dynamic alloca then we do not need to adjust the - // stack pointer (we fit in the Red Zone). - if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && - !needsStackRealignment(MF) && - !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->adjustsStack() && // No calls. - !Subtarget->isTargetWin64()) { // Win64 has no Red Zone - uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); - if (HasFP) MinSize += SlotSize; - StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); - MFI->setStackSize(StackSize); - } else if (Subtarget->isTargetWin64()) { - // We need to always allocate 32 bytes as register spill area. - // FIXME: We might reuse these 32 bytes for leaf functions. - StackSize += 32; - MFI->setStackSize(StackSize); - } - - // Insert stack pointer adjustment for later moving of return addr. Only - // applies to tail call optimized functions where the callee argument stack - // size is bigger than the callers. - if (TailCallReturnAddrDelta < 0) { - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, - TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), - StackPtr) - .addReg(StackPtr) - .addImm(-TailCallReturnAddrDelta); - MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. - } - - // Mapping for machine moves: - // - // DST: VirtualFP AND - // SRC: VirtualFP => DW_CFA_def_cfa_offset - // ELSE => DW_CFA_def_cfa - // - // SRC: VirtualFP AND - // DST: Register => DW_CFA_def_cfa_register - // - // ELSE - // OFFSET < 0 => DW_CFA_offset_extended_sf - // REG < 64 => DW_CFA_offset + Reg - // ELSE => DW_CFA_offset_extended - - std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - const TargetData *TD = MF.getTarget().getTargetData(); - uint64_t NumBytes = 0; - int stackGrowth = -TD->getPointerSize(); - - if (HasFP) { - // Calculate required stack adjustment. - uint64_t FrameSize = StackSize - SlotSize; - if (needsStackRealignment(MF)) - FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; - - NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); - - // Get the offset of the stack slot for the EBP register, which is - // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. - // Update the frame offset adjustment. - MFI->setOffsetAdjustment(-NumBytes); - - // Save EBP/RBP into the appropriate stack slot. - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) - .addReg(FramePtr, RegState::Kill); - - if (needsFrameMoves) { - // Mark the place where EBP/RBP was saved. - MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); - - // Define the current CFA rule to use the provided offset. - if (StackSize) { - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth); - Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); - } else { - // FIXME: Verify & implement for FP - MachineLocation SPDst(StackPtr); - MachineLocation SPSrc(StackPtr, stackGrowth); - Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); - } - - // Change the rule for the FramePtr to be an "offset" rule. 
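// [Editor's note] The DW_CFA mapping table above is easier to read against
// a concrete 32-bit prologue; the offsets below assume 4-byte slots and are
// illustrative only (the machine-move code this note interrupts continues
// below):
//
//   pushl %ebp        ->  CFA rule becomes "ESP + 8"  (DW_CFA_def_cfa_offset)
//   movl  %esp,%ebp   ->  CFA rule becomes "via EBP"  (DW_CFA_def_cfa_register)
//   pushl %esi        ->  DW_CFA_offset: ESI saved at CFA - 12
//
// (return address at CFA - 4, saved EBP at CFA - 8, ESI at CFA - 12)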
- MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth); - MachineLocation FPSrc(FramePtr); - Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); - } - - // Update EBP with the new base value... - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) - .addReg(StackPtr); - - if (needsFrameMoves) { - // Mark effective beginning of when frame pointer becomes valid. - MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); - - // Define the current CFA to use the EBP/RBP register. - MachineLocation FPDst(FramePtr); - MachineLocation FPSrc(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); - } - - // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); - I != E; ++I) - I->addLiveIn(FramePtr); - - // Realign stack - if (needsStackRealignment(MF)) { - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), - StackPtr).addReg(StackPtr).addImm(-MaxAlign); - - // The EFLAGS implicit def is dead. - MI->getOperand(3).setIsDead(); - } - } else { - NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); - } - - // Skip the callee-saved push instructions. - bool PushedRegs = false; - int StackOffset = 2 * stackGrowth; - - while (MBBI != MBB.end() && - (MBBI->getOpcode() == X86::PUSH32r || - MBBI->getOpcode() == X86::PUSH64r)) { - PushedRegs = true; - ++MBBI; - - if (!HasFP && needsFrameMoves) { - // Mark callee-saved push instruction. - MCSymbol *Label = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); - - // Define the current CFA rule to use the provided offset. - unsigned Ptr = StackSize ? - MachineLocation::VirtualFP : StackPtr; - MachineLocation SPDst(Ptr); - MachineLocation SPSrc(Ptr, StackOffset); - Moves.push_back(MachineMove(Label, SPDst, SPSrc)); - StackOffset += stackGrowth; - } - } - - DL = MBB.findDebugLoc(MBBI); - - // Adjust stack pointer: ESP -= numbytes. - - // Windows and cygwin/mingw require a prologue helper routine when allocating - // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw - // uses __alloca. __alloca and the 32-bit version of __chkstk will probe - // the stack and adjust the stack pointer in one go. The 64-bit version - // of __chkstk is only responsible for probing the stack. The 64-bit - // prologue is responsible for adjusting the stack pointer. Touching the - // stack at 4K increments is necessary to ensure that the guard pages used - // by the OS virtual memory manager are allocated in correct sequence. - if (NumBytes >= 4096 && - (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) { - // Check, whether EAX is livein for this function. - bool isEAXAlive = false; - for (MachineRegisterInfo::livein_iterator - II = MF.getRegInfo().livein_begin(), - EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) { - unsigned Reg = II->first; - isEAXAlive = (Reg == X86::EAX || Reg == X86::AX || - Reg == X86::AH || Reg == X86::AL); - } - - - const char *StackProbeSymbol = - Subtarget->isTargetWindows() ? 
"_chkstk" : "_alloca"; - if (!isEAXAlive) { - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes); - BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol(StackProbeSymbol) - .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - } else { - // Save EAX - BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) - .addReg(X86::EAX, RegState::Kill); - - // Allocate NumBytes-4 bytes on stack. We'll also use 4 already - // allocated bytes for EAX. - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes - 4); - BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol(StackProbeSymbol) - .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - - // Restore EAX - MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), - X86::EAX), - StackPtr, false, NumBytes - 4); - MBB.insert(MBBI, MI); - } - } else if (NumBytes) { - // If there is an SUB32ri of ESP immediately before this instruction, merge - // the two. This can be the case when tail call elimination is enabled and - // the callee has more arguments then the caller. - NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); - - // If there is an ADD32ri or SUB32ri of ESP immediately after this - // instruction, merge the two instructions. - mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); - - if (NumBytes) - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); - } - - if ((NumBytes || PushedRegs) && needsFrameMoves) { - // Mark end of stack pointer adjustment. - MCSymbol *Label = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); - - if (!HasFP && NumBytes) { - // Define the current CFA rule to use the provided offset. - if (StackSize) { - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, - -StackSize + stackGrowth); - Moves.push_back(MachineMove(Label, SPDst, SPSrc)); - } else { - // FIXME: Verify & implement for FP - MachineLocation SPDst(StackPtr); - MachineLocation SPSrc(StackPtr, stackGrowth); - Moves.push_back(MachineMove(Label, SPDst, SPSrc)); - } - } - - // Emit DWARF info specifying the offsets of the callee-saved registers. - if (PushedRegs) - emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr); - } -} - -void X86RegisterInfo::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - unsigned RetOpcode = MBBI->getOpcode(); - DebugLoc DL = MBBI->getDebugLoc(); - - switch (RetOpcode) { - default: - llvm_unreachable("Can only insert epilog into returning blocks"); - case X86::RET: - case X86::RETI: - case X86::TCRETURNdi: - case X86::TCRETURNri: - case X86::TCRETURNmi: - case X86::TCRETURNdi64: - case X86::TCRETURNri64: - case X86::TCRETURNmi64: - case X86::EH_RETURN: - case X86::EH_RETURN64: - break; // These are ok - } - - // Get the number of bytes to allocate from the FrameInfo. 
- uint64_t StackSize = MFI->getStackSize(); - uint64_t MaxAlign = MFI->getMaxAlignment(); - unsigned CSSize = X86FI->getCalleeSavedFrameSize(); - uint64_t NumBytes = 0; - - // If we're forcing a stack realignment we can't rely on just the frame - // info, we need to know the ABI stack alignment as well in case we - // have a call out. Otherwise just make sure we have some alignment - we'll - // go with the minimum. - if (ForceStackAlign) { - if (MFI->hasCalls()) - MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; - else - MaxAlign = MaxAlign ? MaxAlign : 4; - } - - if (hasFP(MF)) { - // Calculate required stack adjustment. - uint64_t FrameSize = StackSize - SlotSize; - if (needsStackRealignment(MF)) - FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; - - NumBytes = FrameSize - CSSize; - - // Pop EBP. - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr); - } else { - NumBytes = StackSize - CSSize; - } - - // Skip the callee-saved pop instructions. - MachineBasicBlock::iterator LastCSPop = MBBI; - while (MBBI != MBB.begin()) { - MachineBasicBlock::iterator PI = prior(MBBI); - unsigned Opc = PI->getOpcode(); - - if (Opc != X86::POP32r && Opc != X86::POP64r && - !PI->getDesc().isTerminator()) - break; - - --MBBI; - } - - DL = MBBI->getDebugLoc(); - - // If there is an ADD32ri or SUB32ri of ESP immediately before this - // instruction, merge the two instructions. - if (NumBytes || MFI->hasVarSizedObjects()) - mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); - - // If dynamic alloca is used, then reset esp to point to the last callee-saved - // slot before popping them off! Same applies for the case, when stack was - // realigned. - if (needsStackRealignment(MF)) { - // We cannot use LEA here, because stack pointer was realigned. We need to - // deallocate local frame back. - if (CSSize) { - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); - MBBI = prior(LastCSPop); - } - - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), - StackPtr).addReg(FramePtr); - } else if (MFI->hasVarSizedObjects()) { - if (CSSize) { - unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r; - MachineInstr *MI = - addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), - FramePtr, false, -CSSize); - MBB.insert(MBBI, MI); - } else { - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr) - .addReg(FramePtr); - } - } else if (NumBytes) { - // Adjust stack pointer back: ESP += numbytes. - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); - } - - // We're returning from function via eh_return. - if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) { - MBBI = prior(MBB.end()); - MachineOperand &DestAddr = MBBI->getOperand(0); - assert(DestAddr.isReg() && "Offset should be in register!"); - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), - StackPtr).addReg(DestAddr.getReg()); - } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || - RetOpcode == X86::TCRETURNmi || - RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 || - RetOpcode == X86::TCRETURNmi64) { - bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64; - // Tail call return: adjust the stack pointer and jump to callee. - MBBI = prior(MBB.end()); - MachineOperand &JumpTarget = MBBI->getOperand(0); - MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1); - assert(StackAdjust.isImm() && "Expecting immediate value."); - - // Adjust stack pointer. 
- int StackAdj = StackAdjust.getImm(); - int MaxTCDelta = X86FI->getTCReturnAddrDelta(); - int Offset = 0; - assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); - - // Incoporate the retaddr area. - Offset = StackAdj-MaxTCDelta; - assert(Offset >= 0 && "Offset should never be negative"); - - if (Offset) { - // Check for possible merge with preceeding ADD instruction. - Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII); - } - - // Jump to label or value in register. - if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) { - BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi) - ? X86::TAILJMPd : X86::TAILJMPd64)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), - JumpTarget.getTargetFlags()); - } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi) - ? X86::TAILJMPm : X86::TAILJMPm64)); - for (unsigned i = 0; i != 5; ++i) - MIB.addOperand(MBBI->getOperand(i)); - } else if (RetOpcode == X86::TCRETURNri64) { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). - addReg(JumpTarget.getReg(), RegState::Kill); - } else { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). - addReg(JumpTarget.getReg(), RegState::Kill); - } - - MachineInstr *NewMI = prior(MBBI); - for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) - NewMI->addOperand(MBBI->getOperand(i)); - - // Delete the pseudo instruction TCRETURN. - MBB.erase(MBBI); - } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) && - (X86FI->getTCReturnAddrDelta() < 0)) { - // Add the return addr area delta back since we are not tail calling. - int delta = -1*X86FI->getTCReturnAddrDelta(); - MBBI = prior(MBB.end()); - - // Check for possible merge with preceeding ADD instruction. - delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); - emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII); - } -} - unsigned X86RegisterInfo::getRARegister() const { return Is64Bit ? X86::RIP // Should have dwarf #16. : X86::EIP; // Should have dwarf #8. } unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - return hasFP(MF) ? FramePtr : StackPtr; -} - -void -X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const { - // Calculate amount of bytes used for return address storing - int stackGrowth = (Is64Bit ? -8 : -4); - - // Initial state of the frame pointer is esp+stackGrowth. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(StackPtr, stackGrowth); - Moves.push_back(MachineMove(0, Dst, Src)); - - // Add return address to move list - MachineLocation CSDst(StackPtr, stackGrowth); - MachineLocation CSSrc(getRARegister()); - Moves.push_back(MachineMove(0, CSDst, CSSrc)); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + return TFI->hasFP(MF) ? FramePtr : StackPtr; } unsigned X86RegisterInfo::getEHExceptionRegister() const { @@ -1579,13 +820,13 @@ namespace { // Be over-conservative: scan over all vreg defs and find whether vector // registers are used. If yes, there is a possibility that vector register // will be spilled and thus require dynamic stack realignment. 
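// [Editor's note] The loop rewritten just below reflects LLVM's move away
// from a contiguous virtual-register range: instead of counting from
// FirstVirtualRegister up to getLastVirtReg(), code now iterates register
// *indices* and maps each index back to a register number. Minimal usage
// sketch (assumes a MachineRegisterInfo &RI in scope):
//
//   for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
//     unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
//     // ... inspect RI.getRegClass(Reg), its defs/uses, etc. ...
//   }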
-  for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
-       RegNum < RI.getLastVirtReg(); ++RegNum)
-    if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) {
+  for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (RI.getRegClass(Reg)->getAlignment() > StackAlignment) {
       FuncInfo->setReserveFP(true);
       return true;
     }
-
+  }
   // Nothing to do
   return false;
 }
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 527df05..064be64 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -111,14 +111,10 @@ public:
   /// register scavenger to determine what registers are free.
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  bool hasFP(const MachineFunction &MF) const;
-
   bool canRealignStack(const MachineFunction &MF) const;
 
   bool needsStackRealignment(const MachineFunction &MF) const;
 
-  bool hasReservedCallFrame(const MachineFunction &MF) const;
-
   bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
                             int &FrameIdx) const;
 
@@ -129,19 +125,12 @@ public:
   void eliminateFrameIndex(MachineBasicBlock::iterator MI,
                            int SPAdj, RegScavenger *RS = NULL) const;
 
-  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                            RegScavenger *RS = NULL) const;
-
-  void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
-                                 unsigned FramePtr) const;
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
   // Debug information queries.
   unsigned getRARegister() const;
   unsigned getFrameRegister(const MachineFunction &MF) const;
-  int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-  void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+  unsigned getStackRegister() const { return StackPtr; }
+  // FIXME: Move to FrameInfo
+  unsigned getSlotSize() const { return SlotSize; }
 
   // Exception handling queries.
   unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 95269b1..612fac2 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -1,10 +1,10 @@
 //===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
-//
+//
 // The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-//
+//
 //===----------------------------------------------------------------------===//
 //
 // This file describes the X86 Register file, defining the registers themselves,
@@ -34,8 +34,8 @@ let Namespace = "X86" in {
   // because the register file generator is smart enough to figure out that
   // AL aliases AX if we tell it that AX aliased AL (for example).
 
-  // Dwarf numbering is different for 32-bit and 64-bit, and there are
-  // variations by target as well. Currently the first entry is for X86-64,
+  // Dwarf numbering is different for 32-bit and 64-bit, and there are
+  // variations by target as well.
Currently the first entry is for X86-64, // second - for EH on X86-32/Darwin and third is 'generic' one (X86-32/Linux // and debug information on X86-32/Darwin) @@ -81,7 +81,7 @@ let Namespace = "X86" in { def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>; } def IP : Register<"ip">, DwarfRegNum<[16]>; - + // X86-64 only let SubRegIndices = [sub_8bit] in { def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>; @@ -103,8 +103,8 @@ let Namespace = "X86" in { def EDI : RegisterWithSubRegs<"edi", [DI]>, DwarfRegNum<[5, 7, 7]>; def EBP : RegisterWithSubRegs<"ebp", [BP]>, DwarfRegNum<[6, 4, 5]>; def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>; - def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>; - + def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>; + // X86-64 only def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>; def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>; @@ -208,7 +208,7 @@ let Namespace = "X86" in { def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>; def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>; def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>; - def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>; + def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>; // Status flags register def EFLAGS : Register<"flags">; @@ -220,7 +220,7 @@ let Namespace = "X86" in { def ES : Register<"es">; def FS : Register<"fs">; def GS : Register<"gs">; - + // Debug registers def DR0 : Register<"dr0">; def DR1 : Register<"dr1">; @@ -230,8 +230,8 @@ let Namespace = "X86" in { def DR5 : Register<"dr5">; def DR6 : Register<"dr6">; def DR7 : Register<"dr7">; - - // Condition registers + + // Control registers def CR0 : Register<"cr0">; def CR1 : Register<"cr1">; def CR2 : Register<"cr2">; @@ -241,6 +241,13 @@ let Namespace = "X86" in { def CR6 : Register<"cr6">; def CR7 : Register<"cr7">; def CR8 : Register<"cr8">; + def CR9 : Register<"cr9">; + def CR10 : Register<"cr10">; + def CR11 : Register<"cr11">; + def CR12 : Register<"cr12">; + def CR13 : Register<"cr13">; + def CR14 : Register<"cr14">; + def CR15 : Register<"cr15">; // Pseudo index registers def EIZ : Register<"eiz">; @@ -254,10 +261,10 @@ let Namespace = "X86" in { // implicitly defined to be the register allocation order. // -// List call-clobbered registers before callee-save registers. RBX, RBP, (and +// List call-clobbered registers before callee-save registers. RBX, RBP, (and // R12, R13, R14, and R15 for X86-64) are callee-save registers. // In 64-mode, there are 12 additional i8 registers, SIL, DIL, BPL, SPL, and -// R8B, ... R15B. +// R8B, ... R15B. // Allocate R12 and R13 last, as these require an extra byte when // encoded in x86_64 instructions. // FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in @@ -292,14 +299,14 @@ def GR8 : RegisterClass<"X86", [i8], 8, GR8Class::iterator GR8Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? if (!Subtarget.is64Bit()) // In 32-mode, none of the 8-bit registers aliases EBP or ESP. 
return begin() + 8; - else if (RI->hasFP(MF) || MFI->getReserveFP()) + else if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SPL or BPL. return array_endof(X86_GR8_AO_64) - 1; else @@ -337,12 +344,12 @@ def GR16 : RegisterClass<"X86", [i16], 16, GR16Class::iterator GR16Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return array_endof(X86_GR16_AO_64) - 1; else @@ -350,7 +357,7 @@ def GR16 : RegisterClass<"X86", [i16], 16, return array_endof(X86_GR16_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return begin() + 6; else @@ -389,12 +396,12 @@ def GR32 : RegisterClass<"X86", [i32], 32, GR32Class::iterator GR32Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return array_endof(X86_GR32_AO_64) - 1; else @@ -402,7 +409,7 @@ def GR32 : RegisterClass<"X86", [i32], 32, return array_endof(X86_GR32_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return begin() + 6; else @@ -429,13 +436,13 @@ def GR64 : RegisterClass<"X86", [i64], 64, GR64Class::iterator GR64Class::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (!Subtarget.is64Bit()) return begin(); // None of these are allocatable in 32-bit. // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) return end()-3; // If so, don't allocate RIP, RSP or RBP else return end()-2; // If not, just don't allocate RIP or RSP @@ -446,18 +453,16 @@ def GR64 : RegisterClass<"X86", [i64], 64, // Segment registers for use by MOV instructions (and others) that have a // segment register as one operand. Always contain a 16-bit segment // descriptor. -def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> { -} +def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]>; // Debug registers. 
def DEBUG_REG : RegisterClass<"X86", [i32], 32, - [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]> { -} + [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]>; // Control registers. def CONTROL_REG : RegisterClass<"X86", [i64], 64, - [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> { -} + [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8, + CR9, CR10, CR11, CR12, CR13, CR14, CR15]>; // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of // GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d" @@ -465,10 +470,8 @@ def CONTROL_REG : RegisterClass<"X86", [i64], 64, // that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD, // and GR64_ABCD are classes for registers that support 8-bit h-register // operations. -def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> { -} -def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> { -} +def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]>; +def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]>; def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> { let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)]; } @@ -493,6 +496,9 @@ def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, (GR32_TC sub_32bit)]; } +def GR64_TCW64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, + R8, R9, R11]>; + // GR8_NOREX - GR8 registers which do not require a REX prefix. def GR8_NOREX : RegisterClass<"X86", [i8], 8, [AL, CL, DL, AH, CH, DH, BL, BH]> { @@ -538,10 +544,10 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16, GR16_NOREXClass::iterator GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return end() - 2; else @@ -562,10 +568,10 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32, GR32_NOREXClass::iterator GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return end() - 2; else @@ -587,10 +593,10 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64, GR64_NOREXClass::iterator GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate RIP, RSP or RBP. 
return end() - 3; else @@ -629,12 +635,12 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32, GR32_NOSPClass::iterator GR32_NOSPClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate EBP. return array_endof(X86_GR32_NOSP_AO_64) - 1; else @@ -642,7 +648,7 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32, return array_endof(X86_GR32_NOSP_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate EBP. return begin() + 6; else @@ -667,13 +673,13 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64, GR64_NOSPClass::iterator GR64_NOSPClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (!Subtarget.is64Bit()) return begin(); // None of these are allocatable in 32-bit. // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) return end()-1; // If so, don't allocate RBP else return end(); // If not, any reg in this class is ok. @@ -695,10 +701,10 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF) || MFI->getReserveFP()) + if (TFI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate RBP. return end() - 1; else @@ -784,7 +790,7 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, } // Generic vector registers: VR64 and VR128. -def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64, +def VR64: RegisterClass<"X86", [x86mmx], 64, [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>; def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 6297a27..42e8193 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -32,10 +32,13 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, - const Value *DstSV, - uint64_t DstSVOff) const { + MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + // If to a segment-relative address space, use the default lowering. 
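// [Editor's note] On the bail-out added just below: X86 reserves LLVM
// address spaces 256 and 257 for GS- and FS-relative accesses. The inlined
// rep;stos / rep;movs expansions cannot carry a segment override here, so
// any pointer in those spaces falls back to the generic lowering. A
// hypothetical predicate capturing the same test:
//
//   static bool isSegmentRelative(const MachinePointerInfo &PI) {
//     return PI.getAddrSpace() >= 256;  // 256 = GS, 257 = FS on X86
//   }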
+ if (DstPtrInfo.getAddrSpace() >= 256) + return SDValue(); + // If not DWORD aligned or size is more than the threshold, call the library. // The libc version is likely to be faster for these cases. It can use the // address value and run time information about the CPU. @@ -133,7 +136,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, Dst, InFlag); InFlag = Chain.getValue(1); - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); @@ -147,7 +150,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, X86::ECX, Left, InFlag); InFlag = Chain.getValue(1); - Tys = DAG.getVTList(MVT::Other, MVT::Flag); + Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); } else if (BytesLeft) { @@ -161,7 +164,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, DAG.getConstant(Offset, AddrVT)), Src, DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, DstSV, DstSVOff + Offset); + Align, isVolatile, DstPtrInfo.getWithOffset(Offset)); } // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. @@ -173,10 +176,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { // This requires the copy size to be a constant, preferrably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -186,14 +187,29 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) return SDValue(); - /// If not DWORD aligned, call the library. - if ((Align & 3) != 0) + /// If not DWORD aligned, it is more efficient to call the library. However + /// if calling the library is not allowed (AlwaysInline), then soldier on as + /// the code generated here is better than the long load-store sequence we + /// would otherwise get. + if (!AlwaysInline && (Align & 3) != 0) + return SDValue(); + + // If to a segment-relative address space, use the default lowering. + if (DstPtrInfo.getAddrSpace() >= 256 || + SrcPtrInfo.getAddrSpace() >= 256) return SDValue(); - // DWORD aligned - EVT AVT = MVT::i32; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned - AVT = MVT::i64; + MVT AVT; + if (Align & 1) + AVT = MVT::i8; + else if (Align & 2) + AVT = MVT::i16; + else if (Align & 4) + // DWORD aligned + AVT = MVT::i32; + else + // QWORD aligned + AVT = Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32; unsigned UBytes = AVT.getSizeInBits() / 8; unsigned CountVal = SizeVal / UBytes; @@ -214,7 +230,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Src, InFlag); InFlag = Chain.getValue(1); - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, array_lengthof(Ops)); @@ -234,8 +250,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, DAG.getConstant(Offset, SrcVT)), DAG.getConstant(BytesLeft, SizeVT), Align, isVolatile, AlwaysInline, - DstSV, DstSVOff + Offset, - SrcSV, SrcSVOff + Offset)); + DstPtrInfo.getWithOffset(Offset), + SrcPtrInfo.getWithOffset(Offset))); } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h index 4f30f31..d1d66fe 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.h +++ b/lib/Target/X86/X86SelectionDAGInfo.h @@ -39,8 +39,7 @@ public: SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, - const Value *DstSV, - uint64_t DstSVOff) const; + MachinePointerInfo DstPtrInfo) const; virtual SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, @@ -48,10 +47,8 @@ public: SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const; + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const; }; } diff --git a/lib/Target/X86/X86ShuffleDecode.h b/lib/Target/X86/X86ShuffleDecode.h deleted file mode 100644 index df04052..0000000 --- a/lib/Target/X86/X86ShuffleDecode.h +++ /dev/null @@ -1,155 +0,0 @@ -//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Define several functions to decode x86 specific shuffle semantics into a -// generic vector mask. -// -//===----------------------------------------------------------------------===// - -#ifndef X86_SHUFFLE_DECODE_H -#define X86_SHUFFLE_DECODE_H - -#include "llvm/ADT/SmallVector.h" -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Vector Mask Decoding -//===----------------------------------------------------------------------===// - -enum { - SM_SentinelZero = ~0U -}; - -static inline -void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { - // Defaults the copying the dest value. - ShuffleMask.push_back(0); - ShuffleMask.push_back(1); - ShuffleMask.push_back(2); - ShuffleMask.push_back(3); - - // Decode the immediate. - unsigned ZMask = Imm & 15; - unsigned CountD = (Imm >> 4) & 3; - unsigned CountS = (Imm >> 6) & 3; - - // CountS selects which input element to use. - unsigned InVal = 4+CountS; - // CountD specifies which element of destination to update. - ShuffleMask[CountD] = InVal; - // ZMask zaps values, potentially overriding the CountD elt. 
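// [Editor's note] A worked example of the INSERTPS immediate decoding in
// the deleted header above (the zero-mask handling concludes just below).
// For Imm = 0x10: ZMask = 0, CountD = 1, CountS = 0, so source element 0
// lands in destination lane 1 and the mask becomes <0, 4, 2, 3>. Setting a
// zero bit, Imm = 0x11 additionally zaps lane 0: <zero, 4, 2, 3>.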
- if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; - if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; - if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; - if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; -} - -// <3,1> or <6,7,2,3> -static void DecodeMOVHLPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = NElts/2; i != NElts; ++i) - ShuffleMask.push_back(NElts+i); - - for (unsigned i = NElts/2; i != NElts; ++i) - ShuffleMask.push_back(i); -} - -// <0,2> or <0,1,4,5> -static void DecodeMOVLHPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) - ShuffleMask.push_back(i); - - for (unsigned i = 0; i != NElts/2; ++i) - ShuffleMask.push_back(NElts+i); -} - -static void DecodePSHUFMask(unsigned NElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts; ++i) { - ShuffleMask.push_back(Imm % NElts); - Imm /= NElts; - } -} - -static void DecodePSHUFHWMask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - ShuffleMask.push_back(0); - ShuffleMask.push_back(1); - ShuffleMask.push_back(2); - ShuffleMask.push_back(3); - for (unsigned i = 0; i != 4; ++i) { - ShuffleMask.push_back(4+(Imm & 3)); - Imm >>= 2; - } -} - -static void DecodePSHUFLWMask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != 4; ++i) { - ShuffleMask.push_back((Imm & 3)); - Imm >>= 2; - } - ShuffleMask.push_back(4); - ShuffleMask.push_back(5); - ShuffleMask.push_back(6); - ShuffleMask.push_back(7); -} - -static void DecodePUNPCKLMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i); - ShuffleMask.push_back(i+NElts); - } -} - -static void DecodePUNPCKHMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i+NElts/2); - ShuffleMask.push_back(i+NElts+NElts/2); - } -} - -static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - // Part that reads from dest. - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(Imm % NElts); - Imm /= NElts; - } - // Part that reads from src. - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(Imm % NElts + NElts); - Imm /= NElts; - } -} - -static void DecodeUNPCKHPMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i+NElts/2); // Reads from dest - ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src - } -} - - -/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd -/// etc. NElts indicates the number of elements in the vector allowing it to -/// handle different datatypes and vector widths. 
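// [Editor's note] Two more worked examples, for the decoders above and the
// one defined just below:
//   DecodePSHUFMask with NElts = 4, Imm = 0x1B (0b00011011): successive
//   2-bit fields select elements 3, 2, 1, 0 -> mask <3, 2, 1, 0>, a full
//   reversal; Imm = 0xE4 yields the identity <0, 1, 2, 3>.
//   DecodeUNPCKLPMask with NElts = 4: interleaves the low halves of the
//   two inputs -> mask <0, 4, 1, 5> (i.e. unpcklps).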
-static void DecodeUNPCKLPMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i); // Reads from dest - ShuffleMask.push_back(i+NElts); // Reads from src - } -} - -#endif diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 0d02e5e..de76856 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -1,4 +1,4 @@ -//===-- X86Subtarget.cpp - X86 Subtarget Information ------------*- C++ -*-===// +//===-- X86Subtarget.cpp - X86 Subtarget Information ----------------------===// // // The LLVM Compiler Infrastructure // @@ -18,7 +18,7 @@ #include "llvm/GlobalValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/System/Host.h" +#include "llvm/Support/Host.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallVector.h" @@ -256,13 +256,14 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { if ((ECX >> 9) & 1) X86SSELevel = SSSE3; if ((ECX >> 19) & 1) X86SSELevel = SSE41; if ((ECX >> 20) & 1) X86SSELevel = SSE42; + // FIXME: AVX codegen support is not ready. + //if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; } bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); - HasAVX = ((ECX >> 28) & 0x1); HasAES = IsIntel && ((ECX >> 25) & 0x1); if (IsIntel || IsAMD) { @@ -289,6 +290,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , X863DNowLevel(NoThreeDNow) , HasCMov(false) , HasX86_64(false) + , HasPOPCNT(false) , HasSSE4A(false) , HasAVX(false) , HasAES(false) @@ -315,11 +317,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, ParseSubtargetFeatures(FS, CPU); // All X86-64 CPUs also have SSE2, however user might request no SSE via // -mattr, so don't force SSELevel here. + if (HasAVX) + X86SSELevel = NoMMXSSE; } else { // Otherwise, use CPUID to auto-detect feature set. AutoDetectSubtargetFeatures(); // Make sure SSE2 is enabled; it is available on all X86-64 CPUs. - if (Is64Bit && X86SSELevel < SSE2) + if (Is64Bit && !HasAVX && X86SSELevel < SSE2) X86SSELevel = SSE2; } @@ -338,9 +342,9 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, assert((!Is64Bit || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64 - // bit targets. - if (isTargetDarwin() || Is64Bit) + // Stack alignment is 16 bytes on Darwin and Linux (both 32 and 64 bit) and + // for all 64-bit targets. + if (isTargetDarwin() || isTargetLinux() || Is64Bit) stackAlignment = 16; if (StackAlignment) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 0ee91ab..8a119b4 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -65,6 +65,9 @@ protected: /// bool HasX86_64; + /// HasPOPCNT - True if the processor supports POPCNT. + bool HasPOPCNT; + /// HasSSE4A - True if the processor supports SSE4A instructions. bool HasSSE4A; @@ -100,7 +103,7 @@ protected: /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops. /// unsigned MaxInlineSizeThreshold; - + /// TargetTriple - What processor and OS we're targeting. 
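// [Editor's note] The bit tests in AutoDetectSubtargetFeatures above follow
// the CPUID.(EAX=1):ECX layout documented in the Intel SDM: bit 1
// PCLMULQDQ, bit 9 SSSE3, bit 12 FMA, bit 19 SSE4.1, bit 20 SSE4.2, bit 25
// AES, bit 28 AVX (the AVX probe is deliberately left disabled above until
// AVX codegen is ready). For instance, the SSE4.2 probe reduces to:
//
//   bool HasSSE42 = (ECX >> 20) & 1;  // ECX from CPUID leaf 1
//
// (The X86Subtarget.h declarations this note interrupts resume below.)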
Triple TargetTriple; @@ -150,7 +153,10 @@ public: bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } + bool hasPOPCNT() const { return HasPOPCNT; } bool hasAVX() const { return HasAVX; } + bool hasXMM() const { return hasSSE1() || hasAVX(); } + bool hasXMMInt() const { return hasSSE2() || hasAVX(); } bool hasAES() const { return HasAES; } bool hasCLMUL() const { return HasCLMUL; } bool hasFMA3() const { return HasFMA3; } @@ -160,23 +166,21 @@ public: bool hasVectorUAMem() const { return HasVectorUAMem; } bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } - + // ELF is a reasonably sane default and the only other X86 targets we // support are Darwin and Windows. Just use "not those". - bool isTargetELF() const { + bool isTargetELF() const { return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing(); } bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; } - bool isTargetMingw() const { - return TargetTriple.getOS() == Triple::MinGW32 || - TargetTriple.getOS() == Triple::MinGW64; } + bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; } bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; } bool isTargetCygMing() const { return isTargetMingw() || isTargetCygwin(); } - + /// isTargetCOFF - Return true if this is any COFF/Windows target variant. bool isTargetCOFF() const { return isTargetMingw() || isTargetCygwin() || isTargetWindows(); @@ -186,22 +190,12 @@ public: return Is64Bit && (isTargetMingw() || isTargetWindows()); } - bool isTargetWin32() const { - return !Is64Bit && (isTargetMingw() || isTargetWindows()); + bool isTargetEnvMacho() const { + return isTargetDarwin() || (TargetTriple.getEnvironment() == Triple::MachO); } - std::string getDataLayout() const { - const char *p; - if (is64Bit()) - p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"; - else if (isTargetDarwin()) - p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; - else if (isTargetMingw() || isTargetWindows()) - p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32"; - else - p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; - - return std::string(p); + bool isTargetWin32() const { + return !Is64Bit && (isTargetMingw() || isTargetWindows()); } bool isPICStyleSet() const { return PICStyle != PICStyles::None; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index ce8636eb..889c824 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -30,10 +30,12 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { case Triple::Darwin: return new X86MCAsmInfoDarwin(TheTriple); case Triple::MinGW32: - case Triple::MinGW64: case Triple::Cygwin: case Triple::Win32: - return new X86MCAsmInfoCOFF(TheTriple); + if (TheTriple.getEnvironment() == Triple::MachO) + return new X86MCAsmInfoDarwin(TheTriple); + else + return new X86MCAsmInfoCOFF(TheTriple); default: return new X86ELFMCAsmInfo(TheTriple); } @@ -43,22 +45,25 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, MCContext &Ctx, TargetAsmBackend &TAB, raw_ostream &_OS, MCCodeEmitter *_Emitter, - bool RelaxAll) { + bool RelaxAll, + bool NoExecStack) { Triple TheTriple(TT); switch (TheTriple.getOS()) { case Triple::Darwin: return 
createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); case Triple::MinGW32: - case Triple::MinGW64: case Triple::Cygwin: case Triple::Win32: - return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll); + if (TheTriple.getEnvironment() == Triple::MachO) + return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); + else + return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll); default: - return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); + return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack); } } -extern "C" void LLVMInitializeX86Target() { +extern "C" void LLVMInitializeX86Target() { // Register the target. RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target); RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target); @@ -89,28 +94,38 @@ extern "C" void LLVMInitializeX86Target() { X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, const std::string &FS) - : X86TargetMachine(T, TT, FS, false) { + : X86TargetMachine(T, TT, FS, false), + DataLayout(getSubtargetImpl()->isTargetDarwin() ? + "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32" : + (getSubtargetImpl()->isTargetCygMing() || + getSubtargetImpl()->isTargetWindows()) ? + "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32" : + "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"), + InstrInfo(*this), + TSInfo(*this), + TLInfo(*this), + JITInfo(*this) { } X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, const std::string &FS) - : X86TargetMachine(T, TT, FS, true) { + : X86TargetMachine(T, TT, FS, true), + DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"), + InstrInfo(*this), + TSInfo(*this), + TLInfo(*this), + JITInfo(*this) { } /// X86TargetMachine ctor - Create an X86 target. /// -X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, +X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool is64Bit) - : LLVMTargetMachine(T, TT), + : LLVMTargetMachine(T, TT), Subtarget(TT, FS, is64Bit), - DataLayout(Subtarget.getDataLayout()), - FrameInfo(TargetFrameInfo::StackGrowsDown, - Subtarget.getStackAlignment(), - (Subtarget.isTargetWin64() ? -40 : - (Subtarget.is64Bit() ? -8 : -4))), - InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this), - ELFWriterInfo(*this) { + FrameLowering(*this, Subtarget), + ELFWriterInfo(is64Bit, true) { DefRelocModel = getRelocationModel(); // If no relocation model was picked, default as appropriate for the target. @@ -217,12 +232,12 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) { // FIXME: Move this to TargetJITInfo! // On Darwin, do not override 64-bit setting made in X86TargetMachine(). 
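// [Editor's note] The TargetData strings moved into the concrete target
// machines above follow LLVM's data-layout grammar: "e" = little-endian,
// "p:32:32" = 32-bit pointers with 32-bit ABI alignment, "f64:32:64" =
// doubles with 32-bit ABI / 64-bit preferred alignment, "f80:128:128" =
// x87 long double padded to 16 bytes, "n8:16:32" = native integer widths,
// and, in the 64-bit string only, "s:64" = 64-bit aligned stack objects.
// The three 32-bit variants thus differ only in f64/f80 alignment: Darwin
// pads f80 to 16 bytes, while Windows/MinGW keeps f80 at 4-byte alignment
// but gives f64 a full 8-byte ABI alignment. (The JIT code-emitter fixup
// this note interrupts continues below.)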
- if (DefRelocModel == Reloc::Default && + if (DefRelocModel == Reloc::Default && (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) { setRelocationModel(Reloc::Static); Subtarget.setPICStyle(PICStyles::None); } - + PM.add(createX86JITCodeEmitterPass(*this, JCE)); diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index f9fb424..5973922 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -14,16 +14,17 @@ #ifndef X86TARGETMACHINE_H #define X86TARGETMACHINE_H -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "X86.h" #include "X86ELFWriterInfo.h" #include "X86InstrInfo.h" -#include "X86JITInfo.h" -#include "X86Subtarget.h" #include "X86ISelLowering.h" +#include "X86FrameLowering.h" +#include "X86JITInfo.h" #include "X86SelectionDAGInfo.h" +#include "X86Subtarget.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -31,12 +32,7 @@ class formatted_raw_ostream; class X86TargetMachine : public LLVMTargetMachine { X86Subtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment - TargetFrameInfo FrameInfo; - X86InstrInfo InstrInfo; - X86JITInfo JITInfo; - X86TargetLowering TLInfo; - X86SelectionDAGInfo TSInfo; + X86FrameLowering FrameLowering; X86ELFWriterInfo ELFWriterInfo; Reloc::Model DefRelocModel; // Reloc model before it's overridden. @@ -49,20 +45,25 @@ public: X86TargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool is64Bit); - virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } - virtual X86JITInfo *getJITInfo() { return &JITInfo; } + virtual const X86InstrInfo *getInstrInfo() const { + llvm_unreachable("getInstrInfo not implemented"); + } + virtual const TargetFrameLowering *getFrameLowering() const { + return &FrameLowering; + } + virtual X86JITInfo *getJITInfo() { + llvm_unreachable("getJITInfo not implemented"); + } virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; } - virtual const X86TargetLowering *getTargetLowering() const { - return &TLInfo; + virtual const X86TargetLowering *getTargetLowering() const { + llvm_unreachable("getTargetLowering not implemented"); } virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { - return &TSInfo; + llvm_unreachable("getSelectionDAGInfo not implemented"); } virtual const X86RegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); + return &getInstrInfo()->getRegisterInfo(); } - virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86ELFWriterInfo *getELFWriterInfo() const { return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; } @@ -79,17 +80,53 @@ public: /// X86_32TargetMachine - X86 32-bit target machine. 
/// class X86_32TargetMachine : public X86TargetMachine { + const TargetData DataLayout; // Calculates type size & alignment + X86InstrInfo InstrInfo; + X86SelectionDAGInfo TSInfo; + X86TargetLowering TLInfo; + X86JITInfo JITInfo; public: X86_32TargetMachine(const Target &T, const std::string &M, const std::string &FS); + virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const X86TargetLowering *getTargetLowering() const { + return &TLInfo; + } + virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const X86InstrInfo *getInstrInfo() const { + return &InstrInfo; + } + virtual X86JITInfo *getJITInfo() { + return &JITInfo; + } }; /// X86_64TargetMachine - X86 64-bit target machine. /// class X86_64TargetMachine : public X86TargetMachine { + const TargetData DataLayout; // Calculates type size & alignment + X86InstrInfo InstrInfo; + X86SelectionDAGInfo TSInfo; + X86TargetLowering TLInfo; + X86JITInfo JITInfo; public: X86_64TargetMachine(const Target &T, const std::string &TT, const std::string &FS); + virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const X86TargetLowering *getTargetLowering() const { + return &TLInfo; + } + virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const X86InstrInfo *getInstrInfo() const { + return &InstrInfo; + } + virtual X86JITInfo *getJITInfo() { + return &JITInfo; + } }; } // End llvm namespace diff --git a/lib/Target/XCore/AsmPrinter/CMakeLists.txt b/lib/Target/XCore/AsmPrinter/CMakeLists.txt deleted file mode 100644 index 7c7c2f4..0000000 --- a/lib/Target/XCore/AsmPrinter/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMXCoreAsmPrinter - XCoreAsmPrinter.cpp - ) -add_dependencies(LLVMXCoreAsmPrinter XCoreCodeGenTable_gen) diff --git a/lib/Target/XCore/AsmPrinter/Makefile b/lib/Target/XCore/AsmPrinter/Makefile deleted file mode 100644 index 581f736..0000000 --- a/lib/Target/XCore/AsmPrinter/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/XCore/AsmPrinter/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMXCoreAsmPrinter - -# Hack: we need to include 'main' XCore target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp deleted file mode 100644 index 8f06dd3..0000000 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ /dev/null @@ -1,280 +0,0 @@ -//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to the XAS-format XCore assembly language. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "XCore.h" -#include "XCoreInstrInfo.h" -#include "XCoreSubtarget.h" -#include "XCoreMCAsmInfo.h" -#include "XCoreTargetMachine.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <cctype> -using namespace llvm; - -static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional, - cl::desc("Maximum number of threads (for emulation thread-local storage)"), - cl::Hidden, - cl::value_desc("number"), - cl::init(8)); - -namespace { - class XCoreAsmPrinter : public AsmPrinter { - const XCoreSubtarget &Subtarget; - public: - explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){} - - virtual const char *getPassName() const { - return "XCore Assembly Printer"; - } - - void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - void printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O, - const std::string &directive = ".jmptable"); - void printInlineJT32(const MachineInstr *MI, int opNum, raw_ostream &O) { - printInlineJT(MI, opNum, O, ".jmptable32"); - } - void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - - void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV); - virtual void EmitGlobalVariable(const GlobalVariable *GV); - - void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd. - static const char *getRegisterName(unsigned RegNo); - - void EmitFunctionEntryLabel(); - void EmitInstruction(const MachineInstr *MI); - void EmitFunctionBodyEnd(); - }; -} // end of anonymous namespace - -#include "XCoreGenAsmWriter.inc" - -void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) { - assert(((GV->hasExternalLinkage() || - GV->hasWeakLinkage()) || - GV->hasLinkOnceLinkage()) && "Unexpected linkage"); - if (const ArrayType *ATy = dyn_cast<ArrayType>( - cast<PointerType>(GV->getType())->getElementType())) { - OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global); - // FIXME: MCStreamerize. - OutStreamer.EmitRawText(StringRef(".globound")); - OutStreamer.EmitRawText("\t.set\t" + Twine(Sym->getName())); - OutStreamer.EmitRawText(".globound," + Twine(ATy->getNumElements())); - if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) { - // TODO Use COMDAT groups for LinkOnceLinkage - OutStreamer.EmitRawText(MAI->getWeakDefDirective() +Twine(Sym->getName())+ - ".globound"); - } - } -} - -void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { - // Check to see if this is a special global used by LLVM, if so, emit it. 
- if (!GV->hasInitializer() || - EmitSpecialLLVMGlobal(GV)) - return; - - const TargetData *TD = TM.getTargetData(); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM)); - - - MCSymbol *GVSym = Mang->getSymbol(GV); - Constant *C = GV->getInitializer(); - unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType()); - - // Mark the start of the global - OutStreamer.EmitRawText("\t.cc_top " + Twine(GVSym->getName()) + ".data," + - GVSym->getName()); - - switch (GV->getLinkage()) { - case GlobalValue::AppendingLinkage: - report_fatal_error("AppendingLinkage is not supported by this target!"); - case GlobalValue::LinkOnceAnyLinkage: - case GlobalValue::LinkOnceODRLinkage: - case GlobalValue::WeakAnyLinkage: - case GlobalValue::WeakODRLinkage: - case GlobalValue::ExternalLinkage: - emitArrayBound(GVSym, GV); - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - - // TODO Use COMDAT groups for LinkOnceLinkage - if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); - // FALL THROUGH - case GlobalValue::InternalLinkage: - case GlobalValue::PrivateLinkage: - break; - case GlobalValue::DLLImportLinkage: - llvm_unreachable("DLLImport linkage is not supported by this target!"); - case GlobalValue::DLLExportLinkage: - llvm_unreachable("DLLExport linkage is not supported by this target!"); - default: - llvm_unreachable("Unknown linkage type!"); - } - - EmitAlignment(Align > 2 ? Align : 2, GV); - - unsigned Size = TD->getTypeAllocSize(C->getType()); - if (GV->isThreadLocal()) { - Size *= MaxThreads; - } - if (MAI->hasDotTypeDotSizeDirective()) { - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); - OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," + - Twine(Size)); - } - OutStreamer.EmitLabel(GVSym); - - EmitGlobalConstant(C); - if (GV->isThreadLocal()) { - for (unsigned i = 1; i < MaxThreads; ++i) - EmitGlobalConstant(C); - } - // The ABI requires that unsigned scalar types smaller than 32 bits - // are padded to 32 bits. - if (Size < 4) - OutStreamer.EmitZeros(4 - Size, 0); - - // Mark the end of the global - OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data"); -} - -/// EmitFunctionBodyEnd - Targets can override this to emit stuff after -/// the last basic block in the function. 
-void XCoreAsmPrinter::EmitFunctionBodyEnd() { - // Emit function end directives - OutStreamer.EmitRawText("\t.cc_bottom " + Twine(CurrentFnSym->getName()) + - ".function"); -} - -void XCoreAsmPrinter::EmitFunctionEntryLabel() { - // Mark the start of the function - OutStreamer.EmitRawText("\t.cc_top " + Twine(CurrentFnSym->getName()) + - ".function," + CurrentFnSym->getName()); - OutStreamer.EmitLabel(CurrentFnSym); -} - -void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - printOperand(MI, opNum, O); - - if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) - return; - - O << "+"; - printOperand(MI, opNum+1, O); -} - -void XCoreAsmPrinter:: -printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O, - const std::string &directive) { - unsigned JTI = MI->getOperand(opNum).getIndex(); - const MachineFunction *MF = MI->getParent()->getParent(); - const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); - const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; - O << "\t" << directive << " "; - for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { - MachineBasicBlock *MBB = JTBBs[i]; - if (i > 0) - O << ","; - O << *MBB->getSymbol(); - } -} - -void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(opNum); - switch (MO.getType()) { - case MachineOperand::MO_Register: - O << getRegisterName(MO.getReg()); - break; - case MachineOperand::MO_Immediate: - O << MO.getImm(); - break; - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - break; - case MachineOperand::MO_GlobalAddress: - O << *Mang->getSymbol(MO.getGlobal()); - break; - case MachineOperand::MO_ExternalSymbol: - O << MO.getSymbolName(); - break; - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() - << '_' << MO.getIndex(); - break; - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - break; - case MachineOperand::MO_BlockAddress: - O << *GetBlockAddressSymbol(MO.getBlockAddress()); - break; - default: - llvm_unreachable("not implemented"); - } -} - -/// PrintAsmOperand - Print out an operand for an inline asm expression. -/// -bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant,const char *ExtraCode, - raw_ostream &O) { - printOperand(MI, OpNo, O); - return false; -} - -void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream O(Str); - - // Check for mov mnemonic - if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm()) - O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", " - << getRegisterName(MI->getOperand(1).getReg()); - else - printInstruction(MI, O); - OutStreamer.EmitRawText(O.str()); -} - -// Force static initialization. 
-extern "C" void LLVMInitializeXCoreAsmPrinter() { - RegisterAsmPrinter<XCoreAsmPrinter> X(TheXCoreTarget); -} diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index 38b35d7..9093de6 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -11,7 +11,8 @@ tablegen(XCoreGenCallingConv.inc -gen-callingconv) tablegen(XCoreGenSubtarget.inc -gen-subtarget) add_llvm_target(XCoreCodeGen - XCoreFrameInfo.cpp + XCoreAsmPrinter.cpp + XCoreFrameLowering.cpp XCoreInstrInfo.cpp XCoreISelDAGToDAG.cpp XCoreISelLowering.cpp @@ -22,3 +23,5 @@ add_llvm_target(XCoreCodeGen XCoreTargetObjectFile.cpp XCoreSelectionDAGInfo.cpp ) + +add_subdirectory(TargetInfo) diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile index 1b70974..6c1ef88 100644 --- a/lib/Target/XCore/Makefile +++ b/lib/Target/XCore/Makefile @@ -18,7 +18,7 @@ BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \ XCoreGenDAGISel.inc XCoreGenCallingConv.inc \ XCoreGenSubtarget.inc -DIRS = AsmPrinter TargetInfo +DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/XCore/TargetInfo/CMakeLists.txt b/lib/Target/XCore/TargetInfo/CMakeLists.txt index 0a568de..c147b8a 100644 --- a/lib/Target/XCore/TargetInfo/CMakeLists.txt +++ b/lib/Target/XCore/TargetInfo/CMakeLists.txt @@ -4,4 +4,4 @@ add_llvm_library(LLVMXCoreInfo XCoreTargetInfo.cpp ) -add_dependencies(LLVMXCoreInfo XCoreTable_gen) +add_dependencies(LLVMXCoreInfo XCoreCodeGenTable_gen) diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp new file mode 100644 index 0000000..8f06dd3 --- /dev/null +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -0,0 +1,280 @@ +//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to the XAS-format XCore assembly language. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "XCore.h" +#include "XCoreInstrInfo.h" +#include "XCoreSubtarget.h" +#include "XCoreMCAsmInfo.h" +#include "XCoreTargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cctype> +using namespace llvm; + +static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional, + cl::desc("Maximum number of threads (for emulation thread-local storage)"), + cl::Hidden, + cl::value_desc("number"), + cl::init(8)); + +namespace { + class XCoreAsmPrinter : public AsmPrinter { + const XCoreSubtarget &Subtarget; + public: + explicit XCoreAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()){} + + virtual const char *getPassName() const { + return "XCore Assembly Printer"; + } + + void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + void printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O, + const std::string &directive = ".jmptable"); + void printInlineJT32(const MachineInstr *MI, int opNum, raw_ostream &O) { + printInlineJT(MI, opNum, O, ".jmptable32"); + } + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + + void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV); + virtual void EmitGlobalVariable(const GlobalVariable *GV); + + void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd. + static const char *getRegisterName(unsigned RegNo); + + void EmitFunctionEntryLabel(); + void EmitInstruction(const MachineInstr *MI); + void EmitFunctionBodyEnd(); + }; +} // end of anonymous namespace + +#include "XCoreGenAsmWriter.inc" + +void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) { + assert(((GV->hasExternalLinkage() || + GV->hasWeakLinkage()) || + GV->hasLinkOnceLinkage()) && "Unexpected linkage"); + if (const ArrayType *ATy = dyn_cast<ArrayType>( + cast<PointerType>(GV->getType())->getElementType())) { + OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global); + // FIXME: MCStreamerize. + OutStreamer.EmitRawText(StringRef(".globound")); + OutStreamer.EmitRawText("\t.set\t" + Twine(Sym->getName())); + OutStreamer.EmitRawText(".globound," + Twine(ATy->getNumElements())); + if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) { + // TODO Use COMDAT groups for LinkOnceLinkage + OutStreamer.EmitRawText(MAI->getWeakDefDirective() +Twine(Sym->getName())+ + ".globound"); + } + } +} + +void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + // Check to see if this is a special global used by LLVM, if so, emit it. 
+ if (!GV->hasInitializer() || + EmitSpecialLLVMGlobal(GV)) + return; + + const TargetData *TD = TM.getTargetData(); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM)); + + + MCSymbol *GVSym = Mang->getSymbol(GV); + Constant *C = GV->getInitializer(); + unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType()); + + // Mark the start of the global + OutStreamer.EmitRawText("\t.cc_top " + Twine(GVSym->getName()) + ".data," + + GVSym->getName()); + + switch (GV->getLinkage()) { + case GlobalValue::AppendingLinkage: + report_fatal_error("AppendingLinkage is not supported by this target!"); + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::ExternalLinkage: + emitArrayBound(GVSym, GV); + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); + + // TODO Use COMDAT groups for LinkOnceLinkage + if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); + // FALL THROUGH + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + break; + case GlobalValue::DLLImportLinkage: + llvm_unreachable("DLLImport linkage is not supported by this target!"); + case GlobalValue::DLLExportLinkage: + llvm_unreachable("DLLExport linkage is not supported by this target!"); + default: + llvm_unreachable("Unknown linkage type!"); + } + + EmitAlignment(Align > 2 ? Align : 2, GV); + + unsigned Size = TD->getTypeAllocSize(C->getType()); + if (GV->isThreadLocal()) { + Size *= MaxThreads; + } + if (MAI->hasDotTypeDotSizeDirective()) { + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); + OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," + + Twine(Size)); + } + OutStreamer.EmitLabel(GVSym); + + EmitGlobalConstant(C); + if (GV->isThreadLocal()) { + for (unsigned i = 1; i < MaxThreads; ++i) + EmitGlobalConstant(C); + } + // The ABI requires that unsigned scalar types smaller than 32 bits + // are padded to 32 bits. + if (Size < 4) + OutStreamer.EmitZeros(4 - Size, 0); + + // Mark the end of the global + OutStreamer.EmitRawText("\t.cc_bottom " + Twine(GVSym->getName()) + ".data"); +} + +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. 
+void XCoreAsmPrinter::EmitFunctionBodyEnd() { + // Emit function end directives + OutStreamer.EmitRawText("\t.cc_bottom " + Twine(CurrentFnSym->getName()) + + ".function"); +} + +void XCoreAsmPrinter::EmitFunctionEntryLabel() { + // Mark the start of the function + OutStreamer.EmitRawText("\t.cc_top " + Twine(CurrentFnSym->getName()) + + ".function," + CurrentFnSym->getName()); + OutStreamer.EmitLabel(CurrentFnSym); +} + +void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + printOperand(MI, opNum, O); + + if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) + return; + + O << "+"; + printOperand(MI, opNum+1, O); +} + +void XCoreAsmPrinter:: +printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O, + const std::string &directive) { + unsigned JTI = MI->getOperand(opNum).getIndex(); + const MachineFunction *MF = MI->getParent()->getParent(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + O << "\t" << directive << " "; + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + if (i > 0) + O << ","; + O << *MBB->getSymbol(); + } +} + +void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << getRegisterName(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + O << MO.getImm(); + break; + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + break; + case MachineOperand::MO_GlobalAddress: + O << *Mang->getSymbol(MO.getGlobal()); + break; + case MachineOperand::MO_ExternalSymbol: + O << MO.getSymbolName(); + break; + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + case MachineOperand::MO_BlockAddress: + O << *GetBlockAddressSymbol(MO.getBlockAddress()); + break; + default: + llvm_unreachable("not implemented"); + } +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant,const char *ExtraCode, + raw_ostream &O) { + printOperand(MI, OpNo, O); + return false; +} + +void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) { + SmallString<128> Str; + raw_svector_ostream O(Str); + + // Check for mov mnemonic + if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm()) + O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", " + << getRegisterName(MI->getOperand(1).getReg()); + else + printInstruction(MI, O); + OutStreamer.EmitRawText(O.str()); +} + +// Force static initialization. +extern "C" void LLVMInitializeXCoreAsmPrinter() { + RegisterAsmPrinter<XCoreAsmPrinter> X(TheXCoreTarget); +} diff --git a/lib/Target/XCore/XCoreCallingConv.td b/lib/Target/XCore/XCoreCallingConv.td index 8107e32..b20d71f 100644 --- a/lib/Target/XCore/XCoreCallingConv.td +++ b/lib/Target/XCore/XCoreCallingConv.td @@ -24,6 +24,9 @@ def CC_XCore : CallingConv<[ // Promote i8/i16 arguments to i32. 
CCIfType<[i8, i16], CCPromoteToType<i32>>, + // The 'nest' parameter, if any, is passed in R11. + CCIfNest<CCAssignToReg<[R11]>>, + // The first 4 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, diff --git a/lib/Target/XCore/XCoreFrameInfo.cpp b/lib/Target/XCore/XCoreFrameInfo.cpp deleted file mode 100644 index f50dc96..0000000 --- a/lib/Target/XCore/XCoreFrameInfo.cpp +++ /dev/null @@ -1,27 +0,0 @@ -//===-- XCoreFrameInfo.cpp - Frame info for XCore Target ---------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains XCore frame information that doesn't fit anywhere else -// cleanly... -// -//===----------------------------------------------------------------------===// - -#include "XCore.h" -#include "XCoreFrameInfo.h" -using namespace llvm; - -//===----------------------------------------------------------------------===// -// XCoreFrameInfo: -//===----------------------------------------------------------------------===// - -XCoreFrameInfo::XCoreFrameInfo(const TargetMachine &tm): - TargetFrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0) -{ - // Do nothing -} diff --git a/lib/Target/XCore/XCoreFrameInfo.h b/lib/Target/XCore/XCoreFrameInfo.h deleted file mode 100644 index 2c67577..0000000 --- a/lib/Target/XCore/XCoreFrameInfo.h +++ /dev/null @@ -1,34 +0,0 @@ -//===-- XCoreFrameInfo.h - Frame info for XCore Target -----------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains XCore frame information that doesn't fit anywhere else -// cleanly... -// -//===----------------------------------------------------------------------===// - -#ifndef XCOREFRAMEINFO_H -#define XCOREFRAMEINFO_H - -#include "llvm/Target/TargetFrameInfo.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - class XCoreFrameInfo: public TargetFrameInfo { - - public: - XCoreFrameInfo(const TargetMachine &tm); - - //! Stack slot size (4 bytes) - static int stackSlotSize() { - return 4; - } - }; -} - -#endif // XCOREFRAMEINFO_H diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp new file mode 100644 index 0000000..0578220 --- /dev/null +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -0,0 +1,387 @@ +//===-- XCoreFrameLowering.cpp - Frame info for XCore Target -----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains XCore frame information that doesn't fit anywhere else +// cleanly... 
+// +//===----------------------------------------------------------------------===// + +#include "XCore.h" +#include "XCoreFrameLowering.h" +#include "XCoreInstrInfo.h" +#include "XCoreMachineFunctionInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +// helper functions. FIXME: Eliminate. +static inline bool isImmUs(unsigned val) { + return val <= 11; +} + +static inline bool isImmU6(unsigned val) { + return val < (1 << 6); +} + +static inline bool isImmU16(unsigned val) { + return val < (1 << 16); +} + +static void loadFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, int Offset, DebugLoc dl, + const TargetInstrInfo &TII) { + assert(Offset%4 == 0 && "Misaligned stack offset"); + Offset/=4; + bool isU6 = isImmU6(Offset); + if (!isU6 && !isImmU16(Offset)) + report_fatal_error("loadFromStack offset too big " + Twine(Offset)); + int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6; + BuildMI(MBB, I, dl, TII.get(Opcode), DstReg) + .addImm(Offset); +} + + +static void storeToStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned SrcReg, int Offset, DebugLoc dl, + const TargetInstrInfo &TII) { + assert(Offset%4 == 0 && "Misaligned stack offset"); + Offset/=4; + bool isU6 = isImmU6(Offset); + if (!isU6 && !isImmU16(Offset)) + report_fatal_error("storeToStack offset too big " + Twine(Offset)); + int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6; + BuildMI(MBB, I, dl, TII.get(Opcode)) + .addReg(SrcReg) + .addImm(Offset); +} + + +//===----------------------------------------------------------------------===// +// XCoreFrameLowering: +//===----------------------------------------------------------------------===// + +XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), + STI(sti) { + // Do nothing +} + +bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const { + return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects(); +} + +void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo *MMI = &MF.getMMI(); + const XCoreRegisterInfo *RegInfo = + static_cast<const XCoreRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const XCoreInstrInfo &TII = + *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo()); + XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + bool FP = hasFP(MF); + bool Nested = MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::Nest); + + if (Nested) { + loadFromStack(MBB, MBBI, XCore::R11, 0, dl, TII); + } + + // Work out frame sizes. + int FrameSize = MFI->getStackSize(); + assert(FrameSize%4 == 0 && "Misaligned frame size"); + FrameSize/=4; + + bool isU6 = isImmU6(FrameSize); + + if (!isU6 && !isImmU16(FrameSize)) { + // FIXME could emit multiple instructions. 
+ report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize)); + } + bool emitFrameMoves = RegInfo->needsFrameMoves(MF); + + // Do we need to allocate space on the stack? + if (FrameSize) { + bool saveLR = XFI->getUsesLR(); + bool LRSavedOnEntry = false; + int Opcode; + if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) { + Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6; + MBB.addLiveIn(XCore::LR); + saveLR = false; + LRSavedOnEntry = true; + } else { + Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6; + } + BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize); + + if (emitFrameMoves) { + std::vector<MachineMove> &Moves = MMI->getFrameMoves(); + + // Show update of SP. + MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); + + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4); + Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); + + if (LRSavedOnEntry) { + MachineLocation CSDst(MachineLocation::VirtualFP, 0); + MachineLocation CSSrc(XCore::LR); + Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc)); + } + } + if (saveLR) { + int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot()); + storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl, TII); + MBB.addLiveIn(XCore::LR); + + if (emitFrameMoves) { + MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel); + MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset); + MachineLocation CSSrc(XCore::LR); + MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc)); + } + } + } + + if (FP) { + // Save R10 to the stack. + int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot()); + storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl, TII); + // R10 is live-in. It is killed at the spill. + MBB.addLiveIn(XCore::R10); + if (emitFrameMoves) { + MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label); + MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset); + MachineLocation CSSrc(XCore::R10); + MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc)); + } + // Set the FP from the SP. + unsigned FramePtr = XCore::R10; + BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr) + .addImm(0); + if (emitFrameMoves) { + // Show FP is now valid. + MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); + MachineLocation SPDst(FramePtr); + MachineLocation SPSrc(MachineLocation::VirtualFP); + MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc)); + } + } + + if (emitFrameMoves) { + // Frame moves for callee saved. 
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves(); + std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels = + XFI->getSpillLabels(); + for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) { + MCSymbol *SpillLabel = SpillLabels[I].first; + CalleeSavedInfo &CSI = SpillLabels[I].second; + int Offset = MFI->getObjectOffset(CSI.getFrameIdx()); + unsigned Reg = CSI.getReg(); + MachineLocation CSDst(MachineLocation::VirtualFP, Offset); + MachineLocation CSSrc(Reg); + Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc)); + } + } +} + +void XCoreFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + const XCoreInstrInfo &TII = + *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo()); + DebugLoc dl = MBBI->getDebugLoc(); + + bool FP = hasFP(MF); + if (FP) { + // Restore the stack pointer. + unsigned FramePtr = XCore::R10; + BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r)) + .addReg(FramePtr); + } + + // Work out frame sizes. + int FrameSize = MFI->getStackSize(); + + assert(FrameSize%4 == 0 && "Misaligned frame size"); + + FrameSize/=4; + + bool isU6 = isImmU6(FrameSize); + + if (!isU6 && !isImmU16(FrameSize)) { + // FIXME could emit multiple instructions. + report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize)); + } + + if (FrameSize) { + XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); + + if (FP) { + // Restore R10 + int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot()); + FPSpillOffset += FrameSize*4; + loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl, TII); + } + bool restoreLR = XFI->getUsesLR(); + if (restoreLR && MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0) { + int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot()); + LRSpillOffset += FrameSize*4; + loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl, TII); + restoreLR = false; + } + if (restoreLR) { + // Fold prologue into return instruction + assert(MBBI->getOpcode() == XCore::RETSP_u6 + || MBBI->getOpcode() == XCore::RETSP_lu6); + int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6; + BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize); + MBB.erase(MBBI); + } else { + int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs; + BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize); + } + } +} + +void XCoreFrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) + const { + // Initial state of the frame pointer is SP. + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(XCore::SP, 0); + Moves.push_back(MachineMove(0, Dst, Src)); +} + +bool XCoreFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return true; + + MachineFunction *MF = MBB.getParent(); + const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); + + XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>(); + bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF); + + DebugLoc DL; + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin(); + it != CSI.end(); ++it) { + // Add the callee-saved register as live-in. It's killed at the spill. 
+ MBB.addLiveIn(it->getReg()); + + unsigned Reg = it->getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, + it->getFrameIdx(), RC, TRI); + if (emitFrameMoves) { + MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol(); + BuildMI(MBB, MI, DL, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLabel); + XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it)); + } + } + return true; +} + +bool XCoreFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const{ + MachineFunction *MF = MBB.getParent(); + const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); + + bool AtStart = MI == MBB.begin(); + MachineBasicBlock::iterator BeforeI = MI; + if (!AtStart) + --BeforeI; + for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin(); + it != CSI.end(); ++it) { + unsigned Reg = it->getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, it->getReg(), it->getFrameIdx(), + RC, TRI); + assert(MI != MBB.begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert multiple + // instructions. + if (AtStart) + MI = MBB.begin(); + else { + MI = BeforeI; + ++MI; + } + } + return true; +} + +void +XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR); + const TargetRegisterClass *RC = XCore::GRRegsRegisterClass; + XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); + if (LRUsed) { + MF.getRegInfo().setPhysRegUnused(XCore::LR); + + bool isVarArg = MF.getFunction()->isVarArg(); + int FrameIdx; + if (! isVarArg) { + // A fixed offset of 0 allows us to save / restore LR using entsp / retsp. + FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true); + } else { + FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), + false); + } + XFI->setUsesLR(FrameIdx); + XFI->setLRSpillSlot(FrameIdx); + } + if (RegInfo->requiresRegisterScavenging(MF)) { + // Reserve a slot close to SP or frame pointer. + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + } + if (hasFP(MF)) { + // A callee save register is used to hold the FP. + // This needs saving / restoring in the epilogue / prologue. + XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + } +} + +void XCoreFrameLowering:: +processFunctionBeforeFrameFinalized(MachineFunction &MF) const { + +} diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h new file mode 100644 index 0000000..7da19f0 --- /dev/null +++ b/lib/Target/XCore/XCoreFrameLowering.h @@ -0,0 +1,59 @@ +//===-- XCoreFrameLowering.h - Frame info for XCore Target -------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains XCore frame information that doesn't fit anywhere else +// cleanly... 
+// +//===----------------------------------------------------------------------===// + +#ifndef XCOREFRAMEINFO_H +#define XCOREFRAMEINFO_H + +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class XCoreSubtarget; + + class XCoreFrameLowering: public TargetFrameLowering { + const XCoreSubtarget &STI; + public: + XCoreFrameLowering(const XCoreSubtarget &STI); + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool hasFP(const MachineFunction &MF) const; + + void getInitialFrameState(std::vector<MachineMove> &Moves) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS = NULL) const; + + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + + //! Stack slot size (4 bytes) + static int stackSlotSize() { + return 4; + } + }; +} + +#endif // XCOREFRAMEINFO_H diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 755ece7..fc8a07a 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -68,12 +68,9 @@ namespace { } // Complex Pattern Selectors. - bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base, - SDValue &Offset); - bool SelectADDRdpii(SDNode *Op, SDValue Addr, SDValue &Base, - SDValue &Offset); - bool SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base, - SDValue &Offset); + bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRdpii(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRcpii(SDValue Addr, SDValue &Base, SDValue &Offset); virtual const char *getPassName() const { return "XCore DAG->DAG Pattern Instruction Selection"; @@ -91,8 +88,8 @@ FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) { return new XCoreDAGToDAGISel(TM); } -bool XCoreDAGToDAGISel::SelectADDRspii(SDNode *Op, SDValue Addr, - SDValue &Base, SDValue &Offset) { +bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base, + SDValue &Offset) { FrameIndexSDNode *FIN = 0; if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); @@ -113,8 +110,8 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDNode *Op, SDValue Addr, return false; } -bool XCoreDAGToDAGISel::SelectADDRdpii(SDNode *Op, SDValue Addr, - SDValue &Base, SDValue &Offset) { +bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base, + SDValue &Offset) { if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) { Base = Addr.getOperand(0); Offset = CurDAG->getTargetConstant(0, MVT::i32); @@ -134,8 +131,8 @@ bool XCoreDAGToDAGISel::SelectADDRdpii(SDNode *Op, SDValue Addr, return false; } -bool XCoreDAGToDAGISel::SelectADDRcpii(SDNode *Op, SDValue Addr, - SDValue &Base, SDValue &Offset) { +bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base, + SDValue &Offset) { if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) { Base = Addr.getOperand(0); Offset = CurDAG->getTargetConstant(0, MVT::i32); diff --git 
a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index abe7b2f..828d6f9 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -148,9 +148,13 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); - - maxStoresPerMemset = 4; - maxStoresPerMemmove = maxStoresPerMemcpy = 2; + + // TRAMPOLINE is custom lowered. + setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom); + + maxStoresPerMemset = maxStoresPerMemsetOptSize = 4; + maxStoresPerMemmove = maxStoresPerMemmoveOptSize + = maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 2; // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::STORE); @@ -177,6 +181,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADD: case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); default: llvm_unreachable("unimplemented operand"); return SDValue(); @@ -392,24 +397,23 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase, } SDValue XCoreTargetLowering:: -LowerLOAD(SDValue Op, SelectionDAG &DAG) const -{ +LowerLOAD(SDValue Op, SelectionDAG &DAG) const { LoadSDNode *LD = cast<LoadSDNode>(Op); assert(LD->getExtensionType() == ISD::NON_EXTLOAD && "Unexpected extension type"); assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT"); - if (allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + if (allowsUnalignedMemoryAccesses(LD->getMemoryVT())) return SDValue(); - } + unsigned ABIAlignment = getTargetData()-> getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext())); // Leave aligned load alone. - if (LD->getAlignment() >= ABIAlignment) { + if (LD->getAlignment() >= ABIAlignment) return SDValue(); - } + SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); SDValue Base; int64_t Offset; @@ -419,10 +423,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const // We've managed to infer better alignment information than the load // already has. Use an aligned load. // - // FIXME: No new alignment information is actually passed here. - // Should the offset really be 4? 
- // - return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4, + return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr, + MachinePointerInfo(), false, false, 0); } // Lower to @@ -436,40 +438,40 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32); SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32); - SDValue LowAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, LowOffset); - SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset); + SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset); + SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset); - SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain, - LowAddr, NULL, 4, false, false, 0); - SDValue High = DAG.getLoad(getPointerTy(), dl, Chain, - HighAddr, NULL, 4, false, false, 0); - SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift); - SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift); - SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted); - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1), + SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, + LowAddr, MachinePointerInfo(), false, false, 0); + SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, + HighAddr, MachinePointerInfo(), false, false, 0); + SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift); + SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift); + SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1), High.getValue(1)); SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, 2, DL); } if (LD->getAlignment() == 2) { - int SVOffset = LD->getSrcValueOffset(); - SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, MVT::i32, dl, Chain, - BasePtr, LD->getSrcValue(), SVOffset, MVT::i16, + SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, + BasePtr, LD->getPointerInfo(), MVT::i16, LD->isVolatile(), LD->isNonTemporal(), 2); - SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, + SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); - SDValue High = DAG.getExtLoad(ISD::EXTLOAD, MVT::i32, dl, Chain, - HighAddr, LD->getSrcValue(), SVOffset + 2, + SDValue High = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain, + HighAddr, + LD->getPointerInfo().getWithOffset(2), MVT::i16, LD->isVolatile(), LD->isNonTemporal(), 2); - SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, + SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, DAG.getConstant(16, MVT::i32)); - SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted); - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1), + SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Low, HighShifted); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1), High.getValue(1)); SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, 2, DL); } // Lower to a call to __misaligned_load(BasePtr). 
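[Editor's aside — the 2-byte-aligned path in the LowerLOAD hunk above rebuilds a 32-bit word from two halfword loads. A minimal standalone sketch of that arithmetic, assuming little-endian halfword order as on XCore; the helper name is illustrative, not part of the patch:]

#include <cstdint>

// Mirrors the ZEXTLOAD/EXTLOAD/SHL/OR sequence emitted for a 32-bit load
// that is only 2-byte aligned. Hypothetical helper; little-endian assumed.
static uint32_t loadWordAligned2(const uint16_t *p) {
  uint32_t Low  = p[0];           // ZEXTLOAD i16 at BasePtr
  uint32_t High = p[1];           // EXTLOAD  i16 at BasePtr + 2
  return Low | (High << 16);      // OR(Low, SHL(High, 16))
}

[The low half is zero-extended because its bits land unshifted in the result; the high half can be any-extended, since the shift left by 16 discards whatever the extension put in the upper bits.]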
@@ -486,12 +488,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__misaligned_load", getPointerTy()), - Args, DAG, dl); + Args, DAG, DL); SDValue Ops[] = { CallResult.first, CallResult.second }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, 2, DL); } SDValue XCoreTargetLowering:: @@ -515,18 +517,17 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const DebugLoc dl = Op.getDebugLoc(); if (ST->getAlignment() == 2) { - int SVOffset = ST->getSrcValueOffset(); SDValue Low = Value; SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value, DAG.getConstant(16, MVT::i32)); SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr, - ST->getSrcValue(), SVOffset, MVT::i16, + ST->getPointerInfo(), MVT::i16, ST->isVolatile(), ST->isNonTemporal(), 2); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr, - ST->getSrcValue(), SVOffset + 2, + ST->getPointerInfo().getWithOffset(2), MVT::i16, ST->isVolatile(), ST->isNonTemporal(), 2); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh); @@ -757,16 +758,18 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); EVT VT = Node->getValueType(0); SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(1), V, 0, false, false, 0); + Node->getOperand(1), MachinePointerInfo(V), + false, false, 0); // Increment the pointer, VAList, to the next vararg SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, DAG.getConstant(VT.getSizeInBits(), getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0, - false, false, 0); + Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), + MachinePointerInfo(V), false, false, 0); // Load the actual argument out of the pointer VAList - return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, false, false, 0); + return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), + false, false, 0); } SDValue XCoreTargetLowering:: @@ -778,9 +781,8 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) const MachineFunction &MF = DAG.getMachineFunction(); XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32); - const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0, - false, false, 0); + return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), + MachinePointerInfo(), false, false, 0); } SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, @@ -796,6 +798,64 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, RegInfo->getFrameRegister(MF), MVT::i32); } +SDValue XCoreTargetLowering:: +LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Trmp = Op.getOperand(1); // trampoline + SDValue FPtr = Op.getOperand(2); // nested function + SDValue Nest = Op.getOperand(3); // 'nest' parameter value + + const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); + + // .align 4 + // LDAPF_u10 r11, nest + // LDW_2rus r11, r11[0] + // STWSP_ru6 r11, sp[0] + // LDAPF_u10 r11, fptr + // LDW_2rus r11, r11[0] + // BAU_1r r11 + // nest: + // .word nest + // fptr: + // 
.word fptr + SDValue OutChains[5]; + + SDValue Addr = Trmp; + + DebugLoc dl = Op.getDebugLoc(); + OutChains[0] = DAG.getStore(Chain, dl, DAG.getConstant(0x0a3cd805, MVT::i32), + Addr, MachinePointerInfo(TrmpAddr), false, false, + 0); + + Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, + DAG.getConstant(4, MVT::i32)); + OutChains[1] = DAG.getStore(Chain, dl, DAG.getConstant(0xd80456c0, MVT::i32), + Addr, MachinePointerInfo(TrmpAddr, 4), false, + false, 0); + + Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, + DAG.getConstant(8, MVT::i32)); + OutChains[2] = DAG.getStore(Chain, dl, DAG.getConstant(0x27fb0a3c, MVT::i32), + Addr, MachinePointerInfo(TrmpAddr, 8), false, + false, 0); + + Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, + DAG.getConstant(12, MVT::i32)); + OutChains[3] = DAG.getStore(Chain, dl, Nest, Addr, + MachinePointerInfo(TrmpAddr, 12), false, false, + 0); + + Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, + DAG.getConstant(16, MVT::i32)); + OutChains[4] = DAG.getStore(Chain, dl, FPtr, Addr, + MachinePointerInfo(TrmpAddr, 16), false, false, + 0); + + SDValue Ops[] = + { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5) }; + return DAG.getMergeValues(Ops, 2, dl); +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -929,7 +989,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // = Chain, Callee, Reg#1, Reg#2, ... // // Returns a chain & a flag for retval copy to use. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); Ops.push_back(Callee); @@ -1035,7 +1095,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, CCInfo.AnalyzeFormalArguments(Ins, CC_XCore); - unsigned StackSlotSize = XCoreFrameInfo::stackSlotSize(); + unsigned StackSlotSize = XCoreFrameLowering::stackSlotSize(); unsigned LRSaveSize = StackSlotSize; @@ -1068,7 +1128,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, unsigned ObjSize = VA.getLocVT().getSizeInBits()/8; if (ObjSize > StackSlotSize) { errs() << "LowerFormalArguments Unhandled argument type: " - << (unsigned)VA.getLocVT().getSimpleVT().SimpleTy + << EVT(VA.getLocVT()).getEVTString() << "\n"; } // Create the frame index object for this incoming parameter... 
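[Editor's aside — LowerTRAMPOLINE above materializes a 20-byte trampoline as five word stores: three fixed instruction words packing the six 16-bit instructions from the comment (LDAPF/LDW/STWSP/LDAPF/LDW/BAU), then the nest value and the target address. A hedged host-side sketch of that layout; the struct and function names are hypothetical:]

#include <cstdint>
#include <cstring>

// Host-side view of the memory image written by OutChains[0..4] above.
struct XCoreTrampoline {
  uint32_t Insn[3];  // 0x0a3cd805, 0xd80456c0, 0x27fb0a3c, per the stores
  uint32_t Nest;     // loaded into R11 and spilled to sp[0] at run time
  uint32_t FPtr;     // the final BAU_1r branches here
};

static void initTrampoline(void *Mem, uint32_t Nest, uint32_t FPtr) {
  const XCoreTrampoline T = {{0x0a3cd805, 0xd80456c0, 0x27fb0a3c}, Nest, FPtr};
  std::memcpy(Mem, &T, sizeof(T));  // the lowering emits this as 5 DAG stores
}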
@@ -1079,7 +1139,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0, + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), false, false, 0)); } } @@ -1111,8 +1172,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, RegInfo.addLiveIn(ArgRegs[i], VReg); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); // Move argument from virt reg -> stack - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, - false, false, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(), false, false, 0); MemOps.push_back(Store); } if (!MemOps.empty()) @@ -1443,9 +1504,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getMemmove(Chain, dl, ST->getBasePtr(), LD->getBasePtr(), DAG.getConstant(StoreBits/8, MVT::i32), - Alignment, false, ST->getSrcValue(), - ST->getSrcValueOffset(), LD->getSrcValue(), - LD->getSrcValueOffset()); + Alignment, false, ST->getPointerInfo(), + LD->getPointerInfo()); } } break; diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index febc198..7e5dd2e 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -147,6 +147,7 @@ namespace llvm { SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; // Inline asm support std::vector<unsigned> diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index ad00046..9cb6a7d 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -384,74 +384,10 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addImm(0); } -bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) { - return true; - } - MachineFunction *MF = MBB.getParent(); - XCoreFunctionInfo *XFI = MF->getInfo<XCoreFunctionInfo>(); - - bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF); - - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - - for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin(); - it != CSI.end(); ++it) { - // Add the callee-saved register as live-in. It's killed at the spill. 
- MBB.addLiveIn(it->getReg()); - - unsigned Reg = it->getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - storeRegToStackSlot(MBB, MI, Reg, true, - it->getFrameIdx(), RC, &RI); - if (emitFrameMoves) { - MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol(); - BuildMI(MBB, MI, DL, get(XCore::PROLOG_LABEL)).addSym(SaveLabel); - XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it)); - } - } - return true; -} - -bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const -{ - bool AtStart = MI == MBB.begin(); - MachineBasicBlock::iterator BeforeI = MI; - if (!AtStart) - --BeforeI; - for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin(); - it != CSI.end(); ++it) { - unsigned Reg = it->getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - loadRegFromStackSlot(MBB, MI, it->getReg(), - it->getFrameIdx(), - RC, &RI); - assert(MI != MBB.begin() && - "loadRegFromStackSlot didn't insert any code!"); - // Insert in reverse order. loadRegFromStackSlot can insert multiple - // instructions. - if (AtStart) - MI = MBB.begin(); - else { - MI = BeforeI; - ++MI; - } - } - return true; -} - /// ReverseBranchCondition - Return the inverse opcode of the /// specified Branch instruction. bool XCoreInstrInfo:: -ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const -{ +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert((Cond.size() == 2) && "Invalid XCore branch condition!"); Cond[0].setImm(GetOppositeBranchCondition((XCore::CondCode)Cond[0].getImm())); diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index d2b116e..977fe8d 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -75,15 +75,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - - virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; virtual bool ReverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 6b3b39b..38cc734 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -29,11 +29,11 @@ include "XCoreInstrFormats.td" // Call def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInGlue]>; def SDT_XCoreBR_JT : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; @@ -66,9 +66,9 @@ def SDT_XCoreCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_XCoreCallSeqStart, - [SDNPHasChain, SDNPOutFlag]>; + [SDNPHasChain, SDNPOutGlue]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_XCoreCallSeqEnd, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; 
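
The .td hunk just above belongs to a second tree-wide rename in this revision: the "Flag" concept becomes "Glue" (MVT::Flag to MVT::Glue in C++; SDNPOptInFlag/SDNPOutFlag to SDNPOptInGlue/SDNPOutGlue in the node definitions). Only the spelling changes; the value still glues the argument-register copies to the call. A sketch of the consuming side, matching the LowerCCCCallTo hunk earlier (Chain, Callee, dl, DAG, and a populated Ops vector are assumed, as in that function):

    // The call node returns a chain plus a glue value that keeps the
    // argument register copies ordered with the call itself.
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, &Ops[0], Ops.size());
    SDValue InFlag = Chain.getValue(1);  // the MVT::Glue result
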
//===----------------------------------------------------------------------===// // Instruction Pattern Stuff @@ -610,8 +610,15 @@ def LDC_lru6 : _FLRU6< [(set GRRegs:$dst, immU16:$b)]>; } +def SETC_ru6 : _FRU6<(outs), (ins GRRegs:$r, i32imm:$val), + "setc res[$r], $val", + [(int_xcore_setc GRRegs:$r, immU6:$val)]>; + +def SETC_lru6 : _FLRU6<(outs), (ins GRRegs:$r, i32imm:$val), + "setc res[$r], $val", + [(int_xcore_setc GRRegs:$r, immU16:$val)]>; + // Operand register - U6 -// TODO setc let isBranch = 1, isTerminator = 1 in { defm BRFT: FRU6_LRU6_branch<"bt">; defm BRBT: FRU6_LRU6_branch<"bt">; @@ -720,9 +727,8 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), "neg $dst, $b", [(set GRRegs:$dst, (ineg GRRegs:$b))]>; -// TODO setd, eet, eef, getts, setpt, outct, inct, chkct, outt, intt, out, -// in, outshr, inshr, testct, testwct, tinitpc, tinitdp, tinitsp, tinitcp, -// tsetmr, sext (reg), zext (reg) +// TODO setd, eet, eef, getts, setpt, outshr, inshr, testwct, tinitpc, tinitdp, +// tinitsp, tinitcp, tsetmr, sext (reg), zext (reg) let Constraints = "$src1 = $dst" in { let neverHasSideEffects = 1 in def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), @@ -748,6 +754,50 @@ def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size), "mkmsk $dst, $size", [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>; +def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type), + "getr $dst, $type", + [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>; + +def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), + "outct res[$r], $val", + [(int_xcore_outct GRRegs:$r, GRRegs:$val)]>; + +def OUTCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val), + "outct res[$r], $val", + [(int_xcore_outct GRRegs:$r, immUs:$val)]>; + +def OUTT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), + "outt res[$r], $val", + [(int_xcore_outt GRRegs:$r, GRRegs:$val)]>; + +def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), + "out res[$r], $val", + [(int_xcore_out GRRegs:$r, GRRegs:$val)]>; + +def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), + "inct $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>; + +def INT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), + "int $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_int GRRegs:$r))]>; + +def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), + "in $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>; + +def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), + "chkct res[$r], $val", + [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>; + +def CHKCT_rus : _F2R<(outs), (ins GRRegs:$r, i32imm:$val), + "chkct res[$r], $val", + [(int_xcore_chkct GRRegs:$r, immUs:$val)]>; + +def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), + "setd res[$r], $val", + [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>; + // Two operand long // TODO settw, setclk, setrdy, setpsc, endin, peek, // getd, testlcl, tinitlr, getps, setps @@ -763,8 +813,12 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), "clz $dst, $src", [(set GRRegs:$dst, (ctlz GRRegs:$src))]>; +def SETC_l2r : _FRU6<(outs), (ins GRRegs:$r, GRRegs:$val), + "setc res[$r], $val", + [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>; + // One operand short -// TODO edu, eeu, waitet, waitef, freer, tstart, msync, mjoin, syncr, clrtp +// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, syncr, clrtp // setdp, setcp, setv, setev, kcall // dgetreg let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in @@ -805,6 +859,10 @@ def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, 
variable_ops), [(XCoreBranchLink GRRegs:$addr)]>; } +def FREER_1r : _F1R<(outs), (ins GRRegs:$r), + "freer res[$r]", + [(int_xcore_freer GRRegs:$r)]>; + // Zero operand short // TODO waiteu, clre, ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, // stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret, diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index f82e598..56c0879 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetInstrInfo.h" @@ -84,11 +84,13 @@ const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + Reserved.set(XCore::CP); Reserved.set(XCore::DP); Reserved.set(XCore::SP); Reserved.set(XCore::LR); - if (hasFP(MF)) { + if (TFI->hasFP(MF)) { Reserved.set(XCore::R10); } return Reserved; @@ -96,12 +98,10 @@ BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const { bool XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - // TODO can we estimate stack size? - return hasFP(MF); -} + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); -bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const { - return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects(); + // TODO can we estimate stack size? + return TFI->hasFP(MF); } // This function eliminates ADJCALLSTACKDOWN, @@ -109,7 +109,9 @@ bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const { void XCoreRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (!hasReservedCallFrame(MF)) { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (!TFI->hasReservedCallFrame(MF)) { // Turn the adjcallstackdown instruction into 'extsp <amt>' and the // adjcallstackup instruction into 'ldaw sp, sp[<amt>]' MachineInstr *Old = I; @@ -118,14 +120,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned Align = TFI->getStackAlignment(); Amount = (Amount+Align-1)/Align*Align; assert(Amount%4 == 0); Amount /= 4; - + bool isU6 = isImmU6(Amount); - if (!isU6 && !isImmU16(Amount)) { // FIX could emit multiple instructions in this case. 
#ifndef NDEBUG @@ -172,6 +173,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = FrameOp.getIndex(); MachineFunction &MF = *MI.getParent()->getParent(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); int StackSize = MF.getFrameInfo()->getStackSize(); @@ -197,7 +199,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset/=4; - bool FP = hasFP(MF); + bool FP = TFI->hasFP(MF); unsigned Reg = MI.getOperand(0).getReg(); bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill(); @@ -292,48 +294,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MBB.erase(II); } -void -XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR); - const TargetRegisterClass *RC = XCore::GRRegsRegisterClass; - XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); - if (LRUsed) { - MF.getRegInfo().setPhysRegUnused(XCore::LR); - - bool isVarArg = MF.getFunction()->isVarArg(); - int FrameIdx; - if (! isVarArg) { - // A fixed offset of 0 allows us to save / restore LR using entsp / retsp. - FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true); - } else { - FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), - false); - } - XFI->setUsesLR(FrameIdx); - XFI->setLRSpillSlot(FrameIdx); - } - if (requiresRegisterScavenging(MF)) { - // Reserve a slot close to SP or frame pointer. - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } - if (hasFP(MF)) { - // A callee save register is used to hold the FP. - // This needs saving / restoring in the epilogue / prologue. - XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } -} - -void XCoreRegisterInfo:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const { - -} - void XCoreRegisterInfo:: loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DstReg, int64_t Value, DebugLoc dl) const { @@ -346,229 +306,19 @@ loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value); } -void XCoreRegisterInfo:: -storeToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, int Offset, DebugLoc dl) const { - assert(Offset%4 == 0 && "Misaligned stack offset"); - Offset/=4; - bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) - report_fatal_error("storeToStack offset too big " + Twine(Offset)); - int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6; - BuildMI(MBB, I, dl, TII.get(Opcode)) - .addReg(SrcReg) - .addImm(Offset); -} - -void XCoreRegisterInfo:: -loadFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DstReg, int Offset, DebugLoc dl) const { - assert(Offset%4 == 0 && "Misaligned stack offset"); - Offset/=4; - bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) - report_fatal_error("loadFromStack offset too big " + Twine(Offset)); - int Opcode = isU6 ? 
XCore::LDWSP_ru6 : XCore::LDWSP_lru6; - BuildMI(MBB, I, dl, TII.get(Opcode), DstReg) - .addImm(Offset); -} - -void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo *MMI = &MF.getMMI(); - XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - - bool FP = hasFP(MF); - - // Work out frame sizes. - int FrameSize = MFI->getStackSize(); - - assert(FrameSize%4 == 0 && "Misaligned frame size"); - - FrameSize/=4; - - bool isU6 = isImmU6(FrameSize); - - if (!isU6 && !isImmU16(FrameSize)) { - // FIXME could emit multiple instructions. - report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize)); - } - bool emitFrameMoves = needsFrameMoves(MF); - - // Do we need to allocate space on the stack? - if (FrameSize) { - bool saveLR = XFI->getUsesLR(); - bool LRSavedOnEntry = false; - int Opcode; - if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) { - Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6; - MBB.addLiveIn(XCore::LR); - saveLR = false; - LRSavedOnEntry = true; - } else { - Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6; - } - BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize); - - if (emitFrameMoves) { - std::vector<MachineMove> &Moves = MMI->getFrameMoves(); - - // Show update of SP. - MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); - - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4); - Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); - - if (LRSavedOnEntry) { - MachineLocation CSDst(MachineLocation::VirtualFP, 0); - MachineLocation CSSrc(XCore::LR); - Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc)); - } - } - if (saveLR) { - int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot()); - storeToStack(MBB, MBBI, XCore::LR, LRSpillOffset + FrameSize*4, dl); - MBB.addLiveIn(XCore::LR); - - if (emitFrameMoves) { - MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel); - MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset); - MachineLocation CSSrc(XCore::LR); - MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc)); - } - } - } - - if (FP) { - // Save R10 to the stack. - int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot()); - storeToStack(MBB, MBBI, XCore::R10, FPSpillOffset + FrameSize*4, dl); - // R10 is live-in. It is killed at the spill. - MBB.addLiveIn(XCore::R10); - if (emitFrameMoves) { - MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label); - MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset); - MachineLocation CSSrc(XCore::R10); - MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc)); - } - // Set the FP from the SP. - unsigned FramePtr = XCore::R10; - BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr) - .addImm(0); - if (emitFrameMoves) { - // Show FP is now valid. 
- MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel); - MachineLocation SPDst(FramePtr); - MachineLocation SPSrc(MachineLocation::VirtualFP); - MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc)); - } - } - - if (emitFrameMoves) { - // Frame moves for callee saved. - std::vector<MachineMove> &Moves = MMI->getFrameMoves(); - std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels = - XFI->getSpillLabels(); - for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) { - MCSymbol *SpillLabel = SpillLabels[I].first; - CalleeSavedInfo &CSI = SpillLabels[I].second; - int Offset = MFI->getObjectOffset(CSI.getFrameIdx()); - unsigned Reg = CSI.getReg(); - MachineLocation CSDst(MachineLocation::VirtualFP, Offset); - MachineLocation CSSrc(Reg); - Moves.push_back(MachineMove(SpillLabel, CSDst, CSSrc)); - } - } -} - -void XCoreRegisterInfo::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - DebugLoc dl = MBBI->getDebugLoc(); - - bool FP = hasFP(MF); - - if (FP) { - // Restore the stack pointer. - unsigned FramePtr = XCore::R10; - BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r)) - .addReg(FramePtr); - } - - // Work out frame sizes. - int FrameSize = MFI->getStackSize(); - - assert(FrameSize%4 == 0 && "Misaligned frame size"); - - FrameSize/=4; - - bool isU6 = isImmU6(FrameSize); - - if (!isU6 && !isImmU16(FrameSize)) { - // FIXME could emit multiple instructions. - report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize)); - } - - if (FrameSize) { - XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); - - if (FP) { - // Restore R10 - int FPSpillOffset = MFI->getObjectOffset(XFI->getFPSpillSlot()); - FPSpillOffset += FrameSize*4; - loadFromStack(MBB, MBBI, XCore::R10, FPSpillOffset, dl); - } - bool restoreLR = XFI->getUsesLR(); - if (restoreLR && MFI->getObjectOffset(XFI->getLRSpillSlot()) != 0) { - int LRSpillOffset = MFI->getObjectOffset(XFI->getLRSpillSlot()); - LRSpillOffset += FrameSize*4; - loadFromStack(MBB, MBBI, XCore::LR, LRSpillOffset, dl); - restoreLR = false; - } - if (restoreLR) { - // Fold prologue into return instruction - assert(MBBI->getOpcode() == XCore::RETSP_u6 - || MBBI->getOpcode() == XCore::RETSP_lu6); - int Opcode = (isU6) ? XCore::RETSP_u6 : XCore::RETSP_lu6; - BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize); - MBB.erase(MBBI); - } else { - int Opcode = (isU6) ? XCore::LDAWSP_ru6_RRegs : XCore::LDAWSP_lru6_RRegs; - BuildMI(MBB, MBBI, dl, TII.get(Opcode), XCore::SP).addImm(FrameSize); - } - } -} - int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - bool FP = hasFP(MF); - - return FP ? XCore::R10 : XCore::SP; + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + return TFI->hasFP(MF) ? XCore::R10 : XCore::SP; } unsigned XCoreRegisterInfo::getRARegister() const { return XCore::LR; } -void XCoreRegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) - const { - // Initial state of the frame pointer is SP. 
- MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(XCore::SP, 0); - Moves.push_back(MachineMove(0, Dst, Src)); -} - #include "XCoreGenRegisterInfo.inc" diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index e636c1c..2185755 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -48,8 +48,6 @@ public: bool requiresRegisterScavenging(const MachineFunction &MF) const; - bool hasFP(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; @@ -57,18 +55,9 @@ public: void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const; - - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; - - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; - void getInitialFrameState(std::vector<MachineMove> &Moves) const; //! Return the array of argument passing registers /*! diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td index 62daf5d..765f717 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.td +++ b/lib/Target/XCore/XCoreRegisterInfo.td @@ -61,8 +61,8 @@ def GRRegs : RegisterClass<"XCore", [i32], 32, GRRegsClass::iterator GRRegsClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) + const TargetFrameLowering *TFI = TM.getFrameLowering(); + if (TFI->hasFP(MF)) return end()-1; // don't allocate R10 else return end(); diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index b0013eb..30da2c8 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -27,7 +27,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT, DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" "i16:16:32-i32:32:32-i64:32:32-n32"), InstrInfo(), - FrameInfo(*this), + FrameLowering(Subtarget), TLInfo(*this), TSInfo(*this) { } diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 14073ba..24daadc 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -16,7 +16,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" -#include "XCoreFrameInfo.h" +#include "XCoreFrameLowering.h" #include "XCoreSubtarget.h" #include "XCoreInstrInfo.h" #include "XCoreISelLowering.h" @@ -28,7 +28,7 @@ class XCoreTargetMachine : public LLVMTargetMachine { XCoreSubtarget Subtarget; const TargetData DataLayout; // Calculates type size & alignment XCoreInstrInfo InstrInfo; - XCoreFrameInfo FrameInfo; + XCoreFrameLowering FrameLowering; XCoreTargetLowering TLInfo; XCoreSelectionDAGInfo TSInfo; public: @@ -36,7 +36,9 @@ public: const std::string &FS); virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const XCoreFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual const XCoreFrameLowering *getFrameLowering() const { + return &FrameLowering; + } virtual const XCoreSubtarget *getSubtargetImpl() 
const { return &Subtarget; } virtual const XCoreTargetLowering *getTargetLowering() const { return &TLInfo; diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp index cdf5a53..7f4e1c1 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.cpp +++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -12,6 +12,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ELF.h" using namespace llvm; @@ -19,31 +20,31 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TargetLoweringObjectFileELF::Initialize(Ctx, TM); DataSection = - Ctx.getELFSection(".dp.data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | - MCSectionELF::XCORE_SHF_DP_SECTION, - SectionKind::getDataRel(), false); + Ctx.getELFSection(".dp.data", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE | + ELF::XCORE_SHF_DP_SECTION, + SectionKind::getDataRel()); BSSSection = - Ctx.getELFSection(".dp.bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | - MCSectionELF::XCORE_SHF_DP_SECTION, - SectionKind::getBSS(), false); + Ctx.getELFSection(".dp.bss", ELF::SHT_NOBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE | + ELF::XCORE_SHF_DP_SECTION, + SectionKind::getBSS()); MergeableConst4Section = - Ctx.getELFSection(".cp.rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | - MCSectionELF::XCORE_SHF_CP_SECTION, - SectionKind::getMergeableConst4(), false); + Ctx.getELFSection(".cp.rodata.cst4", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_MERGE | + ELF::XCORE_SHF_CP_SECTION, + SectionKind::getMergeableConst4()); MergeableConst8Section = - Ctx.getELFSection(".cp.rodata.cst8", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | - MCSectionELF::XCORE_SHF_CP_SECTION, - SectionKind::getMergeableConst8(), false); + Ctx.getELFSection(".cp.rodata.cst8", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_MERGE | + ELF::XCORE_SHF_CP_SECTION, + SectionKind::getMergeableConst8()); MergeableConst16Section = - Ctx.getELFSection(".cp.rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | - MCSectionELF::XCORE_SHF_CP_SECTION, - SectionKind::getMergeableConst16(), false); + Ctx.getELFSection(".cp.rodata.cst16", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_MERGE | + ELF::XCORE_SHF_CP_SECTION, + SectionKind::getMergeableConst16()); // TLS globals are lowered in the backend to arrays indexed by the current // thread id. After lowering they require no special handling by the linker @@ -52,10 +53,10 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TLSBSSSection = BSSSection; ReadOnlySection = - Ctx.getELFSection(".cp.rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionELF::XCORE_SHF_CP_SECTION, - SectionKind::getReadOnlyWithRel(), false); + Ctx.getELFSection(".cp.rodata", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | + ELF::XCORE_SHF_CP_SECTION, + SectionKind::getReadOnlyWithRel()); // Dynamic linking is not supported. Data with relocations is placed in the // same section as data without relocations. -- cgit v1.1
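
Taken together, the XCoreRegisterInfo, XCoreRegisterInfo.td, and XCoreTargetMachine hunks complete one refactoring: frame handling (hasFP, prologue/epilogue emission, callee-saved spill/restore, initial frame state) moves out of the register-info class into the new TargetFrameLowering hierarchy (here XCoreFrameLowering), and former callers now reach it through the target machine. A minimal sketch of the new query pattern, drawn from the getFrameRegister hunk and assuming only a MachineFunction &MF in scope:

    #include "llvm/Target/TargetFrameLowering.h"

    // Frame-shape questions now go through the frame lowering object,
    // not TargetRegisterInfo.
    const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
    unsigned FrameReg = TFI->hasFP(MF) ? XCore::R10 : XCore::SP;
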