Diffstat (limited to 'contrib/llvm/lib')
-rw-r--r--contrib/llvm/lib/Analysis/Analysis.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp31
-rw-r--r--contrib/llvm/lib/Analysis/ConstantFolding.cpp38
-rw-r--r--contrib/llvm/lib/Analysis/IPA/IPA.cpp1
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/MemoryBuiltins.cpp61
-rw-r--r--contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp29
-rw-r--r--contrib/llvm/lib/Analysis/RegionInfo.cpp35
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolution.cpp105
-rw-r--r--contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp215
-rw-r--r--contrib/llvm/lib/AsmParser/LLLexer.cpp1
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.cpp98
-rw-r--r--contrib/llvm/lib/AsmParser/LLToken.h1
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitReader.cpp1
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp2
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp1
-rw-r--r--contrib/llvm/lib/CodeGen/Analysis.cpp225
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp11
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h5
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h6
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp143
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h18
-rw-r--r--contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/CallingConvLower.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGen.cpp1
-rw-r--r--contrib/llvm/lib/CodeGen/IfConversion.cpp29
-rw-r--r--contrib/llvm/lib/CodeGen/InlineSpiller.cpp19
-rw-r--r--contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp205
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp18
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp54
-rw-r--r--contrib/llvm/lib/CodeGen/MachineScheduler.cpp322
-rw-r--r--contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp16
-rw-r--r--contrib/llvm/lib/CodeGen/MachineVerifier.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/Passes.cpp86
-rw-r--r--contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp8
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBasic.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp45
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterScavenging.cpp13
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp238
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp58
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp66
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp46
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp102
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp1
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp119
-rw-r--r--contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp82
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp55
-rw-r--r--contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp26
-rw-r--r--contrib/llvm/lib/CodeGen/TargetSchedule.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp16
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp4
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFContext.cpp160
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFContext.h3
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp340
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h11
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp91
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARFFormValue.h82
-rw-r--r--contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp6
-rw-r--r--contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp84
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp324
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp91
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h14
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp37
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp86
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp315
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h48
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h74
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp177
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h30
-rw-r--r--contrib/llvm/lib/IR/AsmWriter.cpp28
-rw-r--r--contrib/llvm/lib/IR/AttributeImpl.h20
-rw-r--r--contrib/llvm/lib/IR/Attributes.cpp141
-rw-r--r--contrib/llvm/lib/IR/Constants.cpp28
-rw-r--r--contrib/llvm/lib/IR/ConstantsContext.h2
-rw-r--r--contrib/llvm/lib/IR/Core.cpp104
-rw-r--r--contrib/llvm/lib/IR/DIBuilder.cpp30
-rw-r--r--contrib/llvm/lib/IR/DataLayout.cpp6
-rw-r--r--contrib/llvm/lib/IR/DebugInfo.cpp42
-rw-r--r--contrib/llvm/lib/IR/Function.cpp7
-rw-r--r--contrib/llvm/lib/IR/Metadata.cpp36
-rw-r--r--contrib/llvm/lib/IR/PassManager.cpp4
-rw-r--r--contrib/llvm/lib/IR/Type.cpp4
-rw-r--r--contrib/llvm/lib/IR/Value.cpp10
-rw-r--r--contrib/llvm/lib/IR/Verifier.cpp231
-rw-r--r--contrib/llvm/lib/Linker/LinkModules.cpp59
-rw-r--r--contrib/llvm/lib/Linker/Linker.cpp70
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfo.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCAsmStreamer.cpp24
-rw-r--r--contrib/llvm/lib/MC/MCAssembler.cpp30
-rw-r--r--contrib/llvm/lib/MC/MCDwarf.cpp42
-rw-r--r--contrib/llvm/lib/MC/MCELFStreamer.cpp53
-rw-r--r--contrib/llvm/lib/MC/MCExpr.cpp39
-rw-r--r--contrib/llvm/lib/MC/MCMachOStreamer.cpp4
-rw-r--r--contrib/llvm/lib/MC/MCNullStreamer.cpp7
-rw-r--r--contrib/llvm/lib/MC/MCObjectFileInfo.cpp24
-rw-r--r--contrib/llvm/lib/MC/MCObjectStreamer.cpp43
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmParser.cpp107
-rw-r--r--contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp6
-rw-r--r--contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp53
-rw-r--r--contrib/llvm/lib/MC/MCPureStreamer.cpp27
-rw-r--r--contrib/llvm/lib/MC/MCSectionCOFF.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCSectionELF.cpp12
-rw-r--r--contrib/llvm/lib/MC/MCSectionMachO.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCStreamer.cpp15
-rw-r--r--contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp66
-rw-r--r--contrib/llvm/lib/Object/COFFObjectFile.cpp5
-rw-r--r--contrib/llvm/lib/Object/MachOObject.cpp422
-rw-r--r--contrib/llvm/lib/Object/MachOObjectFile.cpp1806
-rw-r--r--contrib/llvm/lib/Object/Object.cpp39
-rw-r--r--contrib/llvm/lib/Object/ObjectFile.cpp8
-rw-r--r--contrib/llvm/lib/Support/CommandLine.cpp228
-rw-r--r--contrib/llvm/lib/Support/Compression.cpp97
-rw-r--r--contrib/llvm/lib/Support/DataExtractor.cpp2
-rw-r--r--contrib/llvm/lib/Support/FoldingSet.cpp4
-rw-r--r--contrib/llvm/lib/Support/Host.cpp39
-rw-r--r--contrib/llvm/lib/Support/LockFileManager.cpp37
-rw-r--r--contrib/llvm/lib/Support/PathV2.cpp7
-rw-r--r--contrib/llvm/lib/Support/Triple.cpp8
-rw-r--r--contrib/llvm/lib/Support/Unix/Memory.inc2
-rw-r--r--contrib/llvm/lib/Support/Unix/PathV2.inc4
-rw-r--r--contrib/llvm/lib/Support/Unix/Program.inc24
-rw-r--r--contrib/llvm/lib/Support/Unix/Signals.inc21
-rw-r--r--contrib/llvm/lib/Support/Windows/Program.inc71
-rw-r--r--contrib/llvm/lib/Support/Windows/Signals.inc22
-rw-r--r--contrib/llvm/lib/Support/YAMLParser.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp5
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp181
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp244
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h11
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td123
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp12
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp9
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h20
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp6
-rw-r--r--contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h9
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp67
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h1
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.h6
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td14
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp12
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp88
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp368
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h20
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td81
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td41
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td22
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp189
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h10
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp1
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp74
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp24
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp28
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp18
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h7
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp159
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h13
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp198
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h114
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp14
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp84
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.h19
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.td26
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp50
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp29
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp10
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp42
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h5
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp485
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h17
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td595
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td40
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td904
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp185
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp35
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp18
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp63
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp114
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td3
-rw-r--r--contrib/llvm/lib/Target/Mangler.cpp7
-rw-r--r--contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp752
-rw-r--r--contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp34
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp77
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td112
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td67
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp7
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp7
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp26
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td27
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td562
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp1
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp220
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h12
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFormats.td36
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td148
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp34
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h66
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsOs16.cpp113
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsOs16.h49
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td50
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp101
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp109
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h6
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp316
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h8
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp45
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp78
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.h48
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp91
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.h17
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.h4
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp133
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h26
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp436
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp35
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h4
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td3
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td40
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXSection.h3
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp8
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp24
-rw-r--r--contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp739
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp14
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp15
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.h3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td43
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp12
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp98
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp14
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp72
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td610
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td148
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td51
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp806
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h71
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td1359
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp10
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h9
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp168
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h13
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp24
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPU.h1
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp101
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td8
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h1
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUInstructions.td169
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp2
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp5
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h2
-rw-r--r--contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp4
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILBase.td4
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp23
-rw-r--r--contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp1215
-rw-r--r--contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp45
-rw-r--r--contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h5
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp29
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp39
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp4
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h7
-rw-r--r--contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp499
-rw-r--r--contrib/llvm/lib/Target/R600/Processors.td58
-rw-r--r--contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp341
-rw-r--r--contrib/llvm/lib/Target/R600/R600Defines.h44
-rw-r--r--contrib/llvm/lib/Target/R600/R600ISelLowering.cpp18
-rw-r--r--contrib/llvm/lib/Target/R600/R600InstrInfo.cpp45
-rw-r--r--contrib/llvm/lib/Target/R600/R600InstrInfo.h9
-rw-r--r--contrib/llvm/lib/Target/R600/R600Instructions.td736
-rw-r--r--contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h1
-rw-r--r--contrib/llvm/lib/Target/R600/R600Packetizer.cpp459
-rw-r--r--contrib/llvm/lib/Target/R600/R600RegisterInfo.td10
-rw-r--r--contrib/llvm/lib/Target/R600/R600Schedule.td13
-rw-r--r--contrib/llvm/lib/Target/R600/SIDefines.h22
-rw-r--r--contrib/llvm/lib/Target/R600/SIISelLowering.cpp157
-rw-r--r--contrib/llvm/lib/Target/R600/SIISelLowering.h6
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrFormats.td50
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrInfo.h1
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstrInfo.td61
-rw-r--r--contrib/llvm/lib/Target/R600/SIInstructions.td516
-rw-r--r--contrib/llvm/lib/Target/R600/SIIntrinsics.td6
-rw-r--r--contrib/llvm/lib/Target/R600/SIRegisterInfo.td12
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h62
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp30
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp50
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcCallingConv.td109
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp52
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h8
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp10
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp699
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.h19
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td50
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td8
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td9
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.h6
-rw-r--r--contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp689
-rw-r--r--contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp150
-rw-r--r--contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h68
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp151
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp38
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp131
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h31
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp140
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp160
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h62
-rw-r--r--contrib/llvm/lib/Target/SystemZ/README.txt146
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZ.h77
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZ.td75
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp113
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h52
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp21
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h23
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td65
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp62
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h55
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp535
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h93
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp616
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp2233
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h212
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h48
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td318
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td987
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp444
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h123
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td955
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp116
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h47
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h74
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZOperands.td435
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZOperators.td196
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td71
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp162
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h70
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td150
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp56
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h48
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp60
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h74
-rw-r--r--contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/Target.cpp18
-rw-r--r--contrib/llvm/lib/Target/TargetMachineC.cpp98
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp1235
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h46
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp44
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp18
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h5
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td16
-rw-r--r--contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp8
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp144
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp253
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp64
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.h31
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp40
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp344
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h7
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFormats.td36
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td126
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td57
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td4
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSystem.td17
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedHaswell.td2
-rw-r--r--contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td2
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp25
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h4
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp5
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp35
-rw-r--r--contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp11
-rw-r--r--contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp11
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp3
-rw-r--r--contrib/llvm/lib/Target/XCore/XCore.h4
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp15
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp56
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp202
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.h3
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td81
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp145
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp6
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp5
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp15
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp13
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp104
-rw-r--r--contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp24
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp23
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombine.h1
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp148
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp24
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp60
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp82
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp141
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp192
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp96
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp85
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp65
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp683
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVN.cpp391
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp5
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp43
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp35
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SROA.cpp31
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp17
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp41
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp25
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp29
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp101
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp21
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Utils.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp514
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp348
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp730
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/VecUtils.h164
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp7
464 files changed, 33984 insertions, 12166 deletions
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index 66e416cd..349c417 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -11,6 +11,8 @@
#include "llvm-c/Initialization.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/InitializePasses.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassRegistry.h"
#include <cstring>
using namespace llvm;
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index ae6da1a..f8509dd 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -88,7 +88,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
const TargetLibraryInfo &TLI,
bool RoundToAlign = false) {
uint64_t Size;
- if (getUnderlyingObjectSize(V, Size, &TD, &TLI, RoundToAlign))
+ if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign))
return Size;
return AliasAnalysis::UnknownSize;
}
@@ -98,6 +98,35 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
static bool isObjectSmallerThan(const Value *V, uint64_t Size,
const DataLayout &TD,
const TargetLibraryInfo &TLI) {
+ // Note that the meanings of the "object" are slightly different in the
+ // following contexts:
+ // c1: llvm::getObjectSize()
+ // c2: llvm.objectsize() intrinsic
+ // c3: isObjectSmallerThan()
+ // c1 and c2 share the same meaning; however, the meaning of "object" in c3
+ // refers to the "entire object".
+ //
+ // Consider this example:
+ // char *p = (char*)malloc(100)
+ // char *q = p+80;
+ //
+ // In the context of c1 and c2, the "object" pointed to by q refers to the
+ // stretch of memory of q[0:19]. So, getObjectSize(q) should return 20.
+ //
+ // However, in the context of c3, the "object" refers to the chunk of memory
+ // being allocated. So, the "object" has 100 bytes, and q points to the middle
+ // of the "object". In case q is passed to isObjectSmallerThan() as the 1st
+ // parameter, before llvm::getObjectSize() is called to get the size of the
+ // entire object, we should:
+ // - either rewind the pointer q to the base address of the object in
+ // question (in this case rewind to p), or
+ // - just give up. It is up to the caller to make sure the pointer is
+ // pointing to the base address of the object.
+ //
+ // We go for the 2nd option for simplicity.
+ if (!isIdentifiedObject(V))
+ return false;
+
// This function needs to use the aligned object size because we allow
// reads a bit past the end given sufficient alignment.
uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true);
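
A minimal C-style sketch of the p/q example in the new comment above (the numbers, not the LLVM API, are the point): __builtin_object_size is the user-visible counterpart of llvm.objectsize(), and with mode 0 it reports the bytes remaining from the pointer to the end of its object, i.e. the c1/c2 meaning of "object", whereas isObjectSmallerThan() has to reason about the whole allocation and therefore now insists on an identified base pointer.

#include <stdio.h>
#include <stdlib.h>

int main(void) {
  char *p = (char *)malloc(100);
  char *q = p + 80;
  /* With optimization enabled, clang can fold these through llvm.objectsize():
     mode 0 counts the bytes from the pointer to the end of the object, so p
     reports 100 and q reports 20 -- not the 100-byte allocation that
     isObjectSmallerThan() reasons about. */
  printf("%zu %zu\n", __builtin_object_size(p, 0), __builtin_object_size(q, 0));
  free(p);
  return 0;
}
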
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index 09d7608..bc0dffc 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -550,7 +551,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
if (Opc == Instruction::And && DL) {
- unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType());
+ unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType());
APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL);
@@ -880,19 +881,20 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
}
-/// ConstantFoldConstantExpression - Attempt to fold the constant expression
-/// using the specified DataLayout. If successful, the constant result is
-/// result is returned, if not, null is returned.
-Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
- const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
- SmallVector<Constant*, 8> Ops;
- for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
- i != e; ++i) {
+static Constant *
+ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
+ const TargetLibraryInfo *TLI,
+ SmallPtrSet<ConstantExpr *, 4> &FoldedOps) {
+ SmallVector<Constant *, 8> Ops;
+ for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e;
+ ++i) {
Constant *NewC = cast<Constant>(*i);
- // Recursively fold the ConstantExpr's operands.
- if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
- NewC = ConstantFoldConstantExpression(NewCE, TD, TLI);
+ // Recursively fold the ConstantExpr's operands. If we have already folded
+ // a ConstantExpr, we don't have to process it again.
+ if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
+ if (FoldedOps.insert(NewCE))
+ NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps);
+ }
Ops.push_back(NewC);
}
@@ -902,6 +904,16 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
}
+/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// using the specified DataLayout. If successful, the constant result is
+/// returned; if not, null is returned.
+Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
+ const DataLayout *TD,
+ const TargetLibraryInfo *TLI) {
+ SmallPtrSet<ConstantExpr *, 4> FoldedOps;
+ return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps);
+}
+
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
/// specified opcode and operands. If successful, the constant result is
/// returned, if not, null is returned. Note that this function can fail when
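
The FoldedOps set added above is plain memoization over a DAG of constant expressions: a shared subexpression is processed the first time it is reached and skipped on every later path to it. A self-contained sketch of that pattern (the Expr type and names are illustrative, not LLVM API):

#include <cstdio>
#include <unordered_set>
#include <vector>

struct Expr {
  std::vector<Expr *> Ops;   // operands; shared nodes make this a DAG
};

// The set plays the role of FoldedOps: each node is "folded" at most once.
static void foldOnce(Expr *E, std::unordered_set<Expr *> &Folded, int &Work) {
  for (Expr *Op : E->Ops)
    if (Folded.insert(Op).second)   // first visit only, like FoldedOps.insert()
      foldOnce(Op, Folded, Work);
  ++Work;                           // pretend to fold this node
}

int main() {
  Expr Leaf;                        // shared by both operands of Root
  Expr L{{&Leaf}}, R{{&Leaf}};
  Expr Root{{&L, &R}};
  std::unordered_set<Expr *> Folded;
  int Work = 0;
  foldOnce(&Root, Folded, Work);
  std::printf("folded %d nodes\n", Work);   // prints 4: Leaf handled once, not twice
  return 0;
}
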
diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
index aa5164e..1c1816d 100644
--- a/contrib/llvm/lib/Analysis/IPA/IPA.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index 4a3c74e..bf77451 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -1711,7 +1711,7 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
// subobject at its beginning) or function, both are pointers to one past the
// last element of the same array object, or one is a pointer to one past the
// end of one array object and the other is a pointer to the start of a
-// different array object that happens to immediately follow the first array
+// different array object that happens to immediately follow the first array
// object in the address space.)
//
// C11's version is more restrictive, however there's no reason why an argument
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
index d490d54..9c0d8ac 100644
--- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -364,26 +364,6 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
return true;
}
-/// \brief Compute the size of the underlying object pointed by Ptr. Returns
-/// true and the object size in Size if successful, and false otherwise.
-/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
-/// byval arguments, and global variables.
-bool llvm::getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size,
- const DataLayout *TD,
- const TargetLibraryInfo *TLI,
- bool RoundToAlign) {
- if (!TD)
- return false;
-
- ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign);
- SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
- if (!Visitor.knownSize(Data))
- return false;
-
- Size = Data.first.getZExtValue();
- return true;
-}
-
STATISTIC(ObjectVisitorArgument,
"Number of arguments with unsolved size and offset");
@@ -409,23 +389,16 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD,
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
V = V->stripPointerCasts();
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ // If we have already seen this instruction, bail out. Cycles can happen in
+ // unreachable code after constant propagation.
+ if (!SeenInsts.insert(I))
+ return unknown();
- if (isa<Instruction>(V) || isa<GEPOperator>(V)) {
- // Return cached value or insert unknown in cache if size of V was not
- // computed yet in order to avoid recursions in PHis.
- std::pair<CacheMapTy::iterator, bool> CacheVal =
- CacheMap.insert(std::make_pair(V, unknown()));
- if (!CacheVal.second)
- return CacheVal.first->second;
-
- SizeOffsetType Result;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- Result = visitGEPOperator(*GEP);
- else
- Result = visit(cast<Instruction>(*V));
- return CacheMap[V] = Result;
+ return visitGEPOperator(*GEP);
+ return visit(*I);
}
-
if (Argument *A = dyn_cast<Argument>(V))
return visitArgument(*A);
if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V))
@@ -439,6 +412,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (CE->getOpcode() == Instruction::IntToPtr)
return unknown(); // clueless
+ if (CE->getOpcode() == Instruction::GetElementPtr)
+ return visitGEPOperator(cast<GEPOperator>(*CE));
}
DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V
@@ -572,21 +547,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) {
return unknown();
}
-SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) {
- if (PHI.getNumIncomingValues() == 0)
- return unknown();
-
- SizeOffsetType Ret = compute(PHI.getIncomingValue(0));
- if (!bothKnown(Ret))
- return unknown();
-
- // Verify that all PHI incoming pointers have the same size and offset.
- for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) {
- SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i));
- if (!bothKnown(EdgeData) || EdgeData != Ret)
- return unknown();
- }
- return Ret;
+SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) {
+ // too complex to analyze statically.
+ return unknown();
}
SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
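
The practical effect of the visitPHINode() change, in hypothetical C terms: when the pointer reaching an llvm.objectsize() call comes through a PHI node, the visitor now answers "unknown" immediately, even in cases where every incoming value has the same size and offset and the old code could still fold the call.

#include <stdio.h>

static char a[100], b[100];

int main(int argc, char **argv) {
  char *q = (argc > 1) ? a : b;   /* may reach the intrinsic as a select or a PHI */
  /* __builtin_object_size lowers to llvm.objectsize(); if q arrives through a
     PHI (rather than a select), the visitor now reports the "unknown" sentinel,
     (size_t)-1 for type 0, even though both incoming objects are 100 bytes. */
  printf("%zu\n", __builtin_object_size(q, 0));
  return 0;
}
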
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 2240e9d..c0009cb 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -47,9 +47,7 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
"Number of block queries that were completely cached");
// Limit for the number of instructions to scan in a block.
-// FIXME: Figure out what a sane value is for this.
-// (500 is relatively insane.)
-static const int BlockScanLimit = 500;
+static const int BlockScanLimit = 100;
char MemoryDependenceAnalysis::ID = 0;
@@ -913,7 +911,6 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
SmallVectorImpl<NonLocalDepResult> &Result,
DenseMap<BasicBlock*, Value*> &Visited,
bool SkipFirstBlock) {
-
// Look up the cached info for Pointer.
ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
@@ -1001,8 +998,17 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
I != E; ++I) {
Visited.insert(std::make_pair(I->getBB(), Addr));
- if (!I->getResult().isNonLocal() && DT->isReachableFromEntry(I->getBB()))
+ if (I->getResult().isNonLocal()) {
+ continue;
+ }
+
+ if (!DT) {
+ Result.push_back(NonLocalDepResult(I->getBB(),
+ MemDepResult::getUnknown(),
+ Addr));
+ } else if (DT->isReachableFromEntry(I->getBB())) {
Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
+ }
}
++NumCacheCompleteNonLocalPtr;
return false;
@@ -1047,9 +1053,16 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
NumSortedEntries);
// If we got a Def or Clobber, add this to the list of results.
- if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) {
- Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
- continue;
+ if (!Dep.isNonLocal()) {
+ if (!DT) {
+ Result.push_back(NonLocalDepResult(BB,
+ MemDepResult::getUnknown(),
+ Pointer.getAddr()));
+ continue;
+ } else if (DT->isReachableFromEntry(BB)) {
+ Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
+ continue;
+ }
}
}
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
index fad5074..8577025 100644
--- a/contrib/llvm/lib/Analysis/RegionInfo.cpp
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -79,10 +79,43 @@ void Region::replaceExit(BasicBlock *BB) {
exit = BB;
}
+void Region::replaceEntryRecursive(BasicBlock *NewEntry) {
+ std::vector<Region *> RegionQueue;
+ BasicBlock *OldEntry = getEntry();
+
+ RegionQueue.push_back(this);
+ while (!RegionQueue.empty()) {
+ Region *R = RegionQueue.back();
+ RegionQueue.pop_back();
+
+ R->replaceEntry(NewEntry);
+ for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+ if ((*RI)->getEntry() == OldEntry)
+ RegionQueue.push_back(*RI);
+ }
+}
+
+void Region::replaceExitRecursive(BasicBlock *NewExit) {
+ std::vector<Region *> RegionQueue;
+ BasicBlock *OldExit = getExit();
+
+ RegionQueue.push_back(this);
+ while (!RegionQueue.empty()) {
+ Region *R = RegionQueue.back();
+ RegionQueue.pop_back();
+
+ R->replaceExit(NewExit);
+ for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+ if ((*RI)->getExit() == OldExit)
+ RegionQueue.push_back(*RI);
+ }
+}
+
bool Region::contains(const BasicBlock *B) const {
BasicBlock *BB = const_cast<BasicBlock*>(B);
- assert(DT->getNode(BB) && "BB not part of the dominance tree");
+ if (!DT->getNode(BB))
+ return false;
BasicBlock *entry = getEntry(), *exit = getExit();
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index 6ea915f..f876748 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3937,10 +3937,19 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
/// before taking the branch. For loops with multiple exits, it may not be the
/// number times that the loop header executes because the loop may exit
/// prematurely via another branch.
+///
+/// FIXME: We conservatively call getBackedgeTakenCount(L) instead of
+/// getExitCount(L, ExitingBlock) to compute a safe trip count considering all
+/// loop exits. getExitCount() may return an exact count for this branch
+/// assuming no-signed-wrap. The number of well-defined iterations may actually
+/// be higher than this trip count if this exit test is skipped and the loop
+/// exits via a different branch. Ideally, getExitCount() would know whether it
+/// depends on a NSW assumption, and we would only fall back to a conservative
+/// trip count in that case.
unsigned ScalarEvolution::
-getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) {
+getSmallConstantTripCount(Loop *L, BasicBlock */*ExitingBlock*/) {
const SCEVConstant *ExitCount =
- dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
+ dyn_cast<SCEVConstant>(getBackedgeTakenCount(L));
if (!ExitCount)
return 0;
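
A hypothetical loop of the multi-exit shape the surrounding comment describes: the exact count for the i != 1000 exit alone is 1000, but the loop usually leaves earlier through the return, so only the backedge-taken count over all exits is a safe small trip count, which is why the code above now asks for getBackedgeTakenCount(L) instead of getExitCount().

int first_negative(const int *a) {
  for (int i = 0; i != 1000; ++i)   /* exiting block #1: per-exit count 1000 */
    if (a[i] < 0)
      return i;                     /* exiting block #2: usually taken first */
  return -1;
}
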
@@ -3967,8 +3976,8 @@ getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) {
/// As explained in the comments for getSmallConstantTripCount, this assumes
/// that control exits the loop via ExitingBlock.
unsigned ScalarEvolution::
-getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) {
- const SCEV *ExitCount = getExitCount(L, ExitingBlock);
+getSmallConstantTripMultiple(Loop *L, BasicBlock */*ExitingBlock*/) {
+ const SCEV *ExitCount = getBackedgeTakenCount(L);
if (ExitCount == getCouldNotCompute())
return 1;
@@ -3997,7 +4006,7 @@ getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) {
}
// getExitCount - Get the expression for the number of loop iterations for which
-// this loop is guaranteed not to exit via ExitintBlock. Otherwise return
+// this loop is guaranteed not to exit via ExitingBlock. Otherwise return
// SCEVCouldNotCompute.
const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
@@ -4382,26 +4391,36 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
// Proceed to the next level to examine the exit condition expression.
return ComputeExitLimitFromCond(L, ExitBr->getCondition(),
ExitBr->getSuccessor(0),
- ExitBr->getSuccessor(1));
+ ExitBr->getSuccessor(1),
+ /*IsSubExpr=*/false);
}
/// ComputeExitLimitFromCond - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of ExitCond, TBB, and FBB.
+///
+/// @param IsSubExpr is true if ExitCond does not directly control the exit
+/// branch. In this case, we cannot assume that the loop only exits when the
+/// condition is true and cannot infer that failing to meet the condition prior
+/// to integer wraparound results in undefined behavior.
ScalarEvolution::ExitLimit
ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
Value *ExitCond,
BasicBlock *TBB,
- BasicBlock *FBB) {
+ BasicBlock *FBB,
+ bool IsSubExpr) {
// Check if the controlling expression for this loop is an And or Or.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
- ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
- ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
+ bool EitherMayExit = L->contains(TBB);
+ ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+ IsSubExpr || EitherMayExit);
+ ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+ IsSubExpr || EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
- if (L->contains(TBB)) {
+ if (EitherMayExit) {
// Both conditions must be true for the loop to continue executing.
// Choose the less conservative count.
if (EL0.Exact == getCouldNotCompute() ||
@@ -4429,11 +4448,14 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
}
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
- ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
- ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
+ bool EitherMayExit = L->contains(FBB);
+ ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+ IsSubExpr || EitherMayExit);
+ ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+ IsSubExpr || EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
- if (L->contains(FBB)) {
+ if (EitherMayExit) {
// Both conditions must be false for the loop to continue executing.
// Choose the less conservative count.
if (EL0.Exact == getCouldNotCompute() ||
@@ -4464,7 +4486,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
- return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB);
+ return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, IsSubExpr);
// Check for a constant condition. These are normally stripped out by
// SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
@@ -4490,7 +4512,8 @@ ScalarEvolution::ExitLimit
ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
ICmpInst *ExitCond,
BasicBlock *TBB,
- BasicBlock *FBB) {
+ BasicBlock *FBB,
+ bool IsSubExpr) {
// If the condition was exit on true, convert the condition to exit on false
ICmpInst::Predicate Cond;
@@ -4542,7 +4565,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
switch (Cond) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
- ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+ ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
@@ -4553,24 +4576,24 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
break;
}
case ICmpInst::ICMP_SLT: {
- ExitLimit EL = HowManyLessThans(LHS, RHS, L, true);
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, true, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_SGT: {
ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
- getNotSCEV(RHS), L, true);
+ getNotSCEV(RHS), L, true, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_ULT: {
- ExitLimit EL = HowManyLessThans(LHS, RHS, L, false);
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, false, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_UGT: {
ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
- getNotSCEV(RHS), L, false);
+ getNotSCEV(RHS), L, false, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
@@ -5439,7 +5462,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
/// effectively V != 0. We know and take advantage of the fact that this
/// expression only being used in a comparison by zero context.
ScalarEvolution::ExitLimit
-ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
@@ -5537,19 +5560,20 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
}
// If the recurrence is known not to wraparound, unsigned divide computes the
- // back edge count. We know that the value will either become zero (and thus
- // the loop terminates), that the loop will terminate through some other exit
- // condition first, or that the loop has undefined behavior. This means
- // we can't "miss" the exit value, even with nonunit stride.
+ // back edge count. (Ideally we would have an "isexact" bit for udiv). We know
+ // that the value will either become zero (and thus the loop terminates), that
+ // the loop will terminate through some other exit condition first, or that
+ // the loop has undefined behavior. This means we can't "miss" the exit
+ // value, even with nonunit stride.
//
- // FIXME: Prove that loops always exhibits *acceptable* undefined
- // behavior. Loops must exhibit defined behavior until a wrapped value is
- // actually used. So the trip count computed by udiv could be smaller than the
- // number of well-defined iterations.
- if (AddRec->getNoWrapFlags(SCEV::FlagNW)) {
- // FIXME: We really want an "isexact" bit for udiv.
+ // This is only valid for expressions that directly compute the loop exit. It
+ // is invalid for subexpressions in which the loop may exit through this
+ // branch even if this subexpression is false. In that case, the trip count
+ // computed by this udiv could be smaller than the number of well-defined
+ // iterations.
+ if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW))
return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
- }
+
// Then, try to solve the above equation provided that Start is constant.
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
@@ -6315,9 +6339,14 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
/// HowManyLessThans - Return the number of times a backedge containing the
/// specified less-than comparison will execute. If not computable, return
/// CouldNotCompute.
+///
+/// @param IsSubExpr is true when the LHS < RHS condition does not directly
+/// control the branch. In this case, we can only compute an iteration count for
+/// a subexpression that cannot overflow before evaluating true.
ScalarEvolution::ExitLimit
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
- const Loop *L, bool isSigned) {
+ const Loop *L, bool isSigned,
+ bool IsSubExpr) {
// Only handle: "ADDREC < LoopInvariant".
if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
@@ -6326,10 +6355,12 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
return getCouldNotCompute();
// Check to see if we have a flag which makes analysis easy.
- bool NoWrap = isSigned ?
- AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNW)) :
- AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNW));
-
+ bool NoWrap = false;
+ if (!IsSubExpr) {
+ NoWrap = AddRec->getNoWrapFlags(
+ (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW))
+ | SCEV::FlagNW));
+ }
if (AddRec->isAffine()) {
unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
const SCEV *Step = AddRec->getStepRecurrence(*this);
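
A hypothetical loop in which the new IsSubExpr flag matters: the i != n test sits under an &&, and the stride is 2, so i can step past n without the program having undefined behavior -- the loop simply keeps running until sum < limit fails. The udiv shortcut in HowFarToZero assumes the loop stops when i reaches n, which could now undercount the well-defined iterations, so it is only taken when the condition directly controls the exit (IsSubExpr is false).

int partial_sum(int n, int limit, const int *a) {
  int sum = 0;
  /* i != n is only one conjunct of the exit condition and the stride is 2, so i
     can step past n without undefined behavior: the loop keeps going and
     eventually leaves via sum < limit. The number of well-defined iterations
     can therefore exceed the count the udiv shortcut would compute. */
  for (int i = 0; i != n && sum < limit; i += 2)
    sum += a[i];
  return sum;
}
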
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 68e43b2..bbf3c3a 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -71,6 +71,7 @@ using namespace llvm;
// achieved by stripping the !tbaa tags from IR, but this option is sometimes
// more convenient.
static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
+static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false));
namespace {
/// TBAANode - This is a simple wrapper around an MDNode which provides a
@@ -109,6 +110,97 @@ namespace {
return CI->getValue()[0];
}
};
+
+ /// This is a simple wrapper around an MDNode which provides a
+ /// higher-level interface by hiding the details of how alias analysis
+ /// information is encoded in its operands.
+ class TBAAStructTagNode {
+ /// This node should be created with createTBAAStructTagNode.
+ const MDNode *Node;
+
+ public:
+ TBAAStructTagNode() : Node(0) {}
+ explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
+
+ /// Get the MDNode for this TBAAStructTagNode.
+ const MDNode *getNode() const { return Node; }
+
+ const MDNode *getBaseType() const {
+ return dyn_cast_or_null<MDNode>(Node->getOperand(0));
+ }
+ const MDNode *getAccessType() const {
+ return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ }
+ uint64_t getOffset() const {
+ return cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
+ }
+ /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
+ /// objects which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 4)
+ return false;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3));
+ if (!CI)
+ return false;
+ return CI->getValue()[0];
+ }
+ };
+
+ /// This is a simple wrapper around an MDNode which provides a
+ /// higher-level interface by hiding the details of how alias analysis
+ /// information is encoded in its operands.
+ class TBAAStructTypeNode {
+ /// This node should be created with createTBAAStructTypeNode.
+ const MDNode *Node;
+
+ public:
+ TBAAStructTypeNode() : Node(0) {}
+ explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
+
+ /// Get the MDNode for this TBAAStructTypeNode.
+ const MDNode *getNode() const { return Node; }
+
+ /// Get this TBAAStructTypeNode's field in the type DAG with
+ /// given offset. Update the offset to be relative to the field type.
+ TBAAStructTypeNode getParent(uint64_t &Offset) const {
+ // Parent can be omitted for the root node.
+ if (Node->getNumOperands() < 2)
+ return TBAAStructTypeNode();
+
+ // Special handling for a scalar type node.
+ if (Node->getNumOperands() <= 3) {
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ if (!P)
+ return TBAAStructTypeNode();
+ return TBAAStructTypeNode(P);
+ }
+
+ // Assume the offsets are in order. We return the previous field if
+ // the current offset is bigger than the given offset.
+ unsigned TheIdx = 0;
+ for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
+ uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))->
+ getZExtValue();
+ if (Cur > Offset) {
+ assert(Idx >= 3 &&
+ "TBAAStructTypeNode::getParent should have an offset match!");
+ TheIdx = Idx - 2;
+ break;
+ }
+ }
+ // Move along the last field.
+ if (TheIdx == 0)
+ TheIdx = Node->getNumOperands() - 2;
+ uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))->
+ getZExtValue();
+ Offset -= Cur;
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
+ if (!P)
+ return TBAAStructTypeNode();
+ return TBAAStructTypeNode(P);
+ }
+ };
}
namespace {
@@ -137,6 +229,7 @@ namespace {
}
bool Aliases(const MDNode *A, const MDNode *B) const;
+ bool PathAliases(const MDNode *A, const MDNode *B) const;
private:
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -171,6 +264,9 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool
TypeBasedAliasAnalysis::Aliases(const MDNode *A,
const MDNode *B) const {
+ if (EnableStructPathTBAA)
+ return PathAliases(A, B);
+
// Keep track of the root node for A and B.
TBAANode RootA, RootB;
@@ -209,6 +305,67 @@ TypeBasedAliasAnalysis::Aliases(const MDNode *A,
return false;
}
+/// Test whether the struct-path tag represented by A may alias the
+/// struct-path tag represented by B.
+bool
+TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
+ const MDNode *B) const {
+ // Keep track of the root node for A and B.
+ TBAAStructTypeNode RootA, RootB;
+ TBAAStructTagNode TagA(A), TagB(B);
+
+ // TODO: We need to check if AccessType of TagA encloses AccessType of
+ // TagB to support aggregate AccessType. If yes, return true.
+
+ // Start from the base type of A, follow the edge with the correct offset in
+ // the type DAG and adjust the offset until we reach the base type of B or
+ // until we reach the Root node.
+ // Compare the adjusted offset once we have the same base.
+
+ // Climb the type DAG from base type of A to see if we reach base type of B.
+ const MDNode *BaseA = TagA.getBaseType();
+ const MDNode *BaseB = TagB.getBaseType();
+ uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
+ for (TBAAStructTypeNode T(BaseA); ; ) {
+ if (T.getNode() == BaseB)
+ // Base type of A encloses base type of B, check if the offsets match.
+ return OffsetA == OffsetB;
+
+ RootA = T;
+ // Follow the edge with the correct offset, OffsetA will be adjusted to
+ // be relative to the field type.
+ T = T.getParent(OffsetA);
+ if (!T.getNode())
+ break;
+ }
+
+ // Reset OffsetA and climb the type DAG from base type of B to see if we reach
+ // base type of A.
+ OffsetA = TagA.getOffset();
+ for (TBAAStructTypeNode T(BaseB); ; ) {
+ if (T.getNode() == BaseA)
+ // Base type of B encloses base type of A, check if the offsets match.
+ return OffsetA == OffsetB;
+
+ RootB = T;
+ // Follow the edge with the correct offset, OffsetB will be adjusted to
+ // be relative to the field type.
+ T = T.getParent(OffsetB);
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
+
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
+ // If they have the same root, then we've proved there's no alias.
+ return false;
+}
+
AliasAnalysis::AliasResult
TypeBasedAliasAnalysis::alias(const Location &LocA,
const Location &LocB) {
@@ -240,7 +397,8 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
- if (TBAANode(M).TypeIsImmutable())
+ if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
+ (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
return true;
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -256,7 +414,8 @@ TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
// If this is an "immutable" type, we can assume the call doesn't write
// to memory.
if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
- if (TBAANode(M).TypeIsImmutable())
+ if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
+ (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
Min = OnlyReadsMemory;
return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
@@ -298,3 +457,55 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
return AliasAnalysis::getModRefInfo(CS1, CS2);
}
+
+MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
+ if (!A || !B)
+ return NULL;
+
+ if (A == B)
+ return A;
+
+ // For struct-path aware TBAA, we use the access type of the tag.
+ if (EnableStructPathTBAA) {
+ A = cast_or_null<MDNode>(A->getOperand(1));
+ if (!A) return 0;
+ B = cast_or_null<MDNode>(B->getOperand(1));
+ if (!B) return 0;
+ }
+
+ SmallVector<MDNode *, 4> PathA;
+ MDNode *T = A;
+ while (T) {
+ PathA.push_back(T);
+ T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+ }
+
+ SmallVector<MDNode *, 4> PathB;
+ T = B;
+ while (T) {
+ PathB.push_back(T);
+ T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+ }
+
+ int IA = PathA.size() - 1;
+ int IB = PathB.size() - 1;
+
+ MDNode *Ret = 0;
+ while (IA >= 0 && IB >=0) {
+ if (PathA[IA] == PathB[IB])
+ Ret = PathA[IA];
+ else
+ break;
+ --IA;
+ --IB;
+ }
+ if (!EnableStructPathTBAA)
+ return Ret;
+
+ if (!Ret)
+ return 0;
+ // We need to convert from a type node to a tag node.
+ Type *Int64 = IntegerType::get(A->getContext(), 64);
+ Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) };
+ return MDNode::get(A->getContext(), Ops);
+}
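A minimal sketch of how the new struct-path wrappers are meant to be driven, assuming M is an access-tag MDNode of the { BaseType, AccessType, Offset } form attached as !tbaa metadata when -struct-path-tbaa is enabled (the variable names here are illustrative only):

    TBAAStructTagNode Tag(M);
    const MDNode *Base   = Tag.getBaseType();    // type the access is rooted at
    const MDNode *Access = Tag.getAccessType();  // scalar type actually touched
    uint64_t Offset      = Tag.getOffset();      // byte offset within Base
    (void)Access;

    // One climb step follows the field covering Offset and rebases Offset so
    // that it is relative to that field's own type, which is exactly what the
    // two climb loops in PathAliases rely on.
    TBAAStructTypeNode Field = TBAAStructTypeNode(Base).getParent(Offset);
    (void)Field;

PathAliases climbs from each tag's base type in turn; only when a climb reaches the other tag's base type does the rebased offset comparison decide the result, and tags with different roots are conservatively treated as may-alias.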
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp
index f46383b..e7a9f2a 100644
--- a/contrib/llvm/lib/AsmParser/LLLexer.cpp
+++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp
@@ -582,6 +582,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(optsize);
KEYWORD(readnone);
KEYWORD(readonly);
+ KEYWORD(returned);
KEYWORD(returns_twice);
KEYWORD(signext);
KEYWORD(sret);
diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp
index c8da1f8..62d8070d 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.cpp
+++ b/contrib/llvm/lib/AsmParser/LLParser.cpp
@@ -528,7 +528,7 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) {
if (Result) return false;
// Otherwise, create MDNode forward reference.
- MDNode *FwdNode = MDNode::getTemporary(Context, ArrayRef<Value*>());
+ MDNode *FwdNode = MDNode::getTemporary(Context, None);
ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc());
if (NumberedMetadata.size() <= MID)
@@ -878,8 +878,9 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
// Target-independent attributes:
case lltok::kw_align: {
- // As a hack, we allow "align 2" on functions as a synonym for "alignstack
- // 2".
+ // As a hack, we allow function alignment to be initially parsed as an
+ // attribute on a function declaration/definition or added to an attribute
+ // group and later moved to the alignment field.
unsigned Alignment;
if (inAttrGrp) {
Lex.Lex();
@@ -943,6 +944,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_nest:
case lltok::kw_noalias:
case lltok::kw_nocapture:
+ case lltok::kw_returned:
case lltok::kw_sret:
HaveError |=
Error(Lex.getLoc(),
@@ -1155,21 +1157,35 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_nest: B.addAttribute(Attribute::Nest); break;
case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break;
case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break;
+ case lltok::kw_returned: B.addAttribute(Attribute::Returned); break;
case lltok::kw_signext: B.addAttribute(Attribute::SExt); break;
case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break;
case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break;
- case lltok::kw_alignstack: case lltok::kw_nounwind:
- case lltok::kw_alwaysinline: case lltok::kw_optsize:
- case lltok::kw_inlinehint: case lltok::kw_readnone:
- case lltok::kw_minsize: case lltok::kw_readonly:
- case lltok::kw_naked: case lltok::kw_returns_twice:
- case lltok::kw_nobuiltin: case lltok::kw_sanitize_address:
- case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_memory:
- case lltok::kw_noinline: case lltok::kw_sanitize_thread:
- case lltok::kw_nonlazybind: case lltok::kw_ssp:
- case lltok::kw_noredzone: case lltok::kw_sspreq:
- case lltok::kw_noreturn: case lltok::kw_uwtable:
+ case lltok::kw_alignstack:
+ case lltok::kw_alwaysinline:
+ case lltok::kw_inlinehint:
+ case lltok::kw_minsize:
+ case lltok::kw_naked:
+ case lltok::kw_nobuiltin:
+ case lltok::kw_noduplicate:
+ case lltok::kw_noimplicitfloat:
+ case lltok::kw_noinline:
+ case lltok::kw_nonlazybind:
+ case lltok::kw_noredzone:
+ case lltok::kw_noreturn:
+ case lltok::kw_nounwind:
+ case lltok::kw_optsize:
+ case lltok::kw_readnone:
+ case lltok::kw_readonly:
+ case lltok::kw_returns_twice:
+ case lltok::kw_sanitize_address:
+ case lltok::kw_sanitize_memory:
+ case lltok::kw_sanitize_thread:
+ case lltok::kw_ssp:
+ case lltok::kw_sspreq:
+ case lltok::kw_sspstrong:
+ case lltok::kw_uwtable:
HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
break;
}
@@ -1195,24 +1211,39 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break;
// Error handling.
- case lltok::kw_sret: case lltok::kw_nocapture:
- case lltok::kw_byval: case lltok::kw_nest:
+ case lltok::kw_align:
+ case lltok::kw_byval:
+ case lltok::kw_nest:
+ case lltok::kw_nocapture:
+ case lltok::kw_returned:
+ case lltok::kw_sret:
HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute");
break;
- case lltok::kw_align: case lltok::kw_noreturn:
- case lltok::kw_alignstack: case lltok::kw_nounwind:
- case lltok::kw_alwaysinline: case lltok::kw_optsize:
- case lltok::kw_inlinehint: case lltok::kw_readnone:
- case lltok::kw_minsize: case lltok::kw_readonly:
- case lltok::kw_naked: case lltok::kw_returns_twice:
- case lltok::kw_nobuiltin: case lltok::kw_sanitize_address:
- case lltok::kw_noduplicate: case lltok::kw_sanitize_memory:
- case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_thread:
- case lltok::kw_noinline: case lltok::kw_ssp:
- case lltok::kw_nonlazybind: case lltok::kw_sspreq:
- case lltok::kw_noredzone: case lltok::kw_sspstrong:
- case lltok::kw_uwtable:
+ case lltok::kw_alignstack:
+ case lltok::kw_alwaysinline:
+ case lltok::kw_inlinehint:
+ case lltok::kw_minsize:
+ case lltok::kw_naked:
+ case lltok::kw_nobuiltin:
+ case lltok::kw_noduplicate:
+ case lltok::kw_noimplicitfloat:
+ case lltok::kw_noinline:
+ case lltok::kw_nonlazybind:
+ case lltok::kw_noredzone:
+ case lltok::kw_noreturn:
+ case lltok::kw_nounwind:
+ case lltok::kw_optsize:
+ case lltok::kw_readnone:
+ case lltok::kw_readonly:
+ case lltok::kw_returns_twice:
+ case lltok::kw_sanitize_address:
+ case lltok::kw_sanitize_memory:
+ case lltok::kw_sanitize_thread:
+ case lltok::kw_ssp:
+ case lltok::kw_sspreq:
+ case lltok::kw_sspstrong:
+ case lltok::kw_uwtable:
HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
break;
}
@@ -4232,7 +4263,9 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
- if (!Ptr->getType()->getScalarType()->isPointerTy())
+ Type *BaseType = Ptr->getType();
+ PointerType *BasePointerType = dyn_cast<PointerType>(BaseType->getScalarType());
+ if (!BasePointerType)
return Error(Loc, "base of getelementptr must be a pointer");
SmallVector<Value*, 16> Indices;
@@ -4257,7 +4290,10 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
Indices.push_back(Val);
}
- if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices))
+ if (!Indices.empty() && !BasePointerType->getElementType()->isSized())
+ return Error(Loc, "base element of getelementptr must be sized");
+
+ if (!GetElementPtrInst::getIndexedType(BaseType, Indices))
return Error(Loc, "invalid getelementptr indices");
Inst = GetElementPtrInst::Create(Ptr, Indices);
if (InBounds)
diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h
index cd25ba3..3bf54fa 100644
--- a/contrib/llvm/lib/AsmParser/LLToken.h
+++ b/contrib/llvm/lib/AsmParser/LLToken.h
@@ -114,6 +114,7 @@ namespace lltok {
kw_optsize,
kw_readnone,
kw_readonly,
+ kw_returned,
kw_returns_twice,
kw_signext,
kw_ssp,
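The lexer, parser and token changes above only teach the textual IR reader the new returned parameter keyword. A hedged sketch of attaching the same attribute through the C++ API, assuming M is the Module being edited, that Function::addAttribute(unsigned, Attribute::AttrKind) is available as in this release, and that the Attribute::Returned enumerator referenced elsewhere in this change exists (the function name is hypothetical):

    // Mark the first parameter as 'returned': the callee promises to return
    // that argument, which lets tail-call analysis look through the call.
    if (Function *F = M.getFunction("copy_like"))   // hypothetical name
      F->addAttribute(1, Attribute::Returned);      // parameter indices are 1-based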
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
index 5cd6c55..23630e5 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
@@ -10,6 +10,7 @@
#include "llvm-c/BitReader.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstring>
#include <string>
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index f348843..e6ff4b4 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -405,7 +405,7 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
}
// Create and return a placeholder, which will later be RAUW'd.
- Value *V = MDNode::getTemporary(Context, ArrayRef<Value*>());
+ Value *V = MDNode::getTemporary(Context, None);
MDValuePtrs[Idx] = V;
return V;
}
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp
index 9f51c35..985208c 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp
@@ -9,6 +9,7 @@
#include "llvm-c/BitWriter.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index dd7282c..4731af5 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -201,62 +201,161 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
}
}
+static bool isNoopBitcast(Type *T1, Type *T2,
+ const TargetLowering& TLI) {
+ return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
+ (isa<VectorType>(T1) && isa<VectorType>(T2) &&
+ TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2)));
+}
-/// getNoopInput - If V is a noop (i.e., lowers to no machine code), look
-/// through it (and any transitive noop operands to it) and return its input
-/// value. This is used to determine if a tail call can be formed.
-///
-static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
- // If V is not an instruction, it can't be looked through.
- const Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0 || !I->hasOneUse() || I->getNumOperands() == 0) return V;
-
- Value *Op = I->getOperand(0);
+/// sameNoopInput - Return true if V1 == V2, else if either V1 or V2 is a noop
+/// (i.e., lowers to no machine code), look through it (and any transitive noop
+/// operands to it) and check if it has the same noop input value. This is
+/// used to determine if a tail call can be formed.
+static bool sameNoopInput(const Value *V1, const Value *V2,
+ SmallVectorImpl<unsigned> &Els1,
+ SmallVectorImpl<unsigned> &Els2,
+ const TargetLowering &TLI) {
+ using std::swap;
+ bool swapParity = false;
+ bool equalEls = Els1 == Els2;
+ while (true) {
+ if ((equalEls && V1 == V2) || isa<UndefValue>(V1) || isa<UndefValue>(V2)) {
+ if (swapParity)
+ // Revert to original Els1 and Els2 to avoid confusing recursive calls
+ swap(Els1, Els2);
+ return true;
+ }
- // Look through truly no-op truncates.
- if (isa<TruncInst>(I) &&
- TLI.isTruncateFree(I->getOperand(0)->getType(), I->getType()))
- return getNoopInput(I->getOperand(0), TLI);
-
- // Look through truly no-op bitcasts.
- if (isa<BitCastInst>(I)) {
- // No type change at all.
- if (Op->getType() == I->getType())
- return getNoopInput(Op, TLI);
+ // Try to look through V1; if V1 is not an instruction, it can't be looked
+ // through.
+ const Instruction *I = dyn_cast<Instruction>(V1);
+ const Value *NoopInput = 0;
+ if (I != 0 && I->getNumOperands() > 0) {
+ Value *Op = I->getOperand(0);
+ if (isa<TruncInst>(I)) {
+ // Look through truly no-op truncates.
+ if (TLI.isTruncateFree(Op->getType(), I->getType()))
+ NoopInput = Op;
+ } else if (isa<BitCastInst>(I)) {
+ // Look through truly no-op bitcasts.
+ if (isNoopBitcast(Op->getType(), I->getType(), TLI))
+ NoopInput = Op;
+ } else if (isa<GetElementPtrInst>(I)) {
+ // Look through getelementptr
+ if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+ NoopInput = Op;
+ } else if (isa<IntToPtrInst>(I)) {
+ // Look through inttoptr.
+ // Make sure this isn't a truncating or extending cast. We could
+ // support this eventually, but don't bother for now.
+ if (!isa<VectorType>(I->getType()) &&
+ TLI.getPointerTy().getSizeInBits() ==
+ cast<IntegerType>(Op->getType())->getBitWidth())
+ NoopInput = Op;
+ } else if (isa<PtrToIntInst>(I)) {
+ // Look through ptrtoint.
+ // Make sure this isn't a truncating or extending cast. We could
+ // support this eventually, but don't bother for now.
+ if (!isa<VectorType>(I->getType()) &&
+ TLI.getPointerTy().getSizeInBits() ==
+ cast<IntegerType>(I->getType())->getBitWidth())
+ NoopInput = Op;
+ } else if (isa<CallInst>(I)) {
+ // Look through call
+ for (User::const_op_iterator i = I->op_begin(),
+ // Skip Callee
+ e = I->op_end() - 1;
+ i != e; ++i) {
+ unsigned attrInd = i - I->op_begin() + 1;
+ if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+ isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+ NoopInput = *i;
+ break;
+ }
+ }
+ } else if (isa<InvokeInst>(I)) {
+ // Look through invoke
+ for (User::const_op_iterator i = I->op_begin(),
+ // Skip BB, BB, Callee
+ e = I->op_end() - 3;
+ i != e; ++i) {
+ unsigned attrInd = i - I->op_begin() + 1;
+ if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+ isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+ NoopInput = *i;
+ break;
+ }
+ }
+ }
+ }
- // Pointer to pointer cast.
- if (Op->getType()->isPointerTy() && I->getType()->isPointerTy())
- return getNoopInput(Op, TLI);
-
- if (isa<VectorType>(Op->getType()) && isa<VectorType>(I->getType()) &&
- TLI.isTypeLegal(EVT::getEVT(Op->getType())) &&
- TLI.isTypeLegal(EVT::getEVT(I->getType())))
- return getNoopInput(Op, TLI);
- }
-
- // Look through inttoptr.
- if (isa<IntToPtrInst>(I) && !isa<VectorType>(I->getType())) {
- // Make sure this isn't a truncating or extending cast. We could support
- // this eventually, but don't bother for now.
- if (TLI.getPointerTy().getSizeInBits() ==
- cast<IntegerType>(Op->getType())->getBitWidth())
- return getNoopInput(Op, TLI);
- }
+ if (NoopInput) {
+ V1 = NoopInput;
+ continue;
+ }
- // Look through ptrtoint.
- if (isa<PtrToIntInst>(I) && !isa<VectorType>(I->getType())) {
- // Make sure this isn't a truncating or extending cast. We could support
- // this eventually, but don't bother for now.
- if (TLI.getPointerTy().getSizeInBits() ==
- cast<IntegerType>(I->getType())->getBitWidth())
- return getNoopInput(Op, TLI);
+ // If we already swapped, avoid infinite loop
+ if (swapParity)
+ break;
+
+ // Otherwise, swap V1<->V2, Els1<->Els2
+ swap(V1, V2);
+ swap(Els1, Els2);
+ swapParity = !swapParity;
}
+ for (unsigned n = 0; n < 2; ++n) {
+ if (isa<InsertValueInst>(V1)) {
+ if (isa<StructType>(V1->getType())) {
+ // Look through insertvalue
+ unsigned i, e;
+ for (i = 0, e = cast<StructType>(V1->getType())->getNumElements();
+ i != e; ++i) {
+ const Value *InScalar = FindInsertedValue(const_cast<Value*>(V1), i);
+ if (InScalar == 0)
+ break;
+ Els1.push_back(i);
+ if (!sameNoopInput(InScalar, V2, Els1, Els2, TLI)) {
+ Els1.pop_back();
+ break;
+ }
+ Els1.pop_back();
+ }
+ if (i == e) {
+ if (swapParity)
+ swap(Els1, Els2);
+ return true;
+ }
+ }
+ } else if (!Els1.empty() && isa<ExtractValueInst>(V1)) {
+ const ExtractValueInst *EVI = cast<ExtractValueInst>(V1);
+ unsigned i = Els1.back();
+ // If the scalar value being inserted is an extractvalue of the right
+ // index from the call, then everything is good.
+ if (isa<StructType>(EVI->getOperand(0)->getType()) &&
+ EVI->getNumIndices() == 1 && EVI->getIndices()[0] == i) {
+ // Look through extractvalue
+ Els1.pop_back();
+ if (sameNoopInput(EVI->getOperand(0), V2, Els1, Els2, TLI)) {
+ Els1.push_back(i);
+ if (swapParity)
+ swap(Els1, Els2);
+ return true;
+ }
+ Els1.push_back(i);
+ }
+ }
- // Otherwise it's not something we can look through.
- return V;
-}
+ swap(V1, V2);
+ swap(Els1, Els2);
+ swapParity = !swapParity;
+ }
+ if (swapParity)
+ swap(Els1, Els2);
+ return false;
+}
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
@@ -264,7 +363,8 @@ static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
/// between it and the return.
///
/// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){
+bool llvm::isInTailCallPosition(ImmutableCallSite CS,
+ const TargetLowering &TLI) {
const Instruction *I = CS.getInstruction();
const BasicBlock *ExitBB = I->getParent();
const TerminatorInst *Term = ExitBB->getTerminator();
@@ -322,28 +422,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){
CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
return false;
- // Otherwise, make sure the unmodified return value of I is the return value.
- // We handle two cases: multiple return values + scalars.
- Value *RetVal = Ret->getOperand(0);
- if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType()))
- // Handle scalars first.
- return getNoopInput(Ret->getOperand(0), TLI) == I;
-
- // If this is an aggregate return, look through the insert/extract values and
- // see if each is transparent.
- for (unsigned i = 0, e =cast<StructType>(RetVal->getType())->getNumElements();
- i != e; ++i) {
- const Value *InScalar = FindInsertedValue(RetVal, i);
- if (InScalar == 0) return false;
- InScalar = getNoopInput(InScalar, TLI);
-
- // If the scalar value being inserted is an extractvalue of the right index
- // from the call, then everything is good.
- const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar);
- if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 ||
- EVI->getIndices()[0] != i)
- return false;
- }
-
- return true;
+ // Otherwise, make sure the return value and I have the same value
+ SmallVector<unsigned, 4> Els1, Els2;
+ return sameNoopInput(Ret->getOperand(0), I, Els1, Els2, TLI);
}
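The public entry point is unchanged; only the helper it delegates to differs. A sketch of the typical call, where CallInstr and TLI stand for whatever call instruction and TargetLowering the backend is currently examining (both names are assumptions, not taken from this change):

    ImmutableCallSite CS(CallInstr);
    if (isInTailCallPosition(CS, TLI)) {
      // The return value reaches the caller's return through nothing but
      // no-op conversions (possibly via a 'returned' argument or matching
      // insertvalue/extractvalue pairs), so a tail call may be emitted.
    }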
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d4a745d..84162ac 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -133,9 +133,13 @@ const DataLayout &AsmPrinter::getDataLayout() const {
return *TM.getDataLayout();
}
+StringRef AsmPrinter::getTargetTriple() const {
+ return TM.getTargetTriple();
+}
+
/// getCurrentSection() - Return the current section we are emitting to.
const MCSection *AsmPrinter::getCurrentSection() const {
- return OutStreamer.getCurrentSection();
+ return OutStreamer.getCurrentSection().first;
}
@@ -813,7 +817,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
// caller might be in the middle of an dwarf expression. We should
// probably assert that Reg >= 0 once debug info generation is more mature.
- if (int Offset = MLoc.getOffset()) {
+ if (MLoc.isIndirect()) {
if (Reg < 32) {
OutStreamer.AddComment(
dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
@@ -824,7 +828,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
OutStreamer.AddComment(Twine(Reg));
EmitULEB128(Reg);
}
- EmitSLEB128(Offset);
+ EmitSLEB128(MLoc.getOffset());
} else {
if (Reg < 32) {
OutStreamer.AddComment(
@@ -1213,7 +1217,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
if (GV->getName() == "llvm.used") {
if (MAI->hasNoDeadStrip()) // No need to emit this at all.
- EmitLLVMUsedList(GV->getInitializer());
+ EmitLLVMUsedList(cast<ConstantArray>(GV->getInitializer()));
return true;
}
@@ -1256,11 +1260,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
/// global in the specified llvm.used list for which emitUsedDirectiveFor
/// is true, as being used with this directive.
-void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
+void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
// Should be an array of 'i8*'.
- const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
- if (InitList == 0) return;
-
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
const GlobalValue *GV =
dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 156acac..31e42d4 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -141,7 +141,7 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
const MCSymbol *SectionLabel) const {
// On COFF targets, we have to emit the special .secrel32 directive.
- if (MAI->getDwarfSectionOffsetDirective()) {
+ if (MAI->needsDwarfSectionOffsetDirective()) {
OutStreamer.EmitCOFFSecRel32(Label);
return;
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 57e0acd..673867a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -112,8 +112,9 @@ DIE::~DIE() {
delete Children[i];
}
-/// Climb up the parent chain to get the compile unit DIE this DIE belongs to.
-DIE *DIE::getCompileUnit() const{
+/// Climb up the parent chain to get the compile unit DIE to which this DIE
+/// belongs.
+DIE *DIE::getCompileUnit() const {
DIE *p = getParent();
while (p) {
if (p->getTag() == dwarf::DW_TAG_compile_unit)
@@ -124,8 +125,7 @@ DIE *DIE::getCompileUnit() const{
}
#ifndef NDEBUG
-void DIE::print(raw_ostream &O, unsigned IncIndent) {
- IndentCount += IncIndent;
+void DIE::print(raw_ostream &O, unsigned IndentCount) const {
const std::string Indent(IndentCount, ' ');
bool isBlock = Abbrev.getTag() == 0;
@@ -164,11 +164,10 @@ void DIE::print(raw_ostream &O, unsigned IncIndent) {
IndentCount -= 2;
for (unsigned j = 0, M = Children.size(); j < M; ++j) {
- Children[j]->print(O, 4);
+ Children[j]->print(O, IndentCount+4);
}
if (!isBlock) O << "\n";
- IndentCount -= IncIndent;
}
void DIE::dump() {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
index c332aa2..3c06001 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
@@ -139,8 +139,7 @@ namespace llvm {
mutable unsigned IndentCount;
public:
explicit DIE(unsigned Tag)
- : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0),
- IndentCount(0) {}
+ : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0) {}
virtual ~DIE();
// Accessors.
@@ -179,7 +178,7 @@ namespace llvm {
}
#ifndef NDEBUG
- void print(raw_ostream &O, unsigned IncIndent = 0);
+ void print(raw_ostream &O, unsigned IndentCount = 0) const;
void dump();
#endif
};
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index f9b6f94..89abcff 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -685,7 +685,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val,
return true;
}
-/// addTemplateParams - Add template parameters in buffer.
+/// addTemplateParams - Add template parameters into buffer.
void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
// Add template parameters.
for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
@@ -707,7 +707,7 @@ DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) {
return getOrCreateNameSpace(DINameSpace(Context));
else if (Context.isSubprogram())
return getOrCreateSubprogramDIE(DISubprogram(Context));
- else
+ else
return getDIE(Context);
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 2b180c6..8f08c63 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -94,9 +94,6 @@ class CompileUnit {
/// DWARF version doesn't handle the language, return -1.
int64_t getDefaultLowerBound() const;
- /// getOrCreateContextDIE - Get context owner's DIE.
- DIE *getOrCreateContextDIE(DIDescriptor Context);
-
public:
CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW,
DwarfUnits *);
@@ -372,6 +369,9 @@ public:
/// createStaticMemberDIE - Create new static data member DIE.
DIE *createStaticMemberDIE(DIDerivedType DT);
+ /// getOrCreateContextDIE - Get context owner's DIE.
+ DIE *getOrCreateContextDIE(DIDescriptor Context);
+
private:
// DIEValueAllocator - All DIEValues are allocated through this allocator.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 11eb983..1e706cc 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -94,6 +94,12 @@ static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden,
namespace {
const char *DWARFGroupName = "DWARF Emission";
const char *DbgTimerName = "DWARF Debug Writer";
+
+ struct CompareFirst {
+ template <typename T> bool operator()(const T &lhs, const T &rhs) const {
+ return lhs.first < rhs.first;
+ }
+ };
} // end anonymous namespace
//===----------------------------------------------------------------------===//
@@ -170,12 +176,13 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
DwarfStrSectionSym = TextSectionSym = 0;
DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0;
+ DwarfAddrSectionSym = 0;
DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
// Turn on accelerator tables and older gdb compatibility
// for Darwin.
- bool IsDarwin = Triple(M->getTargetTriple()).isOSDarwin();
+ bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin();
if (DarwinGDBCompat == Default) {
if (IsDarwin)
IsDarwinGDBCompat = true;
@@ -596,9 +603,16 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
}
else {
// There is no need to emit empty lexical block DIE.
- if (Children.empty())
+ std::pair<ImportedEntityMap::const_iterator,
+ ImportedEntityMap::const_iterator> Range = std::equal_range(
+ ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(),
+ std::pair<const MDNode *, const MDNode *>(DS, (const MDNode*)0),
+ CompareFirst());
+ if (Children.empty() && Range.first == Range.second)
return NULL;
ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
+ for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; ++i)
+ constructImportedModuleDIE(TheCU, i->second, ScopeDIE);
}
if (!ScopeDIE) return NULL;
@@ -643,7 +657,7 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
// We look up the CUID/file/dir by concatenating them with a zero byte.
SmallString<128> NamePair;
- NamePair += CUID;
+ NamePair += utostr(CUID);
NamePair += '\0';
NamePair += DirName;
NamePair += '\0'; // Zero bytes are not allowed in paths.
@@ -681,9 +695,12 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit.getLanguage());
NewCU->addString(Die, dwarf::DW_AT_name, FN);
+
// 2.17.1 requires that we use DW_AT_low_pc for a single entry point
- // into an entity. We're using 0 (or a NULL label) for this.
- NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
+ // into an entity. We're using 0 (or a NULL label) for this. For
+ // split dwarf it's in the skeleton CU so omit it here.
+ if (!useSplitDwarf())
+ NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
// Define start line table label for each Compile Unit.
MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start",
@@ -691,21 +708,32 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym,
NewCU->getUniqueID());
+ // Use a single line table if we are using .loc and generating assembly.
+ bool UseTheFirstCU =
+ (Asm->TM.hasMCUseLoc() &&
+ Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) ||
+ (NewCU->getUniqueID() == 0);
+
// DW_AT_stmt_list is a offset of line number information for this
- // compile unit in debug_line section.
+ // compile unit in debug_line section. For split dwarf this is
+ // left in the skeleton CU and so not included.
// The line table entries are not always emitted in assembly, so it
// is not okay to use line_table_start here.
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
- NewCU->getUniqueID() == 0 ?
- Asm->GetTempSymbol("section_line") : LineTableStartSym);
- else if (NewCU->getUniqueID() == 0)
- NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
- else
- NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
- LineTableStartSym, DwarfLineSectionSym);
+ if (!useSplitDwarf()) {
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ UseTheFirstCU ?
+ Asm->GetTempSymbol("section_line") : LineTableStartSym);
+ else if (UseTheFirstCU)
+ NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+ else
+ NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+ LineTableStartSym, DwarfLineSectionSym);
+ }
- if (!CompilationDir.empty())
+ // If we're using split dwarf the compilation dir is going to be in the
+ // skeleton CU and so we don't need to duplicate it here.
+ if (!useSplitDwarf() && !CompilationDir.empty())
NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
if (DIUnit.isOptimized())
NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized);
@@ -754,6 +782,41 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
TheCU->addGlobalName(SP.getName(), SubprogramDie);
}
+void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU,
+ const MDNode *N) {
+ DIImportedModule Module(N);
+ if (!Module.Verify())
+ return;
+ if (DIE *D = TheCU->getOrCreateContextDIE(Module.getContext()))
+ constructImportedModuleDIE(TheCU, Module, D);
+}
+
+void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N,
+ DIE *Context) {
+ DIImportedModule Module(N);
+ if (!Module.Verify())
+ return;
+ return constructImportedModuleDIE(TheCU, Module, Context);
+}
+
+void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU,
+ const DIImportedModule &Module,
+ DIE *Context) {
+ assert(Module.Verify() &&
+ "Use one of the MDNode * overloads to handle invalid metadata");
+ assert(Context && "Should always have a context for an imported_module");
+ DIE *IMDie = new DIE(dwarf::DW_TAG_imported_module);
+ TheCU->insertDIE(Module, IMDie);
+ DIE *NSDie = TheCU->getOrCreateNameSpace(Module.getNameSpace());
+ unsigned FileID = getOrCreateSourceID(Module.getContext().getFilename(),
+ Module.getContext().getDirectory(),
+ TheCU->getUniqueID());
+ TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, 0, FileID);
+ TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, 0, Module.getLineNumber());
+ TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, NSDie);
+ Context->addChild(IMDie);
+}
+
// Emit all Dwarf sections that should come prior to the content. Create
// global DIEs and emit initial debug info sections. This is invoked by
// the target AsmPrinter.
@@ -775,6 +838,13 @@ void DwarfDebug::beginModule() {
for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
DICompileUnit CUNode(CU_Nodes->getOperand(i));
CompileUnit *CU = constructCompileUnit(CUNode);
+ DIArray ImportedModules = CUNode.getImportedModules();
+ for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i)
+ ScopesWithImportedEntities.push_back(std::make_pair(
+ DIImportedModule(ImportedModules.getElement(i)).getContext(),
+ ImportedModules.getElement(i)));
+ std::sort(ScopesWithImportedEntities.begin(),
+ ScopesWithImportedEntities.end(), CompareFirst());
DIArray GVs = CUNode.getGlobalVariables();
for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
CU->createGlobalVariableDIE(GVs.getElement(i));
@@ -787,11 +857,16 @@ void DwarfDebug::beginModule() {
DIArray RetainedTypes = CUNode.getRetainedTypes();
for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
+ // Emit imported_modules last so that the relevant context is already
+ // available.
+ for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i)
+ constructImportedModuleDIE(CU, ImportedModules.getElement(i));
// If we're splitting the dwarf out now that we've got the entire
// CU then construct a skeleton CU based upon it.
if (useSplitDwarf()) {
- // This should be a unique identifier when we want to build .dwp files.
- CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+ // This should be a unique identifier when we want to build .dwp files.
+ CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id,
+ dwarf::DW_FORM_data8, 0);
// Now construct the skeleton CU associated.
constructSkeletonCU(CUNode);
}
@@ -1099,7 +1174,13 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
}
if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
MachineLocation MLoc;
- MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ // TODO: Currently an offset of 0 in a DBG_VALUE means
+ // we need to generate a direct register value.
+ // There is no way to specify an indirect value with offset 0.
+ if (MI->getOperand(1).getImm() == 0)
+ MLoc.set(MI->getOperand(0).getReg());
+ else
+ MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
}
if (MI->getOperand(0).isImm())
@@ -1366,7 +1447,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
assert(TheCU && "Unable to find compile unit!");
- Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
+ if (Asm->TM.hasMCUseLoc() &&
+ Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer)
+ // Use a single line table if we are using .loc and generating assembly.
+ Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
+ else
+ Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
FunctionBeginSym = Asm->GetTempSymbol("func_begin",
Asm->getFunctionNumber());
@@ -1768,9 +1854,12 @@ void DwarfDebug::emitSectionLabels() {
emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
DwarfStrSectionSym =
emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
- if (useSplitDwarf())
+ if (useSplitDwarf()) {
DwarfStrDWOSectionSym =
emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
+ DwarfAddrSectionSym =
+ emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec");
+ }
DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(),
"debug_range");
@@ -2538,9 +2627,14 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
// This should be a unique identifier when we want to build .dwp files.
NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
- // FIXME: The addr base should be relative for each compile unit, however,
- // this one is going to be 0 anyhow.
- NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0);
+ // Relocate to the beginning of the addr_base section, else 0 for the
+ // beginning of the one for this compile unit.
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ NewCU->addLabel(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset,
+ DwarfAddrSectionSym);
+ else
+ NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base,
+ dwarf::DW_FORM_sec_offset, 0);
// 2.17.1 requires that we use DW_AT_low_pc for a single entry point
// into an entity. We're using 0, or a NULL label for this.
@@ -2548,6 +2642,7 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
// DW_AT_stmt_list is a offset of line number information for this
// compile unit in debug_line section.
+ // FIXME: Should handle multiple compile units.
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
DwarfLineSectionSym);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 81e345e..24f758d 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -392,7 +392,7 @@ class DwarfDebug {
// section offsets and are created by EmitSectionLabels.
MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
- MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym;
+ MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
@@ -433,6 +433,10 @@ class DwarfDebug {
// Holder for the skeleton information.
DwarfUnits SkeletonHolder;
+ typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32>
+ ImportedEntityMap;
+ ImportedEntityMap ScopesWithImportedEntities;
+
private:
void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
@@ -555,6 +559,18 @@ private:
/// \brief Construct subprogram DIE.
void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
+ /// \brief Construct import_module DIE.
+ void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N);
+
+ /// \brief Construct import_module DIE.
+ void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N,
+ DIE *Context);
+
+ /// \brief Construct import_module DIE.
+ void constructImportedModuleDIE(CompileUnit *TheCU,
+ const DIImportedModule &Module,
+ DIE *Context);
+
/// \brief Register a source line with debug info. Returns the unique
/// label that was emitted and which provides correspondence to the
/// source line list.
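A hypothetical source-level example of what feeds the new imported-module path: a using-directive inside a block makes the front end record a DIImportedModule whose context is that lexical scope, which is why an otherwise-empty lexical block DIE is now kept when such an entity is attached to it.

    namespace util { int verbose; }

    int f(int x) {
      if (x > 0) {
        using namespace util;   // expected to surface as a DW_TAG_imported_module
        return verbose + x;     // child of the enclosing block's scope DIE
      }
      return x;
    }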
diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
index 012ff8a..4a99184 100644
--- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -204,20 +204,25 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+ bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
+ // Assume that floating point arithmetic operations cost twice as much as
+ // integer operations.
+ unsigned OpCost = (IsFloat ? 2 : 1);
+
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
// The operation is legal. Assume it costs 1.
- // If the type is split to multiple registers, assume that thre is some
+ // If the type is split to multiple registers, assume that there is some
// overhead to this.
// TODO: Once we have extract/insert subvector cost we need to use them.
if (LT.first > 1)
- return LT.first * 2;
- return LT.first * 1;
+ return LT.first * 2 * OpCost;
+ return LT.first * 1 * OpCost;
}
if (!TLI->isOperationExpand(ISD, LT.second)) {
// If the operation is custom lowered then assume
// thare the code is twice as expensive.
- return LT.first * 2;
+ return LT.first * 2 * OpCost;
}
// Else, assume that we need to scalarize this op.
@@ -230,7 +235,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
}
// We don't know anything about this scalar instruction.
- return 1;
+ return OpCost;
}
unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
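A rough sketch of the intended effect when querying the cost model; Ctx and TTI here are assumptions (an LLVMContext and a TargetTransformInfo for the target in question), and the exact numbers still depend on how the type legalizes:

    Type *I32 = Type::getInt32Ty(Ctx);
    Type *F32 = Type::getFloatTy(Ctx);
    unsigned IntAdd = TTI.getArithmeticInstrCost(Instruction::Add,  I32);
    unsigned FpAdd  = TTI.getArithmeticInstrCost(Instruction::FAdd, F32);
    // For a legal, unsplit type, FpAdd is now expected to be about twice IntAdd.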
diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
index dee339a..38ae17d 100644
--- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -117,7 +117,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
float totalWeight = 0;
SmallPtrSet<MachineInstr*, 8> visited;
- // Find the best physreg hist and the best virtreg hint.
+ // Find the best physreg hint and the best virtreg hint.
float bestPhys = 0, bestVirt = 0;
unsigned hintPhys = 0, hintVirt = 0;
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
index f1d4ace..75f4b96 100644
--- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -32,7 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
// No stack is used.
StackOffset = 0;
- clearFirstByValReg();
+ clearByValRegsInfo();
UsedRegs.resize((TRI.getNumRegs()+31)/32);
}
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index 35ec68d..c641991 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index 9958d7d..8264d6d 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -1039,6 +1039,10 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
return false;
}
+ if (CvtBBI->BB->hasAddressTaken())
+ // Conservatively abort if-conversion if BB's address is taken.
+ return false;
+
if (Kind == ICSimpleFalse)
if (TII->ReverseBranchCondition(Cond))
llvm_unreachable("Unable to reverse branch condition!");
@@ -1054,6 +1058,10 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
+
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
+ // explicitly remove CvtBBI as a successor.
+ BBI.BB->removeSuccessor(CvtBBI->BB);
} else {
PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
@@ -1112,6 +1120,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
return false;
}
+ if (CvtBBI->BB->hasAddressTaken())
+ // Conservatively abort if-conversion if BB's address is taken.
+ return false;
+
if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
if (TII->ReverseBranchCondition(Cond))
llvm_unreachable("Unable to reverse branch condition!");
@@ -1146,6 +1158,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
+
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
+ // explicitly remove CvtBBI as a successor.
+ BBI.BB->removeSuccessor(CvtBBI->BB);
} else {
// Predicate the 'true' block after removing its branch.
CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
@@ -1176,7 +1192,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// block. By not merging them, we make it possible to iteratively
// ifcvt the blocks.
if (!HasEarlyExit &&
- NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+ NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough &&
+ !NextBBI->BB->hasAddressTaken()) {
MergeBlocks(BBI, *NextBBI);
FalseBBDead = true;
} else {
@@ -1226,6 +1243,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
return false;
}
+ if (TrueBBI.BB->hasAddressTaken() || FalseBBI.BB->hasAddressTaken())
+ // Conservatively abort if-conversion if either BB has its address taken.
+ return false;
+
// Put the predicated instructions from the 'true' block before the
// instructions from the 'false' block, unless the true block would clobber
// the predicate, in which case, do the opposite.
@@ -1374,7 +1395,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// tail, add a unconditional branch to it.
if (TailBB) {
BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
- bool CanMergeTail = !TailBBI.HasFallThrough;
+ bool CanMergeTail = !TailBBI.HasFallThrough &&
+ !TailBBI.BB->hasAddressTaken();
// There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
// check if there are any other predecessors besides those.
unsigned NumPreds = TailBB->pred_size();
@@ -1543,6 +1565,9 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
/// i.e., when FromBBI's branch is being moved, add those successor edges to
/// ToBBI.
void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
+ assert(!FromBBI.BB->hasAddressTaken() &&
+ "Removing a BB whose address is taken!");
+
ToBBI.BB->splice(ToBBI.BB->end(),
FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
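For context, hasAddressTaken() is typically true for blocks reached through a blockaddress, e.g. from a computed goto (GNU labels-as-values, accepted by clang++ and g++); a hypothetical input illustrating why such a block must not be removed or merged by if-conversion:

    int g(int x) {
      void *target = x ? &&out : &&cont;  // &&label takes the block's address
      goto *target;                       // becomes an indirectbr in the IR
    cont:
      x += 1;
    out:
      return x;
    }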
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index c6d1a18..35295fe 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -955,18 +955,21 @@ void InlineSpiller::reMaterializeAll() {
Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
// Get rid of deleted and empty intervals.
- for (unsigned i = RegsToSpill.size(); i != 0; --i) {
- unsigned Reg = RegsToSpill[i-1];
- if (!LIS.hasInterval(Reg)) {
- RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ unsigned ResultPos = 0;
+ for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+ unsigned Reg = RegsToSpill[i];
+ if (!LIS.hasInterval(Reg))
continue;
- }
+
LiveInterval &LI = LIS.getInterval(Reg);
- if (!LI.empty())
+ if (LI.empty()) {
+ Edit->eraseVirtReg(Reg);
continue;
- Edit->eraseVirtReg(Reg);
- RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+ }
+
+ RegsToSpill[ResultPos++] = Reg;
}
+ RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end());
DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
}
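The rewritten loop above is the usual in-place compaction idiom; the same pattern in isolation, on a plain vector, for reference:

    #include <vector>

    // Keep the surviving elements packed at the front, then trim the tail once,
    // instead of erasing from the middle while iterating.
    static void compact(std::vector<unsigned> &V, bool (*Keep)(unsigned)) {
      unsigned Pos = 0;
      for (unsigned i = 0, e = V.size(); i != e; ++i)
        if (Keep(V[i]))
          V[Pos++] = V[i];
      V.erase(V.begin() + Pos, V.end());
    }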
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 07f0ccf..d894f66 100644
--- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -453,6 +453,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
break;
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ // Just drop the annotation, but forward the value
+ CI->replaceAllUsesWith(CI->getOperand(0));
+ break;
+
case Intrinsic::var_annotation:
break; // Strip out annotate intrinsic
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 352ef94..26a1176 100644
--- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -46,13 +46,16 @@ namespace {
class FrameRef {
MachineBasicBlock::iterator MI; // Instr referencing the frame
int64_t LocalOffset; // Local offset of the frame idx referenced
+ int FrameIdx; // The frame index
public:
- FrameRef(MachineBasicBlock::iterator I, int64_t Offset) :
- MI(I), LocalOffset(Offset) {}
+ FrameRef(MachineBasicBlock::iterator I, int64_t Offset, int Idx) :
+ MI(I), LocalOffset(Offset), FrameIdx(Idx) {}
bool operator<(const FrameRef &RHS) const {
return LocalOffset < RHS.LocalOffset;
}
- MachineBasicBlock::iterator getMachineInstr() { return MI; }
+ MachineBasicBlock::iterator getMachineInstr() const { return MI; }
+ int64_t getLocalOffset() const { return LocalOffset; }
+ int getFrameIndex() const { return FrameIdx; }
};
class LocalStackSlotPass: public MachineFunctionPass {
@@ -194,22 +197,15 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
static inline bool
-lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs,
- std::pair<unsigned, int64_t> &RegOffset,
+lookupCandidateBaseReg(int64_t BaseOffset,
int64_t FrameSizeAdjust,
int64_t LocalFrameOffset,
const MachineInstr *MI,
const TargetRegisterInfo *TRI) {
- unsigned e = Regs.size();
- for (unsigned i = 0; i < e; ++i) {
- RegOffset = Regs[i];
- // Check if the relative offset from the where the base register references
- // to the target address is in range for the instruction.
- int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second;
- if (TRI->isFrameOffsetLegal(MI, Offset))
- return true;
- }
- return false;
+ // Check if the relative offset from the where the base register references
+ // to the target address is in range for the instruction.
+ int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset;
+ return TRI->isFrameOffsetLegal(MI, Offset);
}
bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
@@ -233,9 +229,6 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// choose the first one).
SmallVector<FrameRef, 64> FrameReferenceInsns;
- // A base register definition is a register + offset pair.
- SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
-
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
MachineInstr *MI = I;
@@ -258,8 +251,12 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// Don't try this with values not in the local block.
if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
break;
+ int Idx = MI->getOperand(i).getIndex();
+ int64_t LocalOffset = LocalOffsets[Idx];
+ if (!TRI->needsFrameBaseReg(MI, LocalOffset))
+ break;
FrameReferenceInsns.
- push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()]));
+ push_back(FrameRef(MI, LocalOffset, Idx));
break;
}
}
@@ -271,86 +268,106 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
MachineBasicBlock *Entry = Fn.begin();
+ unsigned BaseReg = 0;
+ int64_t BaseOffset = 0;
+
// Loop through the frame references and allocate for them as necessary.
for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
- MachineBasicBlock::iterator I =
- FrameReferenceInsns[ref].getMachineInstr();
+ FrameRef &FR = FrameReferenceInsns[ref];
+ MachineBasicBlock::iterator I = FR.getMachineInstr();
MachineInstr *MI = I;
- for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) {
- // Consider replacing all frame index operands that reference
- // an object allocated in the local block.
- if (MI->getOperand(idx).isFI()) {
- int FrameIdx = MI->getOperand(idx).getIndex();
-
- assert(MFI->isObjectPreAllocated(FrameIdx) &&
- "Only pre-allocated locals expected!");
-
- DEBUG(dbgs() << "Considering: " << *MI);
- if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) {
- unsigned BaseReg = 0;
- int64_t Offset = 0;
- int64_t FrameSizeAdjust =
- StackGrowsDown ? MFI->getLocalFrameSize() : 0;
-
- DEBUG(dbgs() << " Replacing FI in: " << *MI);
-
- // If we have a suitable base register available, use it; otherwise
- // create a new one. Note that any offset encoded in the
- // instruction itself will be taken into account by the target,
- // so we don't have to adjust for it here when reusing a base
- // register.
- std::pair<unsigned, int64_t> RegOffset;
- if (lookupCandidateBaseReg(BaseRegisters, RegOffset,
- FrameSizeAdjust,
- LocalOffsets[FrameIdx],
- MI, TRI)) {
- DEBUG(dbgs() << " Reusing base register " <<
- RegOffset.first << "\n");
- // We found a register to reuse.
- BaseReg = RegOffset.first;
- Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] -
- RegOffset.second;
- } else {
- // No previously defined register was in range, so create a
- // new one.
- int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
- const MachineFunction *MF = MI->getParent()->getParent();
- const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
- BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
-
- DEBUG(dbgs() << " Materializing base register " << BaseReg <<
- " at frame local offset " <<
- LocalOffsets[FrameIdx] + InstrOffset << "\n");
-
- // Tell the target to insert the instruction to initialize
- // the base register.
- // MachineBasicBlock::iterator InsertionPt = Entry->begin();
- TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
- InstrOffset);
-
- // The base register already includes any offset specified
- // by the instruction, so account for that so it doesn't get
- // applied twice.
- Offset = -InstrOffset;
-
- int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] +
- InstrOffset;
- BaseRegisters.push_back(
- std::pair<unsigned, int64_t>(BaseReg, BaseOffset));
- ++NumBaseRegisters;
- UsedBaseReg = true;
- }
- assert(BaseReg != 0 && "Unable to allocate virtual base register!");
-
- // Modify the instruction to use the new base register rather
- // than the frame index operand.
- TRI->resolveFrameIndex(I, BaseReg, Offset);
- DEBUG(dbgs() << "Resolved: " << *MI);
-
- ++NumReplacements;
- }
+ int64_t LocalOffset = FR.getLocalOffset();
+ int FrameIdx = FR.getFrameIndex();
+ assert(MFI->isObjectPreAllocated(FrameIdx) &&
+ "Only pre-allocated locals expected!");
+
+ DEBUG(dbgs() << "Considering: " << *MI);
+
+ unsigned idx = 0;
+ for (unsigned f = MI->getNumOperands(); idx != f; ++idx) {
+ if (!MI->getOperand(idx).isFI())
+ continue;
+
+ if (FrameIdx == I->getOperand(idx).getIndex())
+ break;
+ }
+
+ assert(idx < MI->getNumOperands() && "Cannot find FI operand");
+
+ int64_t Offset = 0;
+ int64_t FrameSizeAdjust = StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+
+ DEBUG(dbgs() << " Replacing FI in: " << *MI);
+
+ // If we have a suitable base register available, use it; otherwise
+ // create a new one. Note that any offset encoded in the
+ // instruction itself will be taken into account by the target,
+ // so we don't have to adjust for it here when reusing a base
+ // register.
+ if (UsedBaseReg && lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust,
+ LocalOffset, MI, TRI)) {
+ DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n");
+ // We found a register to reuse.
+ Offset = FrameSizeAdjust + LocalOffset - BaseOffset;
+ } else {
+ // No previously defined register was in range, so create a
+ // new one.
+
+ int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
+
+ int64_t PrevBaseOffset = BaseOffset;
+ BaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset;
+
+ // We'd like to avoid creating single-use virtual base registers.
+ // Because the FrameRefs are in sorted order, and we've already
+ // processed all FrameRefs before this one, just check whether or not
+ // the next FrameRef will be able to reuse this new register. If not,
+ // then don't bother creating it.
+ bool CanReuse = false;
+ for (int refn = ref + 1; refn < e; ++refn) {
+ FrameRef &FRN = FrameReferenceInsns[refn];
+ MachineBasicBlock::iterator J = FRN.getMachineInstr();
+ MachineInstr *MIN = J;
+
+ CanReuse = lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust,
+ FRN.getLocalOffset(), MIN, TRI);
+ break;
}
+
+ if (!CanReuse) {
+ BaseOffset = PrevBaseOffset;
+ continue;
+ }
+
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
+ BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
+
+ DEBUG(dbgs() << " Materializing base register " << BaseReg <<
+ " at frame local offset " << LocalOffset + InstrOffset << "\n");
+
+ // Tell the target to insert the instruction to initialize
+ // the base register.
+ // MachineBasicBlock::iterator InsertionPt = Entry->begin();
+ TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
+ InstrOffset);
+
+ // The base register already includes any offset specified
+ // by the instruction, so account for that so it doesn't get
+ // applied twice.
+ Offset = -InstrOffset;
+
+ ++NumBaseRegisters;
+ UsedBaseReg = true;
}
+ assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+
+ // Modify the instruction to use the new base register rather
+ // than the frame index operand.
+ TRI->resolveFrameIndex(I, BaseReg, Offset);
+ DEBUG(dbgs() << "Resolved: " << *MI);
+
+ ++NumReplacements;
}
+
return UsedBaseReg;
}
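
The reuse test above reduces to one signed range check: the distance from the base register's anchor point to the slot being addressed must fit the instruction's immediate field. A rough standalone sketch of that arithmetic, using a made-up immediate range rather than any real target's encoding:

    #include <cstdint>
    #include <cstdio>

    // Mirrors lookupCandidateBaseReg: can an existing base register, anchored
    // at BaseOffset within the local block, reach a slot at LocalOffset?
    static bool offsetFits(int64_t FrameSizeAdjust, int64_t LocalOffset,
                           int64_t BaseOffset, int64_t MinImm, int64_t MaxImm) {
      int64_t Offset = FrameSizeAdjust + LocalOffset - BaseOffset;
      return Offset >= MinImm && Offset <= MaxImm;
    }

    int main() {
      // Stack grows down, 4 KiB local block, base anchored at local offset 16,
      // and an assumed signed immediate range of [-256, 255].
      std::printf("%d\n", offsetFits(4096, 40, 4096 + 16, -256, 255));   // 1: reuse
      std::printf("%d\n", offsetFits(4096, 4000, 4096 + 16, -256, 255)); // 0: materialize
      return 0;
    }
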
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 898e165..78e9950 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
: BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
- AddressTaken(false) {
+ AddressTaken(false), CachedMCSymbol(NULL) {
Insts.Parent = this;
}
@@ -48,12 +48,16 @@ MachineBasicBlock::~MachineBasicBlock() {
/// getSymbol - Return the MCSymbol for this basic block.
///
MCSymbol *MachineBasicBlock::getSymbol() const {
- const MachineFunction *MF = getParent();
- MCContext &Ctx = MF->getContext();
- const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
- return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
- Twine(MF->getFunctionNumber()) + "_" +
- Twine(getNumber()));
+ if (!CachedMCSymbol) {
+ const MachineFunction *MF = getParent();
+ MCContext &Ctx = MF->getContext();
+ const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
+ CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
+ Twine(MF->getFunctionNumber()) +
+ "_" + Twine(getNumber()));
+ }
+
+ return CachedMCSymbol;
}
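
The getSymbol() change is a plain compute-once cache: the first call builds the label, later calls return the stored pointer. A minimal sketch of the same pattern, with std::string standing in for MCSymbol and an illustrative label format:

    #include <cstdio>
    #include <string>

    class Block {
      int Number;
      mutable std::string CachedLabel; // empty means "not built yet"
    public:
      explicit Block(int N) : Number(N) {}
      const std::string &getLabel() const {
        if (CachedLabel.empty())                       // first call: build it
          CachedLabel = ".LBB0_" + std::to_string(Number);
        return CachedLabel;                            // later calls: reuse it
      }
    };

    int main() {
      Block B(7);
      std::printf("%s\n", B.getLabel().c_str()); // builds ".LBB0_7"
      std::printf("%s\n", B.getLabel().c_str()); // returns the cached string
      return 0;
    }
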
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index cd948e2..bfba503 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -39,6 +39,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -52,6 +53,11 @@ STATISTIC(CondBranchTakenFreq,
STATISTIC(UncondBranchTakenFreq,
"Potential frequency of taking unconditional branches");
+static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
+ cl::desc("Force the alignment of all "
+ "blocks in the function."),
+ cl::init(0), cl::Hidden);
+
namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
@@ -1088,6 +1094,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
BlockToChain.clear();
ChainAllocator.DestroyAll();
+ if (AlignAllBlock)
+ // Align all of the blocks in the function to a specific alignment.
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ FI->setAlignment(AlignAllBlock);
+
// We always return true as we have no way to track whether the final order
// differs from the original order.
return true;
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 0ea9ae0..8af9d05 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -326,8 +326,7 @@ void MachineModuleInfo::AnalyzeModule(const Module &M) {
if (!GV || !GV->hasInitializer()) return;
// Should be an array of 'i8*'.
- const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
- if (InitList == 0) return;
+ const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
if (const Function *F =
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 1af00e8..68372f6 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -15,6 +15,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/raw_os_ostream.h"
+
using namespace llvm;
MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
@@ -106,13 +108,59 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
void MachineRegisterInfo::clearVirtRegs() {
#ifndef NDEBUG
- for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
- assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
- "Vreg use list non-empty still?");
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (!VRegInfo[Reg].second)
+ continue;
+ verifyUseList(Reg);
+ llvm_unreachable("Remaining virtual register operands");
+ }
#endif
VRegInfo.clear();
}
+void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
+#ifndef NDEBUG
+ bool Valid = true;
+ for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) {
+ MachineOperand *MO = &I.getOperand();
+ MachineInstr *MI = MO->getParent();
+ if (!MI) {
+ errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO
+ << " has no parent instruction.\n";
+ Valid = false;
+ }
+ MachineOperand *MO0 = &MI->getOperand(0);
+ unsigned NumOps = MI->getNumOperands();
+ if (!(MO >= MO0 && MO < MO0+NumOps)) {
+ errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO
+ << " doesn't belong to parent MI: " << *MI;
+ Valid = false;
+ }
+ if (!MO->isReg()) {
+ errs() << PrintReg(Reg, TRI) << " MachineOperand " << MO << ": " << *MO
+ << " is not a register\n";
+ Valid = false;
+ }
+ if (MO->getReg() != Reg) {
+ errs() << PrintReg(Reg, TRI) << " use-list MachineOperand " << MO << ": "
+ << *MO << " is the wrong register\n";
+ Valid = false;
+ }
+ }
+ assert(Valid && "Invalid use list");
+#endif
+}
+
+void MachineRegisterInfo::verifyUseLists() const {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ verifyUseList(TargetRegisterInfo::index2VirtReg(i));
+ for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i)
+ verifyUseList(i);
+#endif
+}
+
/// Add MO to the linked list of operands for its register.
void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
assert(!MO->isOnRegUseList() && "Already on list");
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index 5bd2349..fff6b2b 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -51,7 +51,11 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
static bool ViewMISchedDAGs = false;
#endif // NDEBUG
-// Experimental heuristics
+// FIXME: remove this flag after initial testing. It should always be a good
+// thing.
+static cl::opt<bool> EnableCopyConstrain("misched-vcopy", cl::Hidden,
+ cl::desc("Constrain vreg copies."), cl::init(true));
+
static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
cl::desc("Enable load clustering."), cl::init(true));
@@ -323,6 +327,10 @@ ScheduleDAGMI::~ScheduleDAGMI() {
delete SchedImpl;
}
+bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
+ return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
+}
+
bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
if (SuccSU != &ExitSU) {
// Do not use WillCreateCycle, it assumes SD scheduling.
@@ -404,6 +412,8 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
}
}
+/// This is normally called from the main scheduler loop but may also be invoked
+/// by the scheduling strategy to perform additional code motion.
void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
MachineBasicBlock::iterator InsertPos) {
// Advance RegionBegin if the first instruction moves down.
@@ -505,6 +515,14 @@ updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
if ((int)NewMaxPressure[ID] > MaxUnits)
MaxUnits = NewMaxPressure[ID];
}
+ DEBUG(
+ for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) {
+ unsigned Limit = TRI->getRegPressureSetLimit(i);
+ if (NewMaxPressure[i] > Limit ) {
+ dbgs() << " " << TRI->getRegPressureSetName(i) << ": "
+ << NewMaxPressure[i] << " > " << Limit << "\n";
+ }
+ });
}
/// schedule - Called back from MachineScheduler::runOnMachineFunction
@@ -905,6 +923,184 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {
}
//===----------------------------------------------------------------------===//
+// CopyConstrain - DAG post-processing to encourage copy elimination.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create weak edges from all uses of a copy to
+/// the one use that defines the copy's source vreg, most likely an induction
+/// variable increment.
+class CopyConstrain : public ScheduleDAGMutation {
+ // Transient state.
+ SlotIndex RegionBeginIdx;
+ // RegionEndIdx is the slot index of the last non-debug instruction in the
+ // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
+ SlotIndex RegionEndIdx;
+public:
+ CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
+
+ virtual void apply(ScheduleDAGMI *DAG);
+
+protected:
+ void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+/// constrainLocalCopy handles two possibilities:
+/// 1) Local src:
+/// I0: = dst
+/// I1: src = ...
+/// I2: = dst
+/// I3: dst = src (copy)
+/// (create pred->succ edges I0->I1, I2->I1)
+///
+/// 2) Local copy:
+/// I0: dst = src (copy)
+/// I1: = dst
+/// I2: src = ...
+/// I3: = dst
+/// (create pred->succ edges I1->I2, I3->I2)
+///
+/// Although the MachineScheduler is currently constrained to single blocks,
+/// this algorithm should handle extended blocks. An EBB is a set of
+/// contiguously numbered blocks such that the previous block in the EBB is
+/// always the single predecessor.
+void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
+ LiveIntervals *LIS = DAG->getLIS();
+ MachineInstr *Copy = CopySU->getInstr();
+
+ // Check for pure vreg copies.
+ unsigned SrcReg = Copy->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ return;
+
+ unsigned DstReg = Copy->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ return;
+
+ // Check if either the dest or source is local. If it's live across a back
+ // edge, it's not local. Note that if both vregs are live across the back
+ // edge, we cannot successfully constrain the copy without cyclic scheduling.
+ unsigned LocalReg = DstReg;
+ unsigned GlobalReg = SrcReg;
+ LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
+ if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
+ LocalReg = SrcReg;
+ GlobalReg = DstReg;
+ LocalLI = &LIS->getInterval(LocalReg);
+ if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
+ return;
+ }
+ LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
+
+ // Find the global segment after the start of the local LI.
+ LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
+ // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
+ // local live range. We could create edges from other global uses to the local
+ // start, but the coalescer should have already eliminated these cases, so
+ // don't bother dealing with it.
+ if (GlobalSegment == GlobalLI->end())
+ return;
+
+ // If GlobalSegment is killed at the LocalLI->start, the call to find()
+ // returned the next global segment. But if GlobalSegment overlaps with
+ // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
+ // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
+ if (GlobalSegment->contains(LocalLI->beginIndex()))
+ ++GlobalSegment;
+
+ if (GlobalSegment == GlobalLI->end())
+ return;
+
+ // Check if GlobalLI contains a hole in the vicinity of LocalLI.
+ if (GlobalSegment != GlobalLI->begin()) {
+ // Two address defs have no hole.
+ if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end,
+ GlobalSegment->start)) {
+ return;
+ }
+ // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
+ // it would be a disconnected component in the live range.
+ assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
+ "Disconnected LRG within the scheduling region.");
+ }
+ MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
+ if (!GlobalDef)
+ return;
+
+ SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
+ if (!GlobalSU)
+ return;
+
+ // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
+ // constraining the uses of the last local def to precede GlobalDef.
+ SmallVector<SUnit*,8> LocalUses;
+ const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
+ MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
+ SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
+ for (SUnit::const_succ_iterator
+ I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
+ I != E; ++I) {
+ if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
+ continue;
+ if (I->getSUnit() == GlobalSU)
+ continue;
+ if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
+ return;
+ LocalUses.push_back(I->getSUnit());
+ }
+ // Open the top of the GlobalLI hole by constraining any earlier global uses
+ // to precede the start of LocalLI.
+ SmallVector<SUnit*,8> GlobalUses;
+ MachineInstr *FirstLocalDef =
+ LIS->getInstructionFromIndex(LocalLI->beginIndex());
+ SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
+ for (SUnit::const_pred_iterator
+ I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
+ if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
+ continue;
+ if (I->getSUnit() == FirstLocalSU)
+ continue;
+ if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
+ return;
+ GlobalUses.push_back(I->getSUnit());
+ }
+ DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
+ // Add the weak edges.
+ for (SmallVectorImpl<SUnit*>::const_iterator
+ I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
+ DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
+ << GlobalSU->NodeNum << ")\n");
+ DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
+ }
+ for (SmallVectorImpl<SUnit*>::const_iterator
+ I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
+ DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
+ << FirstLocalSU->NodeNum << ")\n");
+ DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
+ }
+}
+
+/// \brief Callback from DAG postProcessing to create weak edges to encourage
+/// copy elimination.
+void CopyConstrain::apply(ScheduleDAGMI *DAG) {
+ MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
+ if (FirstPos == DAG->end())
+ return;
+ RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
+ RegionEndIdx = DAG->getLIS()->getInstructionIndex(
+ &*priorNonDebug(DAG->end(), DAG->begin()));
+
+ for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+ SUnit *SU = &DAG->SUnits[Idx];
+ if (!SU->getInstr()->isCopy())
+ continue;
+
+ constrainLocalCopy(SU, DAG);
+ }
+}
+
+//===----------------------------------------------------------------------===//
// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
//===----------------------------------------------------------------------===//
@@ -916,7 +1112,7 @@ public:
/// Represent the type of SchedCandidate found within a single queue.
/// pickNodeBidirectional depends on these listed by decreasing priority.
enum CandReason {
- NoCand, SingleExcess, SingleCritical, Cluster,
+ NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak,
ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
NodeOrder};
@@ -1191,6 +1387,8 @@ protected:
const RegPressureTracker &RPTracker,
SchedCandidate &Candidate);
+ void reschedulePhysRegCopies(SUnit *SU, bool isTop);
+
#ifndef NDEBUG
void traceCandidate(const SchedCandidate &Cand);
#endif
@@ -1339,6 +1537,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
I != E; ++I) {
unsigned L = getUnscheduledLatency(*I);
+ DEBUG(dbgs() << " " << Available.getName()
+ << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n');
if (L > RemLatency)
RemLatency = L;
}
@@ -1349,10 +1549,13 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
RemLatency = L;
}
unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
+ DEBUG(dbgs() << " " << Available.getName()
+ << " ExpectedLatency " << ExpectedLatency
+ << " CP Limit " << CriticalPathLimit << '\n');
if (RemLatency + ExpectedLatency >= CriticalPathLimit
&& RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
Policy.ReduceLatency = true;
- DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
+ DEBUG(dbgs() << " Increase ILP: " << Available.getName() << '\n');
}
}
@@ -1569,7 +1772,8 @@ void ConvergingScheduler::balanceZones(
if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
> (int)SchedModel->getLatencyFactor()) {
CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
- DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
+ DEBUG(dbgs() << " Balance " << CriticalZone.Available.getName()
+ << " reduce "
<< SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
<< '\n');
}
@@ -1580,7 +1784,8 @@ void ConvergingScheduler::balanceZones(
if ((int)(OppositeZone.ExpectedCount - OppositeCount)
> (int)SchedModel->getLatencyFactor()) {
OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
- DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
+ DEBUG(dbgs() << " Balance " << OppositeZone.Available.getName()
+ << " demand "
<< SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
<< '\n');
}
@@ -1604,7 +1809,7 @@ void ConvergingScheduler::checkResourceLimits(
if (Top.CritResIdx != Rem.CritResIdx) {
TopCand.Policy.ReduceResIdx = Top.CritResIdx;
BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
- DEBUG(dbgs() << "Reduce scheduled "
+ DEBUG(dbgs() << " Reduce scheduled "
<< SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
}
return;
@@ -1621,7 +1826,7 @@ void ConvergingScheduler::checkResourceLimits(
&& (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
TopCand.Policy.ReduceLatency = true;
BotCand.Policy.ReduceLatency = true;
- DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
+ DEBUG(dbgs() << " Reduce scheduled latency " << Top.ExpectedLatency
<< " + " << Bot.ExpectedLatency << '\n');
}
return;
@@ -1696,6 +1901,34 @@ static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
}
+/// Minimize physical register live ranges. Regalloc wants them adjacent to
+/// their physreg def/use.
+///
+/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
+/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
+/// with the operation that produces or consumes the physreg. We'll do this when
+/// regalloc has support for parallel copies.
+static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+ const MachineInstr *MI = SU->getInstr();
+ if (!MI->isCopy())
+ return 0;
+
+ unsigned ScheduledOper = isTop ? 1 : 0;
+ unsigned UnscheduledOper = isTop ? 0 : 1;
+ // If we have already scheduled the physreg producer/consumer, immediately
+ // schedule the copy.
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(ScheduledOper).getReg()))
+ return 1;
+ // If the physreg is at the boundary, defer it. Otherwise schedule it
+ // immediately to free the dependent. We can hoist the copy later.
+ bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+ if (TargetRegisterInfo::isPhysicalRegister(
+ MI->getOperand(UnscheduledOper).getReg()))
+ return AtBoundary ? -1 : 1;
+ return 0;
+}
+
/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
@@ -1723,6 +1956,12 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
TryCand.Reason = NodeOrder;
return;
}
+
+ if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
+ biasPhysRegCopy(Cand.SU, Zone.isTop()),
+ TryCand, Cand, PhysRegCopy))
+ return;
+
// Avoid exceeding the target's limit.
if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
@@ -1749,12 +1988,16 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
TryCand, Cand, Cluster))
return;
- // Currently, weak edges are for clustering, so we hard-code that reason.
- // However, deferring the current TryCand will not change Cand's reason.
+
+ // Weak edges are for clustering and other constraints.
+ //
+ // Deferring TryCand here does not change Cand's reason. This is good in the
+ // sense that a bad candidate shouldn't affect a previous candidate's
+ // goodness, but bad in that it is asymmetric and depends on queue order.
CandReason OrigReason = Cand.Reason;
if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
getWeakLeft(Cand.SU, Zone.isTop()),
- TryCand, Cand, Cluster)) {
+ TryCand, Cand, Weak)) {
Cand.Reason = OrigReason;
return;
}
@@ -1825,20 +2068,20 @@ static bool compareRPDelta(const RegPressureDelta &LHS,
// Avoid increasing the max critical pressure in the scheduled region.
if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
- DEBUG(dbgs() << "RP excess top - bot: "
+ DEBUG(dbgs() << " RP excess top - bot: "
<< (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
}
// Avoid increasing the max critical pressure in the scheduled region.
if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
- DEBUG(dbgs() << "RP critical top - bot: "
+ DEBUG(dbgs() << " RP critical top - bot: "
<< (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
<< '\n');
return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
}
// Avoid increasing the max pressure of the entire region.
if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
- DEBUG(dbgs() << "RP current top - bot: "
+ DEBUG(dbgs() << " RP current top - bot: "
<< (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
<< '\n');
return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
@@ -1851,9 +2094,11 @@ const char *ConvergingScheduler::getReasonStr(
ConvergingScheduler::CandReason Reason) {
switch (Reason) {
case NoCand: return "NOCAND ";
+ case PhysRegCopy: return "PREG-COPY";
case SingleExcess: return "REG-EXCESS";
case SingleCritical: return "REG-CRIT ";
case Cluster: return "CLUSTER ";
+ case Weak: return "WEAK ";
case SingleMax: return "REG-MAX ";
case MultiPressure: return "REG-MULTI ";
case ResourceReduce: return "RES-REDUCE";
@@ -1953,8 +2198,7 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
bool IsTop) {
- DEBUG(dbgs() << "Pick " << (IsTop ? "Top" : "Bot")
- << " SU(" << Cand.SU->NodeNum << ") "
+ DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
<< ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}
@@ -1964,10 +2208,12 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
IsTopNode = false;
+ DEBUG(dbgs() << "Pick Top NOCAND\n");
return SU;
}
if (SUnit *SU = Top.pickOnlyChoice()) {
IsTopNode = true;
+ DEBUG(dbgs() << "Pick Bot NOCAND\n");
return SU;
}
CandPolicy NoPolicy;
@@ -2065,21 +2311,53 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
if (SU->isBottomReady())
Bot.removeReady(SU);
- DEBUG(dbgs() << "Scheduling " << *SU->getInstr());
+ DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
return SU;
}
+void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+
+ MachineBasicBlock::iterator InsertPos = SU->getInstr();
+ if (!isTop)
+ ++InsertPos;
+ SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
+
+ // Find already scheduled copies with a single physreg dependence and move
+ // them just above the scheduled instruction.
+ for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
+ I != E; ++I) {
+ if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
+ continue;
+ SUnit *DepSU = I->getSUnit();
+ if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
+ continue;
+ MachineInstr *Copy = DepSU->getInstr();
+ if (!Copy->isCopy())
+ continue;
+ DEBUG(dbgs() << " Rescheduling physreg copy ";
+ I->getSUnit()->dump(DAG));
+ DAG->moveInstruction(Copy, InsertPos);
+ }
+}
+
/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
/// its state based on the current cycle before MachineSchedStrategy does.
+///
+/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
+/// them here. See comments in biasPhysRegCopy.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = Top.CurrCycle;
Top.bumpNode(SU);
+ if (SU->hasPhysRegUses)
+ reschedulePhysRegCopies(SU, true);
}
else {
SU->BotReadyCycle = Bot.CurrCycle;
Bot.bumpNode(SU);
+ if (SU->hasPhysRegDefs)
+ reschedulePhysRegCopies(SU, false);
}
}
@@ -2090,6 +2368,12 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
"-misched-topdown incompatible with -misched-bottomup");
ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
// Register DAG post-processors.
+ //
+ // FIXME: extend the mutation API to allow earlier mutations to instantiate
+ // data and pass it to later mutations. Have a single mutation that gathers
+ // the interesting nodes in one pass.
+ if (EnableCopyConstrain)
+ DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
if (EnableLoadCluster)
DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
if (EnableMacroFusion)
@@ -2179,12 +2463,12 @@ public:
SUnit *SU = ReadyQ.back();
ReadyQ.pop_back();
IsTopNode = false;
- DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
- << *SU->getInstr()
+ DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
<< " ILP: " << DAG->getDFSResult()->getILP(SU)
<< " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
<< DAG->getDFSResult()->getSubtreeLevel(
- DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
+ DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
+ << "Scheduling " << *SU->getInstr());
return SU;
}
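
biasPhysRegCopy above is a three-way vote per copy: schedule it now, defer it, or stay neutral. A rough sketch of the same decision in isolation, with the operand/physreg tests reduced to booleans (the struct below is an illustration, not an LLVM type):

    #include <cstdio>

    struct CopyInfo {
      bool DefIsPhys;   // operand 0 (the copy's def) is a physical register
      bool UseIsPhys;   // operand 1 (the copy's use) is a physical register
      bool AtBoundary;  // top: no successors left; bottom: no predecessors left
    };

    // >0: schedule the copy immediately, <0: defer it, 0: no opinion.
    static int biasCopy(const CopyInfo &C, bool IsTop) {
      bool ScheduledIsPhys = IsTop ? C.UseIsPhys : C.DefIsPhys;
      bool UnscheduledIsPhys = IsTop ? C.DefIsPhys : C.UseIsPhys;
      if (ScheduledIsPhys)
        return 1;                      // physreg side already placed: place the copy
      if (UnscheduledIsPhys)
        return C.AtBoundary ? -1 : 1;  // keep boundary copies next to the boundary
      return 0;
    }

    int main() {
      CopyInfo IncomingArg = {false, true, true}; // copy from a live-in physreg
      std::printf("%d\n", biasCopy(IncomingArg, /*IsTop=*/true)); // prints 1
      return 0;
    }
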
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 49d8c4e..00f702c 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -1200,8 +1200,10 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
return std::max(Instrs, PRMax);
}
+
unsigned MachineTraceMetrics::Trace::
-getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks,
+ ArrayRef<const MCSchedClassDesc*> ExtraInstrs) const {
// Add up resources above and below the center block.
ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
@@ -1210,6 +1212,18 @@ getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
unsigned PRCycles = PRDepths[K] + PRHeights[K];
for (unsigned I = 0; I != Extrablocks.size(); ++I)
PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
+ for (unsigned I = 0; I != ExtraInstrs.size(); ++I) {
+ const MCSchedClassDesc* SC = ExtraInstrs[I];
+ if (!SC->isValid())
+ continue;
+ for (TargetSchedModel::ProcResIter
+ PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
+ PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+ if (PI->ProcResourceIdx != K)
+ continue;
+ PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K));
+ }
+ }
PRMax = std::max(PRMax, PRCycles);
}
// Convert to cycle count.
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index 4b12300..037043f 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -472,6 +472,9 @@ void MachineVerifier::visitMachineFunctionBefore() {
if (MInfo.Succs.size() != I->succ_size())
report("MBB has duplicate entries in its successor list.", I);
}
+
+ // Check that the register use lists are sane.
+ MRI->verifyUseLists();
}
// Does iterator point to a and b as the first two elements?
diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp
index 1af65c8..bfbc062 100644
--- a/contrib/llvm/lib/CodeGen/Passes.cpp
+++ b/contrib/llvm/lib/CodeGen/Passes.cpp
@@ -93,9 +93,10 @@ static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
/// These should be converted to boolOrDefault in order to use applyOverride.
-static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
+static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID,
+ bool Override) {
if (Override)
- return 0;
+ return IdentifyingPassPtr();
return PassID;
}
@@ -103,19 +104,20 @@ static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
/// flags with ternary conditions. TargetID is passed through by default. The
/// pass is suppressed when the option is false. When the option is true, the
/// StandardID is selected if the target provides no default.
-static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
- AnalysisID StandardID) {
+static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID,
+ cl::boolOrDefault Override,
+ AnalysisID StandardID) {
switch (Override) {
case cl::BOU_UNSET:
return TargetID;
case cl::BOU_TRUE:
- if (TargetID)
+ if (TargetID.isValid())
return TargetID;
if (StandardID == 0)
report_fatal_error("Target cannot enable pass");
return StandardID;
case cl::BOU_FALSE:
- return 0;
+ return IdentifyingPassPtr();
}
llvm_unreachable("Invalid command line option state");
}
@@ -132,7 +134,8 @@ static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
/// StandardID may be a pseudo ID. In that case TargetID is the name of the real
/// pass to run. This allows multiple options to control a single pass depending
/// on where in the pipeline that pass is added.
-static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
+static IdentifyingPassPtr overridePass(AnalysisID StandardID,
+ IdentifyingPassPtr TargetID) {
if (StandardID == &PostRASchedulerID)
return applyDisable(TargetID, DisablePostRA);
@@ -200,11 +203,11 @@ public:
// user interface. For example, a target may disable a standard pass by
// default by substituting a pass ID of zero, and the user may still enable
// that standard pass with an explicit command line option.
- DenseMap<AnalysisID,AnalysisID> TargetPasses;
+ DenseMap<AnalysisID,IdentifyingPassPtr> TargetPasses;
/// Store the pairs of <AnalysisID, AnalysisID> of which the second pass
/// is inserted after each instance of the first one.
- SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses;
+ SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4> InsertedPasses;
};
} // namespace llvm
@@ -239,9 +242,13 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
/// Insert InsertedPassID pass after TargetPassID.
void TargetPassConfig::insertPass(AnalysisID TargetPassID,
- AnalysisID InsertedPassID) {
- assert(TargetPassID != InsertedPassID && "Insert a pass after itself!");
- std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID);
+ IdentifyingPassPtr InsertedPassID) {
+ assert(((!InsertedPassID.isInstance() &&
+ TargetPassID != InsertedPassID.getID()) ||
+ (InsertedPassID.isInstance() &&
+ TargetPassID != InsertedPassID.getInstance()->getPassID())) &&
+ "Insert a pass after itself!");
+ std::pair<AnalysisID, IdentifyingPassPtr> P(TargetPassID, InsertedPassID);
Impl->InsertedPasses.push_back(P);
}
@@ -265,12 +272,12 @@ void TargetPassConfig::setOpt(bool &Opt, bool Val) {
}
void TargetPassConfig::substitutePass(AnalysisID StandardID,
- AnalysisID TargetID) {
+ IdentifyingPassPtr TargetID) {
Impl->TargetPasses[StandardID] = TargetID;
}
-AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
- DenseMap<AnalysisID, AnalysisID>::const_iterator
+IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
+ DenseMap<AnalysisID, IdentifyingPassPtr>::const_iterator
I = Impl->TargetPasses.find(ID);
if (I == Impl->TargetPasses.end())
return ID;
@@ -303,24 +310,39 @@ void TargetPassConfig::addPass(Pass *P) {
/// Add a CodeGen pass at this point in the pipeline after checking for target
/// and command line overrides.
+///
+/// addPass cannot return a pointer to the pass instance because it is internal
+/// to the PassManager and the instance we create here may already be freed.
AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
- AnalysisID TargetID = getPassSubstitution(PassID);
- AnalysisID FinalID = overridePass(PassID, TargetID);
- if (FinalID == 0)
- return FinalID;
-
- Pass *P = Pass::createPass(FinalID);
- if (!P)
- llvm_unreachable("Pass ID not registered");
- addPass(P);
+ IdentifyingPassPtr TargetID = getPassSubstitution(PassID);
+ IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);
+ if (!FinalPtr.isValid())
+ return 0;
+
+ Pass *P;
+ if (FinalPtr.isInstance())
+ P = FinalPtr.getInstance();
+ else {
+ P = Pass::createPass(FinalPtr.getID());
+ if (!P)
+ llvm_unreachable("Pass ID not registered");
+ }
+ AnalysisID FinalID = P->getPassID();
+ addPass(P); // Ends the lifetime of P.
+
// Add the passes after the pass P if there is any.
- for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator
+ for (SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4>::iterator
I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end();
I != E; ++I) {
if ((*I).first == PassID) {
- assert((*I).second && "Illegal Pass ID!");
- Pass *NP = Pass::createPass((*I).second);
- assert(NP && "Pass ID not registered");
+ assert((*I).second.isValid() && "Illegal Pass ID!");
+ Pass *NP;
+ if ((*I).second.isInstance())
+ NP = (*I).second.getInstance();
+ else {
+ NP = Pass::createPass((*I).second.getID());
+ assert(NP && "Pass ID not registered");
+ }
addPass(NP);
}
}
@@ -687,14 +709,6 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
addPass(&VirtRegRewriterID);
printAndVerify("After Virtual Register Rewriter");
- // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
- // but eventually, all users of it should probably be moved to addPostRA and
- // it can go away. Currently, it's the intended place for targets to run
- // FinalizeMachineBundles, because passes other than MachineScheduling an
- // RegAlloc itself may not be aware of bundles.
- if (addFinalizeRegAlloc())
- printAndVerify("After RegAlloc finalization");
-
// Perform stack slot coloring and post-ra machine LICM.
//
// FIXME: Re-enable coloring with register when it's capable of adding
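
The Passes.cpp changes thread IdentifyingPassPtr through the override machinery: a value that holds either a registered pass ID or an already-constructed pass instance. A rough modern-C++ analogue of that shape (std::variant is used only for illustration; the real class is hand-rolled and predates C++11):

    #include <cstdio>
    #include <variant>

    struct Pass {};                    // stand-in for llvm::Pass
    using AnalysisID = const void *;   // stand-in for the registered pass ID type

    struct PassPtr {
      std::variant<AnalysisID, Pass *> V{static_cast<AnalysisID>(nullptr)};
      bool isInstance() const { return std::holds_alternative<Pass *>(V); }
      bool isValid() const {
        return isInstance() ? std::get<Pass *>(V) != nullptr
                            : std::get<AnalysisID>(V) != nullptr;
      }
    };

    int main() {
      static char SomeID;              // fake pass ID, as a pass registry would hold
      Pass P;
      PassPtr ByID{&SomeID};           // overriding with an ID
      PassPtr ByInstance{&P};          // overriding with a concrete instance
      PassPtr Disabled;                // "pass suppressed", like applyDisable()
      std::printf("%d %d %d\n", ByID.isValid(), ByInstance.isValid(),
                  Disabled.isValid()); // 1 1 0
      return 0;
    }
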
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index e5872df..959dd7d 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -824,6 +824,12 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// The instruction stream may change in the loop, so check BB->end()
// directly.
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ // We might end up here again with a NULL iterator if we scavenged a
+ // register for which we inserted spill code for definition by what was
+ // originally the first instruction in BB.
+ if (I == MachineBasicBlock::iterator(NULL))
+ I = BB->begin();
+
MachineInstr *MI = I;
MachineBasicBlock::iterator J = llvm::next(I);
MachineBasicBlock::iterator P = I == BB->begin() ?
@@ -883,8 +889,6 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
"The register scavenger has an unexpected position");
I = P;
RS->unprocess(P);
-
- // RS->skipTo(I == BB->begin() ? NULL : llvm::prior(I));
} else
++I;
}
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
index 0b6dc68..7fcfe9e 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -63,7 +63,7 @@ class RABasic : public MachineFunctionPass, public RegAllocBase
MachineFunction *MF;
// state
- std::auto_ptr<Spiller> SpillerInstance;
+ OwningPtr<Spiller> SpillerInstance;
std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
CompSpillWeight> Queue;
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 6d84176..9eed1fc 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -78,7 +78,7 @@ class RAGreedy : public MachineFunctionPass,
LiveDebugVariables *DebugVars;
// state
- std::auto_ptr<Spiller> SpillerInstance;
+ OwningPtr<Spiller> SpillerInstance;
std::priority_queue<std::pair<unsigned, unsigned> > Queue;
unsigned NextCascade;
@@ -166,8 +166,8 @@ class RAGreedy : public MachineFunctionPass,
};
// splitting state.
- std::auto_ptr<SplitAnalysis> SA;
- std::auto_ptr<SplitEditor> SE;
+ OwningPtr<SplitAnalysis> SA;
+ OwningPtr<SplitEditor> SE;
/// Cached per-block interference maps
InterferenceCache IntfCache;
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index 607edac..15a88e2 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/RegAllocPBQP.h"
#include "RegisterCoalescer.h"
#include "Spiller.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -89,8 +90,8 @@ public:
static char ID;
/// Construct a PBQP register allocator.
- RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0)
- : MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
+ RegAllocPBQP(OwningPtr<PBQPBuilder> &b, char *cPassID=0)
+ : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
@@ -121,7 +122,7 @@ private:
typedef std::set<unsigned> RegSet;
- std::auto_ptr<PBQPBuilder> builder;
+ OwningPtr<PBQPBuilder> builder;
char *customPassID;
@@ -132,7 +133,7 @@ private:
const MachineLoopInfo *loopInfo;
MachineRegisterInfo *mri;
- std::auto_ptr<Spiller> spiller;
+ OwningPtr<Spiller> spiller;
LiveIntervals *lis;
LiveStacks *lss;
VirtRegMap *vrm;
@@ -186,16 +187,15 @@ unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
return allowedSet[option - 1];
}
-std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
- const LiveIntervals *lis,
- const MachineLoopInfo *loopInfo,
- const RegSet &vregs) {
+PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
MachineRegisterInfo *mri = &mf->getRegInfo();
const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
- std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+ OwningPtr<PBQPRAProblem> p(new PBQPRAProblem());
PBQP::Graph &g = p->getGraph();
RegSet pregs;
@@ -282,7 +282,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
}
}
- return p;
+ return p.take();
}
void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
@@ -311,13 +311,12 @@ void PBQPBuilder::addInterferenceCosts(
}
}
-std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
- MachineFunction *mf,
+PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf,
const LiveIntervals *lis,
const MachineLoopInfo *loopInfo,
const RegSet &vregs) {
- std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs);
+ OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, loopInfo, vregs));
PBQP::Graph &g = p->getGraph();
const TargetMachine &tm = mf->getTarget();
@@ -391,7 +390,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
}
}
- return p;
+ return p.take();
}
void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
@@ -584,8 +583,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
while (!pbqpAllocComplete) {
DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
- std::auto_ptr<PBQPRAProblem> problem =
- builder->build(mf, lis, loopInfo, vregsToAlloc);
+ OwningPtr<PBQPRAProblem> problem(
+ builder->build(mf, lis, loopInfo, vregsToAlloc));
#ifndef NDEBUG
if (pbqpDumpGraphs) {
@@ -621,18 +620,18 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
}
FunctionPass* llvm::createPBQPRegisterAllocator(
- std::auto_ptr<PBQPBuilder> builder,
+ OwningPtr<PBQPBuilder> &builder,
char *customPassID) {
return new RegAllocPBQP(builder, customPassID);
}
FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
- if (pbqpCoalescing) {
- return createPBQPRegisterAllocator(
- std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing()));
- } // else
- return createPBQPRegisterAllocator(
- std::auto_ptr<PBQPBuilder>(new PBQPBuilder()));
+ OwningPtr<PBQPBuilder> Builder;
+ if (pbqpCoalescing)
+ Builder.reset(new PBQPBuilderWithCoalescing());
+ else
+ Builder.reset(new PBQPBuilder());
+ return createPBQPRegisterAllocator(Builder);
}
#undef DEBUG_TYPE
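
The PBQP changes swap std::auto_ptr for OwningPtr and move ownership explicitly with take(). A rough sketch of the same hand-off using std::unique_ptr as the modern stand-in (the types below are illustrative, not the PBQP classes):

    #include <cstdio>
    #include <memory>

    struct Builder { int Kind; };

    struct Allocator {
      std::unique_ptr<Builder> B;
      // Pulls ownership out of the caller's pointer, like builder(b.take()).
      explicit Allocator(std::unique_ptr<Builder> &Arg) : B(Arg.release()) {}
    };

    int main() {
      std::unique_ptr<Builder> B(new Builder{1});
      Allocator RA(B);                                    // ownership moves into RA
      std::printf("caller still owns: %d\n", B ? 1 : 0);  // prints 0
      std::printf("allocator kind: %d\n", RA.B->Kind);    // prints 1
      return 0;
    }
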
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
index 07ace7a..f82ccbe 100644
--- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -154,14 +154,13 @@ void RegScavenger::unprocess() {
assert(Tracking && "Cannot unprocess because we're not tracking");
MachineInstr *MI = MBBI;
- if (MI->isDebugValue())
- return;
-
- determineKillsAndDefs();
+ if (!MI->isDebugValue()) {
+ determineKillsAndDefs();
- // Commit the changes.
- setUsed(KillRegs);
- setUnused(DefRegs);
+ // Commit the changes.
+ setUsed(KillRegs);
+ setUnused(DefRegs);
+ }
if (MBBI == MBB->begin()) {
MBBI = MachineBasicBlock::iterator(NULL);
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 71e7a21..e4da6a4 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -262,6 +262,9 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
if (UseOp < 0)
Dep = SDep(SU, SDep::Artificial);
else {
+ // Set the hasPhysRegDefs only for physreg defs that have a use within
+ // the scheduling region.
+ SU->hasPhysRegDefs = true;
Dep = SDep(SU, SDep::Data, *Alias);
RegUse = UseSU->getInstr();
Dep.setMinLatency(
@@ -318,6 +321,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
}
if (!MO.isDef()) {
+ SU->hasPhysRegUses = true;
// Either insert a new Reg2SUnits entry with an empty SUnits list, or
// retrieve the existing SUnits list for this register's uses.
// Push this SUnit on the use list.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index eb16095..2e09ec0 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -205,6 +205,7 @@ namespace {
SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
+ SDValue visitVSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
SDValue visitSETCC(SDNode *N);
SDValue visitSIGN_EXTEND(SDNode *N);
@@ -243,7 +244,6 @@ namespace {
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
- SDValue visitMEMBARRIER(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
@@ -1127,6 +1127,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
+ case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
@@ -1165,7 +1166,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
- case ISD::MEMBARRIER: return visitMEMBARRIER(N);
}
return SDValue();
}
@@ -4164,6 +4164,46 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Canonicalize integer abs.
+ // vselect (setg[te] X, 0), X, -X ->
+ // vselect (setgt X, -1), X, -X ->
+ // vselect (setl[te] X, 0), -X, X ->
+ // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ bool isAbs = false;
+ bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+ if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+ (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
+ N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
+ isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
+ else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
+ N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
+ isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
+
+ if (isAbs) {
+ EVT VT = LHS.getValueType();
+ SDValue Shift = DAG.getNode(
+ ISD::SRA, DL, VT, LHS,
+ DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
+ }
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4453,7 +4493,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::SETCC) {
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
- if (VT.isVector() && !LegalOperations) {
+ if (VT.isVector() && !LegalOperations &&
+ TLI.getBooleanContents(true) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
EVT N0VT = N0.getOperand(0).getValueType();
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// of the same size as the compared operands. Only optimize sext(setcc())
@@ -7110,25 +7152,40 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
BasePtr.getNode() && "Expected BasePtr operand");
- APInt OV =
- cast<ConstantSDNode>(Offset)->getAPIntValue();
- if (AM == ISD::PRE_DEC)
- OV = -OV;
+ // We need to replace ptr0 in the following expression:
+ // x0 * offset0 + y0 * ptr0 = t0
+ // knowing that
+ // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
+ //
+ // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
+ // indexed load/store and the expression that needs to be rewritten.
+ //
+ // Therefore, we have:
+ // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
ConstantSDNode *CN =
cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
- APInt CNV = CN->getAPIntValue();
- if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1)
- CNV += OV;
- else
- CNV -= OV;
+ int X0, X1, Y0, Y1;
+ APInt Offset0 = CN->getAPIntValue();
+ APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
- SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0);
- SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0));
- if (OffsetIdx == 0)
- std::swap(NewOp1, NewOp2);
+ X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
+ Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
+ X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
+ Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
- SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(),
+ unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
+
+ APInt CNV = Offset0;
+ if (X0 < 0) CNV = -CNV;
+ if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
+ else CNV = CNV - Offset1;
+
+ // We can now generate the new expression.
+ SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
+ SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
+
+ SDValue NewUse = DAG.getNode(Opcode,
OtherUses[i]->getDebugLoc(),
OtherUses[i]->getValueType(0), NewOp1, NewOp2);
DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
@@ -9065,6 +9122,51 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(N->getValueType(0));
+ // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
+ // nodes often generate nop CONCAT_VECTOR nodes.
+ // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
+ // place the incoming vectors at the exact same location.
+ SDValue SingleSource = SDValue();
+ unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Op = N->getOperand(i);
+
+ if (Op.getOpcode() == ISD::UNDEF)
+ continue;
+
+ // Check if this is the identity extract:
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+
+ // Find the single incoming vector for the extract_subvector.
+ if (SingleSource.getNode()) {
+ if (Op.getOperand(0) != SingleSource)
+ return SDValue();
+ } else {
+ SingleSource = Op.getOperand(0);
+
+ // Check the source type is the same as the type of the result.
+ // If not, this concat may extend the vector, so we can not
+ // optimize it away.
+ if (SingleSource.getValueType() != N->getValueType(0))
+ return SDValue();
+ }
+
+ unsigned IdentityIndex = i * PartNumElem;
+ ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ // The extract index must be constant.
+ if (!CS)
+ return SDValue();
+
+ // Check that we are reading from the identity index.
+ if (CS->getZExtValue() != IdentityIndex)
+ return SDValue();
+ }
+
+ if (SingleSource.getNode())
+ return SingleSource;
+
return SDValue();
}
@@ -9125,6 +9227,44 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
}
+// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
+static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+ SmallVector<SDValue, 4> Ops;
+ EVT ConcatVT = N0.getOperand(0).getValueType();
+ unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
+ unsigned NumConcats = NumElts / NumElemsPerConcat;
+
+ // Look at every vector that's inserted. We're looking for exact
+ // subvector-sized copies from a concatenated vector.
+ for (unsigned I = 0; I != NumConcats; ++I) {
+ // Make sure we're dealing with a copy.
+ unsigned Begin = I * NumElemsPerConcat;
+ if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
+ return SDValue();
+
+ for (unsigned J = 1; J != NumElemsPerConcat; ++J) {
+ if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
+ return SDValue();
+ }
+
+ unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
+ if (FirstElt < N0.getNumOperands())
+ Ops.push_back(N0.getOperand(FirstElt));
+ else
+ Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(),
+ Ops.size());
+}
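A hedged illustration of the shuffle-of-concats pattern this helper rewrites (A, B, C, D are hypothetical v2i32 values; DAG and dl assumed in scope):

// N0 = concat_vectors(A, B), N1 = concat_vectors(C, D); parts are v2i32.
int Mask[] = { 2, 3, 4, 5 };  // whole-part copies: B (elts 2-3), then C (4-5)
SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, N0, N1, Mask);
// Every run of NumElemsPerConcat mask entries starts at a multiple of the
// part size and is consecutive, so the shuffle becomes concat_vectors(B, C).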
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -9226,6 +9366,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
+ Level < AfterLegalizeVectorOps &&
+ (N1.getOpcode() == ISD::UNDEF ||
+ (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
+ SDValue V = partitionShuffleOfConcats(N, DAG);
+
+ if (V.getNode())
+ return V;
+ }
+
// If this shuffle node is simply a swizzle of another shuffle node,
// and it reverses the swizzle of the previous shuffle then we can
// optimize shuffle(shuffle(x, undef), undef) -> x.
@@ -9262,59 +9413,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) {
- if (!TLI.getShouldFoldAtomicFences())
- return SDValue();
-
- SDValue atomic = N->getOperand(0);
- switch (atomic.getOpcode()) {
- case ISD::ATOMIC_CMP_SWAP:
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- break;
- default:
- return SDValue();
- }
-
- SDValue fence = atomic.getOperand(0);
- if (fence.getOpcode() != ISD::MEMBARRIER)
- return SDValue();
-
- switch (atomic.getOpcode()) {
- case ISD::ATOMIC_CMP_SWAP:
- return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
- fence.getOperand(0),
- atomic.getOperand(1), atomic.getOperand(2),
- atomic.getOperand(3)), atomic.getResNo());
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
- fence.getOperand(0),
- atomic.getOperand(1), atomic.getOperand(2)),
- atomic.getResNo());
- default:
- return SDValue();
- }
-}
-
/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
/// an AND to a vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 9ac738e..288499a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1505,3 +1505,61 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
return true;
}
+
+bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
+ assert(LI->hasOneUse() &&
+ "tryToFoldLoad expected a LoadInst with a single use");
+ // We know that the load has a single use, but don't know what it is. If it
+ // isn't one of the folded instructions, then we can't succeed here. Handle
+ // this by scanning the single-use users of the load until we get to FoldInst.
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
+
+ const Instruction *TheUser = LI->use_back();
+ while (TheUser != FoldInst && // Scan up until we find FoldInst.
+ // Stay in the right block.
+ TheUser->getParent() == FoldInst->getParent() &&
+ --MaxUsers) { // Don't scan too far.
+ // If there are multiple or no uses of this instruction, then bail out.
+ if (!TheUser->hasOneUse())
+ return false;
+
+ TheUser = TheUser->use_back();
+ }
+
+ // If we didn't find the fold instruction, then we failed to collapse the
+ // sequence.
+ if (TheUser != FoldInst)
+ return false;
+
+ // Don't try to fold volatile loads. Target has to deal with alignment
+ // constraints.
+ if (LI->isVolatile())
+ return false;
+
+ // Figure out which vreg this is going into. If there is no assigned vreg yet
+ // then there actually was no reference to it. Perhaps the load is referenced
+ // by a dead instruction.
+ unsigned LoadReg = getRegForValue(LI);
+ if (LoadReg == 0)
+ return false;
+
+ // We can't fold if this vreg has no uses or more than one use. Multiple uses
+ // may mean that the instruction got lowered to multiple MIs, or the use of
+ // the loaded value ended up being multiple operands of the result.
+ if (!MRI.hasOneUse(LoadReg))
+ return false;
+
+ MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg);
+ MachineInstr *User = &*RI;
+
+ // Set the insertion point properly. Folding the load can cause generation of
+ // other random instructions (like sign extends) for addressing modes; make
+ // sure they get inserted in a logical place before the new instruction.
+ FuncInfo.InsertPt = User;
+ FuncInfo.MBB = User->getParent();
+
+ // Ask the target to try folding the load.
+ return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI);
+}
+
+
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 51cc254..2a1d8c2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2759,8 +2759,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(DAG.getConstant(0, MVT::i32));
Results.push_back(Node->getOperand(0));
break;
- case ISD::ATOMIC_FENCE:
- case ISD::MEMBARRIER: {
+ case ISD::ATOMIC_FENCE: {
// If the target didn't lower this, lower it to '__sync_synchronize()' call
// FIXME: handle "fence singlethread" more efficiently.
TargetLowering::ArgListTy Args;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index d19c13b..cd2f060 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -777,7 +777,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
case ISD::INSERT_VECTOR_ELT:
Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
- case ISD::MEMBARRIER: Res = PromoteIntOp_MEMBARRIER(N); break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
case ISD::VSELECT:
@@ -961,17 +960,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
N->getOperand(1), Idx), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
- SDValue NewOps[6];
- DebugLoc dl = N->getDebugLoc();
- NewOps[0] = N->getOperand(0);
- for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
- SDValue Flag = GetPromotedInteger(N->getOperand(i));
- NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
- }
- return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0);
-}
-
SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
// Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
// the operand in place.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 54ea926..1c4274a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -270,7 +270,6 @@ private:
SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
- SDValue PromoteIntOp_MEMBARRIER(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
@@ -582,6 +581,7 @@ private:
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_TRUNCATE(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5ec8535..04c6bfd 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1046,6 +1046,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break;
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
@@ -1062,7 +1063,6 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::FTRUNC:
- case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
@@ -1272,8 +1272,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
DebugLoc DL = N->getDebugLoc();
- // The input operands all must have the same type, and we know the result the
- // result type is valid. Convert this to a buildvector which extracts all the
+ // The input operands all must have the same type, and we know the result
+ // type is valid. Convert this to a buildvector which extracts all the
// input elements.
// TODO: If the input elements are power-two vectors, we could convert this to
// a new CONCAT_VECTORS node with elements that are half-wide.
@@ -1293,6 +1293,66 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
&Elts[0], Elts.size());
}
+SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
+ // The result type is legal, but the input type is illegal. If splitting
+ // ends up with the result type of each half still being legal, just
+ // do that. If, however, that would result in an illegal result type,
+ // we can try to get more clever with power-two vectors. Specifically,
+ // split the input type, but also widen the result element size, then
+ // concatenate the halves and truncate again. For example, consider a target
+ // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit
+ // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do:
+ // %inlo = v4i32 extract_subvector %in, 0
+ // %inhi = v4i32 extract_subvector %in, 4
+ // %lo16 = v4i16 trunc v4i32 %inlo
+ // %hi16 = v4i16 trunc v4i32 %inhi
+ // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
+ // %res = v8i8 trunc v8i16 %in16
+ //
+ // Without this transform, the original truncate would end up being
+ // scalarized, which is pretty much always a last resort.
+ SDValue InVec = N->getOperand(0);
+ EVT InVT = InVec->getValueType(0);
+ EVT OutVT = N->getValueType(0);
+ unsigned NumElements = OutVT.getVectorNumElements();
+ // Widening should have already made sure this is a power-two vector
+ // if we're trying to split it at all. assert() that's true, just in case.
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+
+ unsigned InElementSize = InVT.getVectorElementType().getSizeInBits();
+ unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits();
+
+ // If the input elements are at most twice the width of the result
+ // elements (e.g. a v8i32 -> v8i16 truncate), just use the normal
+ // splitting. Our trick only works if there's room to split more than once.
+ if (InElementSize <= OutElementSize * 2)
+ return SplitVecOp_UnaryOp(N);
+ DebugLoc DL = N->getDebugLoc();
+
+ // Extract the halves of the input via extract_subvector.
+ EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), NumElements/2);
+ SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec,
+ DAG.getIntPtrConstant(0));
+ SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec,
+ DAG.getIntPtrConstant(NumElements/2));
+ // Truncate them to 1/2 the element size.
+ EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
+ NumElements/2);
+ SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec);
+ SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec);
+ // Concatenate them to get the full intermediate truncation result.
+ EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
+ SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
+ HalfHi);
+ // Now finish up by truncating all the way down to the original result
+ // type. This should normally be something that ends up being legal directly,
+ // but in theory if a target has very wide vectors and an annoyingly
+ // restricted set of legal types, this split can chain to build things up.
+ return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
+}
+
SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 6424431..15235c8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2785,7 +2785,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
}
// Handle the scalar case first.
- if (Outputs.size() == 1)
+ if (Scalar1 && Scalar2)
return Outputs.back();
// Otherwise build a big vector out of the scalar elements we generated.
@@ -5252,14 +5252,14 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) {
SDVTList VTs = getVTList(VT);
- return getMachineNode(Opcode, dl, VTs, 0, 0);
+ return getMachineNode(Opcode, dl, VTs, None);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
@@ -5267,7 +5267,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
@@ -5275,20 +5275,20 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
SDValue Op1, SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2, Op3 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT);
- return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) {
SDVTList VTs = getVTList(VT1, VT2);
- return getMachineNode(Opcode, dl, VTs, 0, 0);
+ return getMachineNode(Opcode, dl, VTs, None);
}
MachineSDNode *
@@ -5296,7 +5296,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
EVT VT1, EVT VT2, SDValue Op1) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
@@ -5304,7 +5304,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
@@ -5313,15 +5313,15 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2, Op3 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
EVT VT1, EVT VT2,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2);
- return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
@@ -5330,7 +5330,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
SDValue Ops[] = { Op1, Op2 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
@@ -5339,39 +5339,41 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
SDValue Op1, SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
SDValue Ops[] = { Op1, Op2, Op3 };
- return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
EVT VT1, EVT VT2, EVT VT3,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
- return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
EVT VT2, EVT VT3, EVT VT4,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
- return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
ArrayRef<EVT> ResultTys,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
- return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+ return getMachineNode(Opcode, dl, VTs, Ops);
}
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> OpsArray) {
bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
MachineSDNode *N;
void *IP = 0;
+ const SDValue *Ops = OpsArray.data();
+ unsigned NumOps = OpsArray.size();
if (DoCSE) {
FoldingSetNodeID ID;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ce40cd6..67db211 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -314,7 +314,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
} else {
Ctx.emitError(ErrMsg);
}
- report_fatal_error("Cannot handle scalar-to-vector conversion!");
+ return DAG.getUNDEF(ValueVT);
}
if (ValueVT.getVectorNumElements() == 1 &&
@@ -5034,6 +5034,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return 0;
}
+ case Intrinsic::annotation:
+ case Intrinsic::ptr_annotation:
+ // Drop the intrinsic, but forward the value
+ setValue(&I, getValue(I.getOperand(0)));
+ return 0;
case Intrinsic::var_annotation:
// Discard annotate attributes
return 0;
@@ -5232,6 +5237,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Entry.isSRet = true;
Entry.isNest = false;
Entry.isByVal = false;
+ Entry.isReturned = false;
Entry.Alignment = Align;
Args.push_back(Entry);
RetTy = Type::getVoidTy(FTy->getContext());
@@ -5249,13 +5255,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Entry.Node = ArgNode; Entry.Ty = V->getType();
unsigned attrInd = i - CS.arg_begin() + 1;
- Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
- Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
- Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
- Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
- Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
- Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
- Entry.Alignment = CS.getParamAlignment(attrInd);
+ Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt);
+ Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt);
+ Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
+ Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet);
+ Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest);
+ Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+ Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned);
+ Entry.Alignment = CS.getParamAlignment(attrInd);
Args.push_back(Entry);
}
@@ -6169,10 +6176,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MatchedRegs.RegVTs.push_back(RegVT);
MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
- i != e; ++i)
- MatchedRegs.Regs.push_back
- (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
-
+ i != e; ++i) {
+ if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
+ MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC));
+ else {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(), "inline asm error: This value"
+ " type register class is not natively supported!");
+ report_fatal_error("inline asm error: This value type register "
+ "class is not natively supported!");
+ }
+ }
// Use the produced MatchedRegs object to
MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
Chain, &Flag, CS.getInstruction());
@@ -6389,6 +6403,28 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
/// migrated to using LowerCall, this hook should be integrated into SDISel.
std::pair<SDValue, SDValue>
TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
+ // Handle the incoming return values from the call.
+ CLI.Ins.clear();
+ SmallVector<EVT, 4> RetTys;
+ ComputeValueVTs(*this, CLI.RetTy, RetTys);
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
+ }
+
// Handle all of the outgoing arguments.
CLI.Outs.clear();
CLI.OutVals.clear();
@@ -6442,6 +6478,26 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
else if (Args[i].isZExt)
ExtendKind = ISD::ZERO_EXTEND;
+ // Conservatively only handle 'returned' on non-vectors for now
+ if (Args[i].isReturned && !Op.getValueType().isVector()) {
+ assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
+ "unexpected use of 'returned'");
+ // Before passing 'returned' to the target lowering code, ensure that
+ // either the register MVT and the actual EVT are the same size or that
+ // the return value and argument are extended in the same way; in these
+ // cases it's safe to pass the argument register value unchanged as the
+ // return register value (although it's at the target's option whether
+ // to do so)
+ // TODO: allow code generation to take advantage of partially preserved
+ // registers rather than clobbering the entire register when the
+ // parameter extension method is not compatible with the return
+ // extension method
+ if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
+ (ExtendKind != ISD::ANY_EXTEND &&
+ CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt))
+ Flags.setReturned();
+ }
+
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind);
@@ -6461,28 +6517,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
}
- // Handle the incoming return values from the call.
- CLI.Ins.clear();
- SmallVector<EVT, 4> RetTys;
- ComputeValueVTs(*this, CLI.RetTy, RetTys);
- for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
- EVT VT = RetTys[I];
- MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
- for (unsigned i = 0; i != NumRegs; ++i) {
- ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT;
- MyFlags.Used = CLI.IsReturnValueUsed;
- if (CLI.RetSExt)
- MyFlags.Flags.setSExt();
- if (CLI.RetZExt)
- MyFlags.Flags.setZExt();
- if (CLI.IsInReg)
- MyFlags.Flags.setInReg();
- CLI.Ins.push_back(MyFlags);
- }
- }
-
SmallVector<SDValue, 4> InVals;
CLI.Chain = LowerCall(CLI, InVals);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 3b5823b..47b0391 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -54,7 +54,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::DELETED_NODE: return "<<Deleted Node!>>";
#endif
case ISD::PREFETCH: return "Prefetch";
- case ISD::MEMBARRIER: return "MemBarrier";
case ISD::ATOMIC_FENCE: return "AtomicFence";
case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
case ISD::ATOMIC_SWAP: return "AtomicSwap";
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index eeea9e4..e21f26e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -63,12 +63,16 @@ STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+STATISTIC(NumEntryBlocks, "Number of entry blocks encountered");
+STATISTIC(NumFastIselFailLowerArguments,
+ "Number of entry blocks where fast isel failed to lower arguments");
#ifndef NDEBUG
static cl::opt<bool>
EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
cl::desc("Enable extra verbose messages in the \"fast\" "
"instruction selector"));
+
// Terminators
STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
@@ -742,7 +746,7 @@ public:
} // end anonymous namespace
void SelectionDAGISel::DoInstructionSelection() {
- DEBUG(errs() << "===== Instruction selection begins: BB#"
+ DEBUG(dbgs() << "===== Instruction selection begins: BB#"
<< FuncInfo->MBB->getNumber()
<< " '" << FuncInfo->MBB->getName() << "'\n");
@@ -801,7 +805,7 @@ void SelectionDAGISel::DoInstructionSelection() {
CurDAG->setRoot(Dummy.getValue());
}
- DEBUG(errs() << "===== Instruction selection ends:\n");
+ DEBUG(dbgs() << "===== Instruction selection ends:\n");
PostprocessISelDAG();
}
@@ -831,84 +835,6 @@ void SelectionDAGISel::PrepareEHLandingPad() {
if (Reg) MBB->addLiveIn(Reg);
}
-/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
-/// load into the specified FoldInst. Note that we could have a sequence where
-/// multiple LLVM IR instructions are folded into the same machineinstr. For
-/// example we could have:
-/// A: x = load i32 *P
-/// B: y = icmp A, 42
-/// C: br y, ...
-///
-/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and
-/// any other folded instructions) because it is between A and C.
-///
-/// If we succeed in folding the load into the operation, return true.
-///
-bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
- const Instruction *FoldInst,
- FastISel *FastIS) {
- // We know that the load has a single use, but don't know what it is. If it
- // isn't one of the folded instructions, then we can't succeed here. Handle
- // this by scanning the single-use users of the load until we get to FoldInst.
- unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs.
-
- const Instruction *TheUser = LI->use_back();
- while (TheUser != FoldInst && // Scan up until we find FoldInst.
- // Stay in the right block.
- TheUser->getParent() == FoldInst->getParent() &&
- --MaxUsers) { // Don't scan too far.
- // If there are multiple or no uses of this instruction, then bail out.
- if (!TheUser->hasOneUse())
- return false;
-
- TheUser = TheUser->use_back();
- }
-
- // If we didn't find the fold instruction, then we failed to collapse the
- // sequence.
- if (TheUser != FoldInst)
- return false;
-
- // Don't try to fold volatile loads. Target has to deal with alignment
- // constraints.
- if (LI->isVolatile()) return false;
-
- // Figure out which vreg this is going into. If there is no assigned vreg yet
- // then there actually was no reference to it. Perhaps the load is referenced
- // by a dead instruction.
- unsigned LoadReg = FastIS->getRegForValue(LI);
- if (LoadReg == 0)
- return false;
-
- // Check to see what the uses of this vreg are. If it has no uses, or more
- // than one use (at the machine instr level) then we can't fold it.
- MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
- if (RI == RegInfo->reg_end())
- return false;
-
- // See if there is exactly one use of the vreg. If there are multiple uses,
- // then the instruction got lowered to multiple machine instructions or the
- // use of the loaded value ended up being multiple operands of the result, in
- // either case, we can't fold this.
- MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
- if (PostRI != RegInfo->reg_end())
- return false;
-
- assert(RI.getOperand().isUse() &&
- "The only use of the vreg must be a use, we haven't emitted the def!");
-
- MachineInstr *User = &*RI;
-
- // Set the insertion point properly. Folding the load can cause generation of
- // other random instructions (like sign extends) for addressing modes, make
- // sure they get inserted in a logical place before the new instruction.
- FuncInfo->InsertPt = User;
- FuncInfo->MBB = User->getParent();
-
- // Ask the target to try folding the load.
- return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
-}
-
/// isFoldedOrDeadInstruction - Return true if the specified instruction is
/// side-effect free and is either dead or folded into a generated instruction.
/// Return false if it needs to be emitted.
@@ -1054,9 +980,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Emit code for any incoming arguments. This must happen before
// beginning FastISel on the entry block.
if (LLVMBB == &Fn.getEntryBlock()) {
+ ++NumEntryBlocks;
+
// Lower any arguments needed in this block if this is the entry block.
if (!FastIS->LowerArguments()) {
// Fast isel failed to lower these arguments
+ ++NumFastIselFailLowerArguments;
if (EnableFastISelAbortArgs)
llvm_unreachable("FastISel didn't lower all arguments");
@@ -1106,7 +1035,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
BeforeInst->hasOneUse() &&
- TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) {
+ FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) {
// If we succeeded, don't re-select the load.
BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
--NumFastIselRemaining;
@@ -1178,8 +1107,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->recomputeInsertPt();
} else {
// Lower any arguments needed in this block if this is the entry block.
- if (LLVMBB == &Fn.getEntryBlock())
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ ++NumEntryBlocks;
LowerArguments(Fn);
+ }
}
if (Begin != BI)
@@ -1771,7 +1702,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
if (!NowDeadNodes.empty())
CurDAG->RemoveDeadNodes(NowDeadNodes);
- DEBUG(errs() << "ISEL: Match complete!\n");
+ DEBUG(dbgs() << "ISEL: Match complete!\n");
}
enum ChainResult {
@@ -2276,9 +2207,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
SmallVector<SDNode*, 3> ChainNodesMatched;
SmallVector<SDNode*, 3> GlueResultNodesMatched;
- DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
+ DEBUG(dbgs() << "ISEL: Starting pattern match on root node: ";
NodeToMatch->dump(CurDAG);
- errs() << '\n');
+ dbgs() << '\n');
// Determine where to start the interpreter. Normally we start at opcode #0,
// but if the state machine starts with an OPC_SwitchOpcode, then we
@@ -2290,7 +2221,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// Already computed the OpcodeOffset table, just index into it.
if (N.getOpcode() < OpcodeOffset.size())
MatcherIndex = OpcodeOffset[N.getOpcode()];
- DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n");
+ DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n");
} else if (MatcherTable[0] == OPC_SwitchOpcode) {
// Otherwise, the table isn't computed, but the state machine does start
@@ -2357,7 +2288,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (!Result)
break;
- DEBUG(errs() << " Skipped scope entry (due to false predicate) at "
+ DEBUG(dbgs() << " Skipped scope entry (due to false predicate) at "
<< "index " << MatcherIndexOfPredicate
<< ", continuing at " << FailIndex << "\n");
++NumDAGIselRetries;
@@ -2487,7 +2418,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (CaseSize == 0) break;
// Otherwise, execute the case we found.
- DEBUG(errs() << " OpcodeSwitch from " << SwitchStart
+ DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart
<< " to " << MatcherIndex << "\n");
continue;
}
@@ -2519,7 +2450,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (CaseSize == 0) break;
// Otherwise, execute the case we found.
- DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
<< "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
continue;
}
@@ -2787,7 +2718,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// If this is a normal EmitNode command, just create the new node and
// add the results to the RecordedNodes list.
Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
- VTList, Ops.data(), Ops.size());
+ VTList, Ops);
// Add all the non-glue/non-chain results to the RecordedNodes list.
for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
@@ -2863,9 +2794,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
->setMemRefs(MemRefs, MemRefs + NumMemRefs);
}
- DEBUG(errs() << " "
+ DEBUG(dbgs() << " "
<< (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
- << " node: "; Res->dump(CurDAG); errs() << "\n");
+ << " node: "; Res->dump(CurDAG); dbgs() << "\n");
// If this was a MorphNodeTo then we're completely done!
if (Opcode == OPC_MorphNodeTo) {
@@ -2940,7 +2871,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// If the code reached this point, then the match failed. See if there is
// another child to try in the current 'Scope', otherwise pop it until we
// find a case to check.
- DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
+ DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
++NumDAGIselRetries;
while (1) {
if (MatchScopes.empty()) {
@@ -2960,7 +2891,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
MatcherIndex = LastScope.FailIndex;
- DEBUG(errs() << " Continuing at " << MatcherIndex << "\n");
+ DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n");
InputChain = LastScope.InputChain;
InputGlue = LastScope.InputGlue;
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
index 9ab4918..2feea59 100644
--- a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
@@ -70,14 +70,14 @@ ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
// Debugging level for shrink wrapping.
enum ShrinkWrapDebugLevel {
- None, BasicInfo, Iterations, Details
+ Disabled, BasicInfo, Iterations, Details
};
static cl::opt<enum ShrinkWrapDebugLevel>
ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
cl::desc("Print shrink wrapping debugging information"),
cl::values(
- clEnumVal(None , "disable debug output"),
+ clEnumVal(Disabled , "disable debug output"),
clEnumVal(BasicInfo , "print basic DF sets"),
clEnumVal(Iterations, "print SR sets for each iteration"),
clEnumVal(Details , "print all DF sets"),
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
index f42bdbd..8074d16 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -620,12 +620,55 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
const TargetLoweringObjectFile *tlof)
: TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
+ initActions();
+
+ // Perform these initializations only once.
+ IsLittleEndian = TD->isLittleEndian();
+ PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
+ MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
+ MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
+ = MaxStoresPerMemmoveOptSize = 4;
+ UseUnderscoreSetJmp = false;
+ UseUnderscoreLongJmp = false;
+ SelectIsExpensive = false;
+ IntDivIsCheap = false;
+ Pow2DivIsCheap = false;
+ JumpIsExpensive = false;
+ PredictableSelectIsExpensive = false;
+ StackPointerRegisterToSaveRestore = 0;
+ ExceptionPointerRegister = 0;
+ ExceptionSelectorRegister = 0;
+ BooleanContents = UndefinedBooleanContent;
+ BooleanVectorContents = UndefinedBooleanContent;
+ SchedPreferenceInfo = Sched::ILP;
+ JumpBufSize = 0;
+ JumpBufAlignment = 0;
+ MinFunctionAlignment = 0;
+ PrefFunctionAlignment = 0;
+ PrefLoopAlignment = 0;
+ MinStackArgumentAlignment = 1;
+ InsertFencesForAtomic = false;
+ SupportJumpTables = true;
+ MinimumJumpTableEntries = 4;
+
+ InitLibcallNames(LibcallRoutineNames, TM);
+ InitCmpLibcallCCs(CmpLibcallCCs);
+ InitLibcallCallingConvs(LibcallCallingConvs);
+}
+
+TargetLoweringBase::~TargetLoweringBase() {
+ delete &TLOF;
+}
+
+void TargetLoweringBase::initActions() {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
memset(LoadExtActions, 0, sizeof(LoadExtActions));
memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
memset(CondCodeActions, 0, sizeof(CondCodeActions));
+ memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+ memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
// Set default actions for various operations.
for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
@@ -702,45 +745,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
// here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
//
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
-
- IsLittleEndian = TD->isLittleEndian();
- PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
- memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
- memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
- MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
- MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
- = MaxStoresPerMemmoveOptSize = 4;
- UseUnderscoreSetJmp = false;
- UseUnderscoreLongJmp = false;
- SelectIsExpensive = false;
- IntDivIsCheap = false;
- Pow2DivIsCheap = false;
- JumpIsExpensive = false;
- PredictableSelectIsExpensive = false;
- StackPointerRegisterToSaveRestore = 0;
- ExceptionPointerRegister = 0;
- ExceptionSelectorRegister = 0;
- BooleanContents = UndefinedBooleanContent;
- BooleanVectorContents = UndefinedBooleanContent;
- SchedPreferenceInfo = Sched::ILP;
- JumpBufSize = 0;
- JumpBufAlignment = 0;
- MinFunctionAlignment = 0;
- PrefFunctionAlignment = 0;
- PrefLoopAlignment = 0;
- MinStackArgumentAlignment = 1;
- ShouldFoldAtomicFences = false;
- InsertFencesForAtomic = false;
- SupportJumpTables = true;
- MinimumJumpTableEntries = 4;
-
- InitLibcallNames(LibcallRoutineNames, TM);
- InitCmpLibcallCCs(CmpLibcallCCs);
- InitLibcallCallingConvs(LibcallCallingConvs);
-}
-
-TargetLoweringBase::~TargetLoweringBase() {
- delete &TLOF;
}
MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 3bdca4c..7e7359a 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -523,11 +523,6 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
const MCSection *TargetLoweringObjectFileMachO::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
-
- // Handle thread local data.
- if (Kind.isThreadBSS()) return TLSBSSSection;
- if (Kind.isThreadData()) return TLSDataSection;
-
if (Kind.isText())
return GV->isWeakForLinker() ? TextCoalSection : TextSection;
@@ -580,6 +575,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
if (Kind.isBSSLocal())
return DataBSSSection;
+ // Handle thread local data.
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+ if (Kind.isThreadData()) return TLSDataSection;
+
// Otherwise, just drop the variable in the normal data section.
return DataSection;
}
@@ -782,3 +781,49 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
return getDataSection();
}
+void TargetLoweringObjectFileCOFF::
+emitModuleFlags(MCStreamer &Streamer,
+ ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ Mangler *Mang, const TargetMachine &TM) const {
+ MDNode *LinkerOptions = 0;
+
+ // Look for the "Linker Options" flag, since it's the only one we support.
+ for (ArrayRef<Module::ModuleFlagEntry>::iterator
+ i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) {
+ const Module::ModuleFlagEntry &MFE = *i;
+ StringRef Key = MFE.Key->getString();
+ Value *Val = MFE.Val;
+ if (Key == "Linker Options") {
+ LinkerOptions = cast<MDNode>(Val);
+ break;
+ }
+ }
+ if (!LinkerOptions)
+ return;
+
+ // Emit the linker options to the .drectve section. According to the
+ // spec, this section is a space-separated string of flags for the linker.
+ const MCSection *Sec = getDrectveSection();
+ Streamer.SwitchSection(Sec);
+ for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
+ MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
+ for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
+ MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
+ StringRef Op = MDOption->getString();
+ // Lead with a space for consistency with our dllexport implementation.
+ std::string Escaped(" ");
+ if (Op.find(" ") != StringRef::npos) {
+ // The PE-COFF spec says args with spaces must be quoted. It doesn't say
+ // how to escape quotes, but it probably uses this algorithm:
+ // http://msdn.microsoft.com/en-us/library/17w5ykft(v=vs.85).aspx
+ // FIXME: Reuse escaping code from Support/Windows/Program.inc
+ Escaped.push_back('\"');
+ Escaped.append(Op);
+ Escaped.push_back('\"');
+ } else {
+ Escaped.append(Op);
+ }
+ Streamer.EmitBytes(Escaped);
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 0f59d01..435a5e7 100644
--- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -50,3 +50,29 @@ StringRef TargetOptions::getTrapFunctionName() const {
return TrapFuncName;
}
+bool TargetOptions::operator==(const TargetOptions &TO) {
+#define ARE_EQUAL(X) X == TO.X
+ return
+ ARE_EQUAL(UnsafeFPMath) &&
+ ARE_EQUAL(NoInfsFPMath) &&
+ ARE_EQUAL(NoNaNsFPMath) &&
+ ARE_EQUAL(HonorSignDependentRoundingFPMathOption) &&
+ ARE_EQUAL(UseSoftFloat) &&
+ ARE_EQUAL(NoZerosInBSS) &&
+ ARE_EQUAL(JITExceptionHandling) &&
+ ARE_EQUAL(JITEmitDebugInfo) &&
+ ARE_EQUAL(JITEmitDebugInfoToDisk) &&
+ ARE_EQUAL(GuaranteedTailCallOpt) &&
+ ARE_EQUAL(DisableTailCalls) &&
+ ARE_EQUAL(StackAlignmentOverride) &&
+ ARE_EQUAL(RealignStack) &&
+ ARE_EQUAL(SSPBufferSize) &&
+ ARE_EQUAL(EnableFastISel) &&
+ ARE_EQUAL(PositionIndependentExecutable) &&
+ ARE_EQUAL(EnableSegmentedStacks) &&
+ ARE_EQUAL(UseInitArray) &&
+ ARE_EQUAL(TrapFuncName) &&
+ ARE_EQUAL(FloatABIType) &&
+ ARE_EQUAL(AllowFPOpFusion);
+#undef ARE_EQUAL
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
index 783bfa1..1bf14db 100644
--- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -128,6 +128,8 @@ resolveSchedClass(const MachineInstr *MI) const {
// Get the definition's scheduling class descriptor from this machine model.
unsigned SchedClass = MI->getDesc().getSchedClass();
const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ if (!SCDesc->isValid())
+ return SCDesc;
#ifndef NDEBUG
unsigned NIter = 0;
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index e6dfe10..7ca2bee 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -43,6 +43,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -58,6 +59,12 @@ STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
+// Temporary flag to disable rescheduling.
+static cl::opt<bool>
+EnableRescheduling("twoaddr-reschedule",
+ cl::desc("Coalesce copies by rescheduling (default=true)"),
+ cl::init(true), cl::Hidden);
+
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
MachineFunction *MF;
@@ -426,10 +433,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
/// isTwoAddrUse - Return true if the specified MI uses the specified register
/// as a two-address use. If so, return the destination register by reference.
static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
- const MCInstrDesc &MCID = MI.getDesc();
- unsigned NumOps = MI.isInlineAsm()
- ? MI.getNumOperands() : MCID.getNumOperands();
- for (unsigned i = 0; i != NumOps; ++i) {
+ for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
continue;
@@ -1144,7 +1148,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// If there is one more use of regB later in the same MBB, consider
// re-schedule this MI below it.
- if (rescheduleMIBelowKill(mi, nmi, regB)) {
+ if (EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) {
++NumReSchedDowns;
return true;
}
@@ -1163,7 +1167,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// If there is one more use of regB later in the same MBB, consider
// re-schedule it before this MI if it's legal.
- if (rescheduleKillAboveMI(mi, nmi, regB)) {
+ if (EnableRescheduling && rescheduleKillAboveMI(mi, nmi, regB)) {
++NumReSchedUps;
return true;
}
diff --git a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp
index e3e4ccd..4f0eed4 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp
@@ -9,7 +9,7 @@
#include "DWARFCompileUnit.h"
#include "DWARFContext.h"
-#include "DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARFFormValue.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -165,7 +165,7 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) {
// we were told to parse
const uint8_t *fixed_form_sizes =
- DWARFFormValue::getFixedFormSizesForAddressSize(getAddressByteSize());
+ DWARFFormValue::getFixedFormSizes(getAddressByteSize(), getVersion());
while (offset < next_cu_offset &&
die.extractFast(this, fixed_form_sizes, &offset)) {
diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp
index 9e19310..9f52133 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp
@@ -9,6 +9,9 @@
#include "DWARFContext.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
@@ -107,36 +110,43 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
}
if (DumpType == DIDT_All || DumpType == DIDT_AbbrevDwo) {
- OS << "\n.debug_abbrev.dwo contents:\n";
- getDebugAbbrevDWO()->dump(OS);
+ const DWARFDebugAbbrev *D = getDebugAbbrevDWO();
+ if (D) {
+ OS << "\n.debug_abbrev.dwo contents:\n";
+ getDebugAbbrevDWO()->dump(OS);
+ }
}
- if (DumpType == DIDT_All || DumpType == DIDT_InfoDwo) {
- OS << "\n.debug_info.dwo contents:\n";
- for (unsigned i = 0, e = getNumDWOCompileUnits(); i != e; ++i)
- getDWOCompileUnitAtIndex(i)->dump(OS);
- }
+ if (DumpType == DIDT_All || DumpType == DIDT_InfoDwo)
+ if (getNumDWOCompileUnits()) {
+ OS << "\n.debug_info.dwo contents:\n";
+ for (unsigned i = 0, e = getNumDWOCompileUnits(); i != e; ++i)
+ getDWOCompileUnitAtIndex(i)->dump(OS);
+ }
- if (DumpType == DIDT_All || DumpType == DIDT_StrDwo) {
- OS << "\n.debug_str.dwo contents:\n";
- DataExtractor strDWOData(getStringDWOSection(), isLittleEndian(), 0);
- offset = 0;
- uint32_t strDWOOffset = 0;
- while (const char *s = strDWOData.getCStr(&offset)) {
- OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s);
- strDWOOffset = offset;
+ if (DumpType == DIDT_All || DumpType == DIDT_StrDwo)
+ if (!getStringDWOSection().empty()) {
+ OS << "\n.debug_str.dwo contents:\n";
+ DataExtractor strDWOData(getStringDWOSection(), isLittleEndian(), 0);
+ offset = 0;
+ uint32_t strDWOOffset = 0;
+ while (const char *s = strDWOData.getCStr(&offset)) {
+ OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s);
+ strDWOOffset = offset;
+ }
}
- }
- if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) {
- OS << "\n.debug_str_offsets.dwo contents:\n";
- DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0);
- offset = 0;
- while (offset < getStringOffsetDWOSection().size()) {
- OS << format("0x%8.8x: ", offset);
- OS << format("%8.8x\n", strOffsetExt.getU32(&offset));
+ if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo)
+ if (!getStringOffsetDWOSection().empty()) {
+ OS << "\n.debug_str_offsets.dwo contents:\n";
+ DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0);
+ offset = 0;
+ uint64_t size = getStringOffsetDWOSection().size();
+ while (offset < size) {
+ OS << format("0x%8.8x: ", offset);
+ OS << format("%8.8x\n", strOffsetExt.getU32(&offset));
+ }
}
- }
}
const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
@@ -482,6 +492,22 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address,
return InliningInfo;
}
+static bool consumeCompressedDebugSectionHeader(StringRef &data,
+ uint64_t &OriginalSize) {
+ // Consume "ZLIB" prefix.
+ if (!data.startswith("ZLIB"))
+ return false;
+ data = data.substr(4);
+ // Consume uncompressed section size (big-endian 8 bytes).
+ DataExtractor extractor(data, false, 8);
+ uint32_t Offset = 0;
+ OriginalSize = extractor.getU64(&Offset);
+ if (Offset == 0)
+ return false;
+ data = data.substr(Offset);
+ return true;
+}
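For reference, a sketch of the byte layout the helper above expects at the front of a zdebug_* section (the size value is purely illustrative):

// 4-byte magic, then the uncompressed size as a big-endian uint64,
// followed immediately by the zlib-compressed payload.
const unsigned char ZDebugHeader[] = {
  'Z', 'L', 'I', 'B',                    // magic consumed first
  0, 0, 0, 0, 0, 0, 0x12, 0x34           // OriginalSize = 0x1234 bytes
};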
+
DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
IsLittleEndian(Obj->isLittleEndian()),
AddressSize(Obj->getBytesInAddress()) {
@@ -495,49 +521,55 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
i->getContents(data);
name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes.
- if (name == "debug_info")
- InfoSection = data;
- else if (name == "debug_abbrev")
- AbbrevSection = data;
- else if (name == "debug_line")
- LineSection = data;
- else if (name == "debug_aranges")
- ARangeSection = data;
- else if (name == "debug_frame")
- DebugFrameSection = data;
- else if (name == "debug_str")
- StringSection = data;
- else if (name == "debug_ranges") {
+
+ // Check if debug info section is compressed with zlib.
+ if (name.startswith("zdebug_")) {
+ uint64_t OriginalSize;
+ if (!zlib::isAvailable() ||
+ !consumeCompressedDebugSectionHeader(data, OriginalSize))
+ continue;
+ OwningPtr<MemoryBuffer> UncompressedSection;
+ if (zlib::uncompress(data, UncompressedSection, OriginalSize) !=
+ zlib::StatusOK)
+ continue;
+ // Make data point to the uncompressed contents and keep the buffer alive.
+ name = name.substr(1);
+ data = UncompressedSection->getBuffer();
+ UncompressedSections.push_back(UncompressedSection.take());
+ }
+
+ StringRef *Section = StringSwitch<StringRef*>(name)
+ .Case("debug_info", &InfoSection)
+ .Case("debug_abbrev", &AbbrevSection)
+ .Case("debug_line", &LineSection)
+ .Case("debug_aranges", &ARangeSection)
+ .Case("debug_frame", &DebugFrameSection)
+ .Case("debug_str", &StringSection)
+ .Case("debug_ranges", &RangeSection)
+ .Case("debug_pubnames", &PubNamesSection)
+ .Case("debug_info.dwo", &InfoDWOSection)
+ .Case("debug_abbrev.dwo", &AbbrevDWOSection)
+ .Case("debug_str.dwo", &StringDWOSection)
+ .Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
+ .Case("debug_addr", &AddrSection)
+ // Any more debug info sections go here.
+ .Default(0);
+ if (!Section)
+ continue;
+ *Section = data;
+ if (name == "debug_ranges") {
// FIXME: Use the other dwo range section when we emit it.
RangeDWOSection = data;
- RangeSection = data;
}
- else if (name == "debug_pubnames")
- PubNamesSection = data;
- else if (name == "debug_info.dwo")
- InfoDWOSection = data;
- else if (name == "debug_abbrev.dwo")
- AbbrevDWOSection = data;
- else if (name == "debug_str.dwo")
- StringDWOSection = data;
- else if (name == "debug_str_offsets.dwo")
- StringOffsetDWOSection = data;
- else if (name == "debug_addr")
- AddrSection = data;
- // Any more debug info sections go here.
- else
- continue;
// TODO: Add support for relocations in other sections as needed.
// Record relocations for the debug_info and debug_line sections.
- RelocAddrMap *Map;
- if (name == "debug_info")
- Map = &InfoRelocMap;
- else if (name == "debug_info.dwo")
- Map = &InfoDWORelocMap;
- else if (name == "debug_line")
- Map = &LineRelocMap;
- else
+ RelocAddrMap *Map = StringSwitch<RelocAddrMap*>(name)
+ .Case("debug_info", &InfoRelocMap)
+ .Case("debug_info.dwo", &InfoDWORelocMap)
+ .Case("debug_line", &LineRelocMap)
+ .Default(0);
+ if (!Map)
continue;
if (i->begin_relocations() != i->end_relocations()) {
@@ -547,7 +579,7 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
reloc_e = i->end_relocations();
reloc_i != reloc_e; reloc_i.increment(ec)) {
uint64_t Address;
- reloc_i->getAddress(Address);
+ reloc_i->getOffset(Address);
uint64_t Type;
reloc_i->getType(Type);
uint64_t SymAddr = 0;
@@ -593,4 +625,8 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
}
}
+DWARFContextInMemory::~DWARFContextInMemory() {
+ DeleteContainerPointers(UncompressedSections);
+}
+
void DWARFContextInMemory::anchor() { }
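Editor's note: the rewritten constructor above replaces the if/else ladder with a table-driven lookup and, before that, inflates zlib-compressed "zdebug_" sections so the rest of the reader only ever sees plain section contents. A minimal sketch of the StringSwitch idiom it relies on follows; the struct and helper are illustrative only, not part of LLVM.

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"

    struct DebugSections {
      llvm::StringRef Info, Abbrev, Line;
    };

    // Map a stripped section name onto the member that should receive its
    // contents; one assignment afterwards replaces one per if/else branch.
    static bool recordSection(llvm::StringRef Name, llvm::StringRef Data,
                              DebugSections &S) {
      llvm::StringRef *Slot = llvm::StringSwitch<llvm::StringRef *>(Name)
                                  .Case("debug_info", &S.Info)
                                  .Case("debug_abbrev", &S.Abbrev)
                                  .Case("debug_line", &S.Line)
                                  .Default(0);
      if (!Slot)
        return false;   // not a section this reader cares about
      *Slot = Data;
      return true;
    }

The same pattern is reused a few lines later to pick the relocation map, which is why the unrecognized-section and unrecognized-map cases can both fall through to a plain continue.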
diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.h b/contrib/llvm/lib/DebugInfo/DWARFContext.h
index 37b2729..78c18e6 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFContext.h
+++ b/contrib/llvm/lib/DebugInfo/DWARFContext.h
@@ -161,8 +161,11 @@ class DWARFContextInMemory : public DWARFContext {
StringRef RangeDWOSection;
StringRef AddrSection;
+ SmallVector<MemoryBuffer*, 4> UncompressedSections;
+
public:
DWARFContextInMemory(object::ObjectFile *);
+ ~DWARFContextInMemory();
virtual bool isLittleEndian() const { return IsLittleEndian; }
virtual uint8_t getAddressSize() const { return AddressSize; }
virtual const RelocAddrMap &infoRelocMap() const { return InfoRelocMap; }
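Editor's note: the new UncompressedSections member owns the heap buffers produced when zlib-compressed sections are inflated, and the added destructor releases them with DeleteContainerPointers. A tiny sketch of that ownership pattern, assuming nothing beyond the LLVM ADT headers:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Support/MemoryBuffer.h"

    struct OwnsBuffers {
      // Raw owning pointers; StringRefs elsewhere point into these buffers,
      // so they must stay alive as long as the owner does.
      llvm::SmallVector<llvm::MemoryBuffer *, 4> Buffers;
      ~OwnsBuffers() { llvm::DeleteContainerPointers(Buffers); }  // delete each, then clear
    };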
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp
index 02b15d6..10be7b4 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp
@@ -11,7 +11,7 @@
#include "DWARFCompileUnit.h"
#include "DWARFContext.h"
#include "DWARFDebugAbbrev.h"
-#include "DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARFFormValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
@@ -94,279 +94,87 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
OS << ")\n";
}
-bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
- const uint8_t *fixed_form_sizes,
- uint32_t *offset_ptr) {
- Offset = *offset_ptr;
-
- DataExtractor debug_info_data = cu->getDebugInfoExtractor();
- uint64_t abbrCode = debug_info_data.getULEB128(offset_ptr);
-
- assert(fixed_form_sizes); // For best performance this should be specified!
-
- if (abbrCode) {
- uint32_t offset = *offset_ptr;
-
- AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode);
-
- // Skip all data in the .debug_info for the attributes
- const uint32_t numAttributes = AbbrevDecl->getNumAttributes();
- uint32_t i;
- uint16_t form;
- for (i=0; i<numAttributes; ++i) {
-
- form = AbbrevDecl->getFormByIndex(i);
-
- // FIXME: Currently we're checking if this is less than the last
- // entry in the fixed_form_sizes table, but this should be changed
- // to use dynamic dispatch.
- const uint8_t fixed_skip_size = (form < DW_FORM_ref_sig8) ?
- fixed_form_sizes[form] : 0;
- if (fixed_skip_size)
- offset += fixed_skip_size;
- else {
- bool form_is_indirect = false;
- do {
- form_is_indirect = false;
- uint32_t form_size = 0;
- switch (form) {
- // Blocks if inlined data that have a length field and the data bytes
- // inlined in the .debug_info.
- case DW_FORM_exprloc:
- case DW_FORM_block:
- form_size = debug_info_data.getULEB128(&offset);
- break;
- case DW_FORM_block1:
- form_size = debug_info_data.getU8(&offset);
- break;
- case DW_FORM_block2:
- form_size = debug_info_data.getU16(&offset);
- break;
- case DW_FORM_block4:
- form_size = debug_info_data.getU32(&offset);
- break;
-
- // Inlined NULL terminated C-strings
- case DW_FORM_string:
- debug_info_data.getCStr(&offset);
- break;
-
- // Compile unit address sized values
- case DW_FORM_addr:
- case DW_FORM_ref_addr:
- form_size = cu->getAddressByteSize();
- break;
-
- // 0 sized form.
- case DW_FORM_flag_present:
- form_size = 0;
- break;
-
- // 1 byte values
- case DW_FORM_data1:
- case DW_FORM_flag:
- case DW_FORM_ref1:
- form_size = 1;
- break;
-
- // 2 byte values
- case DW_FORM_data2:
- case DW_FORM_ref2:
- form_size = 2;
- break;
-
- // 4 byte values
- case DW_FORM_strp:
- case DW_FORM_data4:
- case DW_FORM_ref4:
- form_size = 4;
- break;
-
- // 8 byte values
- case DW_FORM_data8:
- case DW_FORM_ref8:
- case DW_FORM_ref_sig8:
- form_size = 8;
- break;
-
- // signed or unsigned LEB 128 values
- case DW_FORM_sdata:
- case DW_FORM_udata:
- case DW_FORM_ref_udata:
- case DW_FORM_GNU_str_index:
- case DW_FORM_GNU_addr_index:
- debug_info_data.getULEB128(&offset);
- break;
-
- case DW_FORM_indirect:
- form_is_indirect = true;
- form = debug_info_data.getULEB128(&offset);
- break;
-
- // FIXME: 64-bit for DWARF64
- case DW_FORM_sec_offset:
- debug_info_data.getU32(offset_ptr);
- break;
-
- default:
- *offset_ptr = Offset;
- return false;
- }
- offset += form_size;
- } while (form_is_indirect);
- }
- }
- *offset_ptr = offset;
- return true;
- } else {
+bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *CU,
+ const uint8_t *FixedFormSizes,
+ uint32_t *OffsetPtr) {
+ Offset = *OffsetPtr;
+ DataExtractor DebugInfoData = CU->getDebugInfoExtractor();
+ uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
+ if (0 == AbbrCode) {
+ // NULL debug tag entry.
AbbrevDecl = NULL;
- return true; // NULL debug tag entry
+ return true;
+ }
+ AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
+ assert(AbbrevDecl);
+ assert(FixedFormSizes); // For best performance this should be specified!
+
+ // Skip all data in the .debug_info for the attributes
+ for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) {
+ uint16_t Form = AbbrevDecl->getFormByIndex(i);
+
+ // FIXME: Currently we're checking if this is less than the last
+ // entry in the fixed_form_sizes table, but this should be changed
+ // to use dynamic dispatch.
+ uint8_t FixedFormSize =
+ (Form < DW_FORM_ref_sig8) ? FixedFormSizes[Form] : 0;
+ if (FixedFormSize)
+ *OffsetPtr += FixedFormSize;
+ else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr,
+ CU)) {
+ // Restore the original offset.
+ *OffsetPtr = Offset;
+ return false;
+ }
}
+ return true;
}
bool
-DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu,
- uint32_t *offset_ptr) {
- DataExtractor debug_info_data = cu->getDebugInfoExtractor();
- const uint32_t cu_end_offset = cu->getNextCompileUnitOffset();
- const uint8_t cu_addr_size = cu->getAddressByteSize();
- uint32_t offset = *offset_ptr;
- if ((offset < cu_end_offset) && debug_info_data.isValidOffset(offset)) {
- Offset = offset;
-
- uint64_t abbrCode = debug_info_data.getULEB128(&offset);
-
- if (abbrCode) {
- AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode);
-
- if (AbbrevDecl) {
- uint16_t tag = AbbrevDecl->getTag();
-
- bool isCompileUnitTag = tag == DW_TAG_compile_unit;
- if(cu && isCompileUnitTag)
- const_cast<DWARFCompileUnit*>(cu)->setBaseAddress(0);
-
- // Skip all data in the .debug_info for the attributes
- const uint32_t numAttributes = AbbrevDecl->getNumAttributes();
- for (uint32_t i = 0; i != numAttributes; ++i) {
- uint16_t attr = AbbrevDecl->getAttrByIndex(i);
- uint16_t form = AbbrevDecl->getFormByIndex(i);
-
- if (isCompileUnitTag &&
- ((attr == DW_AT_entry_pc) || (attr == DW_AT_low_pc))) {
- DWARFFormValue form_value(form);
- if (form_value.extractValue(debug_info_data, &offset, cu)) {
- if (attr == DW_AT_low_pc || attr == DW_AT_entry_pc)
- const_cast<DWARFCompileUnit*>(cu)
- ->setBaseAddress(form_value.getUnsigned());
- }
- } else {
- bool form_is_indirect = false;
- do {
- form_is_indirect = false;
- register uint32_t form_size = 0;
- switch (form) {
- // Blocks if inlined data that have a length field and the data
- // bytes // inlined in the .debug_info
- case DW_FORM_exprloc:
- case DW_FORM_block:
- form_size = debug_info_data.getULEB128(&offset);
- break;
- case DW_FORM_block1:
- form_size = debug_info_data.getU8(&offset);
- break;
- case DW_FORM_block2:
- form_size = debug_info_data.getU16(&offset);
- break;
- case DW_FORM_block4:
- form_size = debug_info_data.getU32(&offset);
- break;
-
- // Inlined NULL terminated C-strings
- case DW_FORM_string:
- debug_info_data.getCStr(&offset);
- break;
-
- // Compile unit address sized values
- case DW_FORM_addr:
- case DW_FORM_ref_addr:
- form_size = cu_addr_size;
- break;
-
- // 0 byte value
- case DW_FORM_flag_present:
- form_size = 0;
- break;
-
- // 1 byte values
- case DW_FORM_data1:
- case DW_FORM_flag:
- case DW_FORM_ref1:
- form_size = 1;
- break;
-
- // 2 byte values
- case DW_FORM_data2:
- case DW_FORM_ref2:
- form_size = 2;
- break;
-
- // 4 byte values
- case DW_FORM_strp:
- form_size = 4;
- break;
-
- case DW_FORM_data4:
- case DW_FORM_ref4:
- form_size = 4;
- break;
-
- // 8 byte values
- case DW_FORM_data8:
- case DW_FORM_ref8:
- case DW_FORM_ref_sig8:
- form_size = 8;
- break;
-
- // signed or unsigned LEB 128 values
- case DW_FORM_sdata:
- case DW_FORM_udata:
- case DW_FORM_ref_udata:
- case DW_FORM_GNU_str_index:
- case DW_FORM_GNU_addr_index:
- debug_info_data.getULEB128(&offset);
- break;
-
- case DW_FORM_indirect:
- form = debug_info_data.getULEB128(&offset);
- form_is_indirect = true;
- break;
-
- // FIXME: 64-bit for DWARF64.
- case DW_FORM_sec_offset:
- debug_info_data.getU32(offset_ptr);
- break;
-
- default:
- *offset_ptr = offset;
- return false;
- }
-
- offset += form_size;
- } while (form_is_indirect);
- }
- }
- *offset_ptr = offset;
- return true;
+DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *CU,
+ uint32_t *OffsetPtr) {
+ DataExtractor DebugInfoData = CU->getDebugInfoExtractor();
+ const uint32_t CUEndOffset = CU->getNextCompileUnitOffset();
+ Offset = *OffsetPtr;
+ if ((Offset >= CUEndOffset) || !DebugInfoData.isValidOffset(Offset))
+ return false;
+ uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
+ if (0 == AbbrCode) {
+ // NULL debug tag entry.
+ AbbrevDecl = NULL;
+ return true;
+ }
+ AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
+ if (0 == AbbrevDecl) {
+ // Restore the original offset.
+ *OffsetPtr = Offset;
+ return false;
+ }
+ bool IsCompileUnitTag = (AbbrevDecl->getTag() == DW_TAG_compile_unit);
+ if (IsCompileUnitTag)
+ const_cast<DWARFCompileUnit*>(CU)->setBaseAddress(0);
+
+ // Skip all data in the .debug_info for the attributes
+ for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) {
+ uint16_t Attr = AbbrevDecl->getAttrByIndex(i);
+ uint16_t Form = AbbrevDecl->getFormByIndex(i);
+
+ if (IsCompileUnitTag &&
+ ((Attr == DW_AT_entry_pc) || (Attr == DW_AT_low_pc))) {
+ DWARFFormValue FormValue(Form);
+ if (FormValue.extractValue(DebugInfoData, OffsetPtr, CU)) {
+ if (Attr == DW_AT_low_pc || Attr == DW_AT_entry_pc)
+ const_cast<DWARFCompileUnit*>(CU)
+ ->setBaseAddress(FormValue.getUnsigned());
}
- } else {
- AbbrevDecl = NULL;
- *offset_ptr = offset;
- return true; // NULL debug tag entry
+ } else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr,
+ CU)) {
+ // Restore the original offset.
+ *OffsetPtr = Offset;
+ return false;
}
}
-
- return false;
+ return true;
}
bool DWARFDebugInfoEntryMinimal::isSubprogramDIE() const {
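Editor's note: both extract routines now bail out early on a zero abbreviation code (a NULL entry), restore the original offset on failure, and delegate every variable-length form to DWARFFormValue::skipValue instead of duplicating the big switch. The variable-length cases ultimately come down to LEB128 decoding; a self-contained sketch of unsigned LEB128, not taken from LLVM, is below.

    #include <cstddef>
    #include <cstdint>

    // Each byte contributes 7 payload bits; the high bit says another byte follows.
    static uint64_t decodeULEB128(const uint8_t *P, size_t *Len) {
      uint64_t Value = 0;
      unsigned Shift = 0;
      size_t N = 0;
      uint8_t Byte;
      do {
        Byte = P[N++];
        Value |= uint64_t(Byte & 0x7f) << Shift;  // low 7 bits are payload
        Shift += 7;
      } while (Byte & 0x80);                      // continuation bit
      if (Len)
        *Len = N;
      return Value;
    }

    // Example: the byte sequence 0xE5 0x8E 0x26 decodes to 624485.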
diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h
index 9c1b2be..9003591 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h
+++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h
@@ -45,12 +45,17 @@ public:
uint32_t *offset_ptr, uint16_t attr, uint16_t form,
unsigned indent = 0) const;
- bool extractFast(const DWARFCompileUnit *cu, const uint8_t *fixed_form_sizes,
- uint32_t *offset_ptr);
+ /// Extracts a debug info entry, which is a child of a given compile unit,
+ /// starting at a given offset. If DIE can't be extracted, returns false and
+ /// doesn't change OffsetPtr.
+ bool extractFast(const DWARFCompileUnit *CU, const uint8_t *FixedFormSizes,
+ uint32_t *OffsetPtr);
/// Extract a debug info entry for a given compile unit from the
/// .debug_info and .debug_abbrev data starting at the given offset.
- bool extract(const DWARFCompileUnit *cu, uint32_t *offset_ptr);
+ /// If compile unit can't be parsed, returns false and doesn't change
+ /// OffsetPtr.
+ bool extract(const DWARFCompileUnit *CU, uint32_t *OffsetPtr);
uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; }
bool isNULL() const { return AbbrevDecl == 0; }
diff --git a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp
index 9f807aa..c5583f9 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARFFormValue.h"
#include "DWARFCompileUnit.h"
#include "DWARFContext.h"
#include "llvm/Support/Debug.h"
@@ -18,39 +18,16 @@
using namespace llvm;
using namespace dwarf;
-static const uint8_t form_sizes_addr4[] = {
- 0, // 0x00 unused
- 4, // 0x01 DW_FORM_addr
- 0, // 0x02 unused
- 0, // 0x03 DW_FORM_block2
- 0, // 0x04 DW_FORM_block4
- 2, // 0x05 DW_FORM_data2
- 4, // 0x06 DW_FORM_data4
- 8, // 0x07 DW_FORM_data8
- 0, // 0x08 DW_FORM_string
- 0, // 0x09 DW_FORM_block
- 0, // 0x0a DW_FORM_block1
- 1, // 0x0b DW_FORM_data1
- 1, // 0x0c DW_FORM_flag
- 0, // 0x0d DW_FORM_sdata
- 4, // 0x0e DW_FORM_strp
- 0, // 0x0f DW_FORM_udata
- 4, // 0x10 DW_FORM_ref_addr
- 1, // 0x11 DW_FORM_ref1
- 2, // 0x12 DW_FORM_ref2
- 4, // 0x13 DW_FORM_ref4
- 8, // 0x14 DW_FORM_ref8
- 0, // 0x15 DW_FORM_ref_udata
- 0, // 0x16 DW_FORM_indirect
- 4, // 0x17 DW_FORM_sec_offset
- 0, // 0x18 DW_FORM_exprloc
- 0, // 0x19 DW_FORM_flag_present
- 8, // 0x20 DW_FORM_ref_sig8
+namespace {
+template <uint8_t AddrSize, uint8_t RefAddrSize> struct FixedFormSizes {
+ static const uint8_t sizes[];
};
+}
-static const uint8_t form_sizes_addr8[] = {
+template <uint8_t AddrSize, uint8_t RefAddrSize>
+const uint8_t FixedFormSizes<AddrSize, RefAddrSize>::sizes[] = {
0, // 0x00 unused
- 8, // 0x01 DW_FORM_addr
+ AddrSize, // 0x01 DW_FORM_addr
0, // 0x02 unused
0, // 0x03 DW_FORM_block2
0, // 0x04 DW_FORM_block4
@@ -65,7 +42,7 @@ static const uint8_t form_sizes_addr8[] = {
0, // 0x0d DW_FORM_sdata
4, // 0x0e DW_FORM_strp
0, // 0x0f DW_FORM_udata
- 8, // 0x10 DW_FORM_ref_addr
+ RefAddrSize, // 0x10 DW_FORM_ref_addr
1, // 0x11 DW_FORM_ref1
2, // 0x12 DW_FORM_ref2
4, // 0x13 DW_FORM_ref4
@@ -78,13 +55,23 @@ static const uint8_t form_sizes_addr8[] = {
8, // 0x20 DW_FORM_ref_sig8
};
+static uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) {
+ // FIXME: Support DWARF64.
+ return (Version == 2) ? AddrSize : 4;
+}
+
const uint8_t *
-DWARFFormValue::getFixedFormSizesForAddressSize(uint8_t addr_size) {
- switch (addr_size) {
- case 4: return form_sizes_addr4;
- case 8: return form_sizes_addr8;
- }
- return NULL;
+DWARFFormValue::getFixedFormSizes(uint8_t AddrSize, uint16_t Version) {
+ uint8_t RefAddrSize = getRefAddrSize(AddrSize, Version);
+ if (AddrSize == 4 && RefAddrSize == 4)
+ return FixedFormSizes<4, 4>::sizes;
+ if (AddrSize == 4 && RefAddrSize == 8)
+ return FixedFormSizes<4, 8>::sizes;
+ if (AddrSize == 8 && RefAddrSize == 4)
+ return FixedFormSizes<8, 4>::sizes;
+ if (AddrSize == 8 && RefAddrSize == 8)
+ return FixedFormSizes<8, 8>::sizes;
+ return 0;
}
bool
@@ -100,14 +87,16 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
switch (Form) {
case DW_FORM_addr:
case DW_FORM_ref_addr: {
- RelocAddrMap::const_iterator AI
- = cu->getRelocMap()->find(*offset_ptr);
+ uint16_t AddrSize =
+ (Form == DW_FORM_addr)
+ ? cu->getAddressByteSize()
+ : getRefAddrSize(cu->getAddressByteSize(), cu->getVersion());
+ RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr);
if (AI != cu->getRelocMap()->end()) {
const std::pair<uint8_t, int64_t> &R = AI->second;
- Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()) +
- R.second;
+ Value.uval = data.getUnsigned(offset_ptr, AddrSize) + R.second;
} else
- Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize());
+ Value.uval = data.getUnsigned(offset_ptr, AddrSize);
break;
}
case DW_FORM_exprloc:
@@ -172,10 +161,17 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
Form = data.getULEB128(offset_ptr);
indirect = true;
break;
- case DW_FORM_sec_offset:
+ case DW_FORM_sec_offset: {
// FIXME: This is 64-bit for DWARF64.
- Value.uval = data.getU32(offset_ptr);
+ RelocAddrMap::const_iterator AI
+ = cu->getRelocMap()->find(*offset_ptr);
+ if (AI != cu->getRelocMap()->end()) {
+ const std::pair<uint8_t, int64_t> &R = AI->second;
+ Value.uval = data.getU32(offset_ptr) + R.second;
+ } else
+ Value.uval = data.getU32(offset_ptr);
break;
+ }
case DW_FORM_flag_present:
Value.uval = 1;
break;
@@ -216,7 +212,6 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
uint32_t *offset_ptr, const DWARFCompileUnit *cu) {
bool indirect = false;
do {
- indirect = false;
switch (form) {
// Blocks if inlined data that have a length field and the data bytes
// inlined in the .debug_info
@@ -249,9 +244,11 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
// Compile unit address sized values
case DW_FORM_addr:
- case DW_FORM_ref_addr:
*offset_ptr += cu->getAddressByteSize();
return true;
+ case DW_FORM_ref_addr:
+ *offset_ptr += getRefAddrSize(cu->getAddressByteSize(), cu->getVersion());
+ return true;
// 0 byte values - implied from the form.
case DW_FORM_flag_present:
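Editor's note: the form-size table is now selected by both address size and DWARF version because DW_FORM_ref_addr changed width between DWARF 2 (address-sized) and DWARF 3+ (4 bytes in 32-bit DWARF); the template simply stamps out the four possible tables. A condensed sketch of the same selection as a function, using raw form codes for illustration (the patch itself keeps the table form):

    #include <cstdint>

    static uint8_t refAddrSize(uint8_t AddrSize, uint16_t DwarfVersion) {
      // DWARF64 is still unhandled here, matching the FIXME in the patch.
      return DwarfVersion == 2 ? AddrSize : 4;
    }

    static uint8_t fixedFormSize(uint16_t Form, uint8_t AddrSize, uint16_t Version) {
      switch (Form) {
      case 0x01: return AddrSize;                        // DW_FORM_addr
      case 0x10: return refAddrSize(AddrSize, Version);  // DW_FORM_ref_addr
      case 0x0b: return 1;                               // DW_FORM_data1
      case 0x05: return 2;                               // DW_FORM_data2
      case 0x06: return 4;                               // DW_FORM_data4
      case 0x07: return 8;                               // DW_FORM_data8
      default:   return 0;                               // variable-length or unknown
      }
    }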
diff --git a/contrib/llvm/lib/DebugInfo/DWARFFormValue.h b/contrib/llvm/lib/DebugInfo/DWARFFormValue.h
deleted file mode 100644
index b863001..0000000
--- a/contrib/llvm/lib/DebugInfo/DWARFFormValue.h
+++ /dev/null
@@ -1,82 +0,0 @@
-//===-- DWARFFormValue.h ----------------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_DWARFFORMVALUE_H
-#define LLVM_DEBUGINFO_DWARFFORMVALUE_H
-
-#include "llvm/Support/DataExtractor.h"
-
-namespace llvm {
-
-class DWARFCompileUnit;
-class raw_ostream;
-
-class DWARFFormValue {
-public:
- struct ValueType {
- ValueType() : data(NULL) {
- uval = 0;
- }
-
- union {
- uint64_t uval;
- int64_t sval;
- const char* cstr;
- };
- const uint8_t* data;
- };
-
- enum {
- eValueTypeInvalid = 0,
- eValueTypeUnsigned,
- eValueTypeSigned,
- eValueTypeCStr,
- eValueTypeBlock
- };
-
-private:
- uint16_t Form; // Form for this value.
- ValueType Value; // Contains all data for the form.
-
-public:
- DWARFFormValue(uint16_t form = 0) : Form(form) {}
- uint16_t getForm() const { return Form; }
- const ValueType& value() const { return Value; }
- void dump(raw_ostream &OS, const DWARFCompileUnit* cu) const;
- bool extractValue(DataExtractor data, uint32_t *offset_ptr,
- const DWARFCompileUnit *cu);
- bool isInlinedCStr() const {
- return Value.data != NULL && Value.data == (const uint8_t*)Value.cstr;
- }
- const uint8_t *BlockData() const;
- uint64_t getReference(const DWARFCompileUnit* cu) const;
-
- /// Resolve any compile unit specific references so that we don't need
- /// the compile unit at a later time in order to work with the form
- /// value.
- bool resolveCompileUnitReferences(const DWARFCompileUnit* cu);
- uint64_t getUnsigned() const { return Value.uval; }
- int64_t getSigned() const { return Value.sval; }
- const char *getAsCString(const DataExtractor *debug_str_data_ptr) const;
- const char *getIndirectCString(const DataExtractor *,
- const DataExtractor *) const;
- uint64_t getIndirectAddress(const DataExtractor *,
- const DWARFCompileUnit *) const;
- bool skipValue(DataExtractor debug_info_data, uint32_t *offset_ptr,
- const DWARFCompileUnit *cu) const;
- static bool skipValue(uint16_t form, DataExtractor debug_info_data,
- uint32_t *offset_ptr, const DWARFCompileUnit *cu);
- static bool isBlockForm(uint16_t form);
- static bool isDataForm(uint16_t form);
- static const uint8_t *getFixedFormSizesForAddressSize(uint8_t addr_size);
-};
-
-}
-
-#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index 906a3a3..e43ba4f 100644
--- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -948,7 +948,7 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
- if (sys::isLittleEndianHost()) {
+ if (sys::IsLittleEndianHost) {
// Little-endian host - the source is ordered from LSB to MSB. Order the
// destination from LSB to MSB: Do a straight copy.
memcpy(Dst, Src, StoreBytes);
@@ -1009,7 +1009,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
break;
}
- if (sys::isLittleEndianHost() != getDataLayout()->isLittleEndian())
+ if (sys::IsLittleEndianHost != getDataLayout()->isLittleEndian())
// Host and target are different endian - reverse the stored bytes.
std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr);
}
@@ -1021,7 +1021,7 @@ static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
uint8_t *Dst = reinterpret_cast<uint8_t *>(
const_cast<uint64_t *>(IntVal.getRawData()));
- if (sys::isLittleEndianHost())
+ if (sys::IsLittleEndianHost)
// Little-endian host - the destination must be ordered from LSB to MSB.
// The source is ordered from LSB to MSB: Do a straight copy.
memcpy(Dst, Src, LoadBytes);
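Editor's note: sys::IsLittleEndianHost is now a compile-time constant rather than a function call, but the logic is unchanged: values are written in host byte order and reversed only when host and target endianness disagree. A standalone sketch of that store path, using a hypothetical 32-bit helper:

    #include <algorithm>
    #include <cstdint>
    #include <cstring>

    static void storeU32(uint8_t *Dst, uint32_t V,
                         bool HostIsLittle, bool TargetIsLittle) {
      std::memcpy(Dst, &V, sizeof(V));        // bytes land in host order
      if (HostIsLittle != TargetIsLittle)
        std::reverse(Dst, Dst + sizeof(V));   // flip to target order
    }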
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index f4e8246..f9b08a0 100644
--- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -15,11 +15,33 @@
#include "llvm-c/ExecutionEngine.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstring>
using namespace llvm;
+// Wrapping the C bindings types.
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue, LLVMGenericValueRef)
+
+inline DataLayout *unwrap(LLVMTargetDataRef P) {
+ return reinterpret_cast<DataLayout*>(P);
+}
+
+inline LLVMTargetDataRef wrap(const DataLayout *P) {
+ return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P));
+}
+
+inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) {
+ return reinterpret_cast<TargetLibraryInfo*>(P);
+}
+
+inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) {
+ TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P);
+ return reinterpret_cast<LLVMTargetLibraryInfoRef>(X);
+}
+
/*===-- Operations on generic values --------------------------------------===*/
LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty,
@@ -132,6 +154,59 @@ LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT,
return 1;
}
+void LLVMInitializeMCJITCompilerOptions(LLVMMCJITCompilerOptions *PassedOptions,
+ size_t SizeOfPassedOptions) {
+ LLVMMCJITCompilerOptions options;
+ options.OptLevel = 0;
+ options.CodeModel = LLVMCodeModelJITDefault;
+ options.NoFramePointerElim = false;
+ options.EnableFastISel = false;
+
+ memcpy(PassedOptions, &options,
+ std::min(sizeof(options), SizeOfPassedOptions));
+}
+
+LLVMBool LLVMCreateMCJITCompilerForModule(
+ LLVMExecutionEngineRef *OutJIT, LLVMModuleRef M,
+ LLVMMCJITCompilerOptions *PassedOptions, size_t SizeOfPassedOptions,
+ char **OutError) {
+ LLVMMCJITCompilerOptions options;
+ // If the user passed a larger sized options struct, then they were compiled
+ // against a newer LLVM. Tell them that something is wrong.
+ if (SizeOfPassedOptions > sizeof(options)) {
+ *OutError = strdup(
+ "Refusing to use options struct that is larger than my own; assuming "
+ "LLVM library mismatch.");
+ return 1;
+ }
+
+ // Defend against the user having an old version of the API by ensuring that
+ // any fields they didn't see are cleared. We must defend against fields being
+ // set to the bitwise equivalent of zero, and assume that this means "do the
+ // default" as if that option hadn't been available.
+ LLVMInitializeMCJITCompilerOptions(&options, sizeof(options));
+ memcpy(&options, PassedOptions, SizeOfPassedOptions);
+
+ TargetOptions targetOptions;
+ targetOptions.NoFramePointerElim = options.NoFramePointerElim;
+ targetOptions.EnableFastISel = options.EnableFastISel;
+
+ std::string Error;
+ EngineBuilder builder(unwrap(M));
+ builder.setEngineKind(EngineKind::JIT)
+ .setErrorStr(&Error)
+ .setUseMCJIT(true)
+ .setOptLevel((CodeGenOpt::Level)options.OptLevel)
+ .setCodeModel(unwrap(options.CodeModel))
+ .setTargetOptions(targetOptions);
+ if (ExecutionEngine *JIT = builder.create()) {
+ *OutJIT = wrap(JIT);
+ return 0;
+ }
+ *OutError = strdup(Error.c_str());
+ return 1;
+}
+
LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
LLVMModuleProviderRef MP,
char **OutError) {
@@ -176,6 +251,8 @@ void LLVMRunStaticDestructors(LLVMExecutionEngineRef EE) {
int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F,
unsigned ArgC, const char * const *ArgV,
const char * const *EnvP) {
+ unwrap(EE)->finalizeObject();
+
std::vector<std::string> ArgVec;
for (unsigned I = 0; I != ArgC; ++I)
ArgVec.push_back(ArgV[I]);
@@ -186,6 +263,8 @@ int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F,
LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F,
unsigned NumArgs,
LLVMGenericValueRef *Args) {
+ unwrap(EE)->finalizeObject();
+
std::vector<GenericValue> ArgVec;
ArgVec.reserve(NumArgs);
for (unsigned I = 0; I != NumArgs; ++I)
@@ -234,7 +313,8 @@ LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
return 1;
}
-void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) {
+void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE,
+ LLVMValueRef Fn) {
return unwrap(EE)->recompileAndRelinkFunction(unwrap<Function>(Fn));
}
@@ -248,5 +328,7 @@ void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
}
void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) {
+ unwrap(EE)->finalizeObject();
+
return unwrap(EE)->getPointerToGlobal(unwrap<GlobalValue>(Global));
}
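Editor's note: the new MCJIT C-API entry points version their options struct by size: defaults are filled in first, only the bytes the caller actually passed are copied over them, and a struct larger than the library's own is rejected as a version mismatch. A sketch of that idiom with a hypothetical Options struct (names are illustrative, not the LLVM API):

    #include <algorithm>
    #include <cstring>

    struct Options { int OptLevel; bool EnableFastISel; };  // hypothetical layout

    static bool acceptOptions(Options &Out, const void *Passed, size_t PassedSize) {
      if (PassedSize > sizeof(Options))
        return false;              // caller built against a newer, larger layout
      Out.OptLevel = 0;            // library defaults first...
      Out.EnableFastISel = false;
      // ...then overlay only what the caller knows about; unseen fields keep defaults.
      std::memcpy(&Out, Passed, std::min(sizeof(Options), PassedSize));
      return true;
    }

The finalizeObject() calls added before running or taking the address of code force relocations to be resolved and page permissions applied, since MCJIT needs an explicit finalize step before generated code is safe to execute.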
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index 526c04e..b95a9e8 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -114,6 +114,15 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \
break;
+#define IMPLEMENT_VECTOR_INTEGER_ICMP(OP, TY) \
+ case Type::VectorTyID: { \
+ assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); \
+ Dest.AggregateVal.resize( Src1.AggregateVal.size() ); \
+ for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++) \
+ Dest.AggregateVal[_i].IntVal = APInt(1, \
+ Src1.AggregateVal[_i].IntVal.OP(Src2.AggregateVal[_i].IntVal));\
+ } break;
+
// Handle pointers specially because they must be compared with only as much
// width as the host has. We _do not_ want to be comparing 64 bit values when
// running on a 32-bit target, otherwise the upper 32 bits might mess up
@@ -129,6 +138,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(eq,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(eq,Ty);
IMPLEMENT_POINTER_ICMP(==);
default:
dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n";
@@ -142,6 +152,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(ne,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(ne,Ty);
IMPLEMENT_POINTER_ICMP(!=);
default:
dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n";
@@ -155,6 +166,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(ult,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(ult,Ty);
IMPLEMENT_POINTER_ICMP(<);
default:
dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n";
@@ -168,6 +180,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(slt,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(slt,Ty);
IMPLEMENT_POINTER_ICMP(<);
default:
dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n";
@@ -181,6 +194,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(ugt,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(ugt,Ty);
IMPLEMENT_POINTER_ICMP(>);
default:
dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n";
@@ -194,6 +208,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(sgt,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(sgt,Ty);
IMPLEMENT_POINTER_ICMP(>);
default:
dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n";
@@ -207,6 +222,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(ule,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(ule,Ty);
IMPLEMENT_POINTER_ICMP(<=);
default:
dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n";
@@ -220,6 +236,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(sle,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(sle,Ty);
IMPLEMENT_POINTER_ICMP(<=);
default:
dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n";
@@ -233,6 +250,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(uge,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(uge,Ty);
IMPLEMENT_POINTER_ICMP(>=);
default:
dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n";
@@ -246,6 +264,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2,
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_INTEGER_ICMP(sge,Ty);
+ IMPLEMENT_VECTOR_INTEGER_ICMP(sge,Ty);
IMPLEMENT_POINTER_ICMP(>=);
default:
dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n";
@@ -285,12 +304,29 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \
break
+#define IMPLEMENT_VECTOR_FCMP_T(OP, TY) \
+ assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); \
+ Dest.AggregateVal.resize( Src1.AggregateVal.size() ); \
+ for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++) \
+ Dest.AggregateVal[_i].IntVal = APInt(1, \
+ Src1.AggregateVal[_i].TY##Val OP Src2.AggregateVal[_i].TY##Val);\
+ break;
+
+#define IMPLEMENT_VECTOR_FCMP(OP) \
+ case Type::VectorTyID: \
+ if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \
+ IMPLEMENT_VECTOR_FCMP_T(OP, Float); \
+ } else { \
+ IMPLEMENT_VECTOR_FCMP_T(OP, Double); \
+ }
+
static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(==, Float);
IMPLEMENT_FCMP(==, Double);
+ IMPLEMENT_VECTOR_FCMP(==);
default:
dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n";
llvm_unreachable(0);
@@ -298,17 +334,65 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
return Dest;
}
+#define IMPLEMENT_SCALAR_NANS(TY, X,Y) \
+ if (TY->isFloatTy()) { \
+ if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \
+ Dest.IntVal = APInt(1,false); \
+ return Dest; \
+ } \
+ } else { \
+ if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) { \
+ Dest.IntVal = APInt(1,false); \
+ return Dest; \
+ } \
+ }
+
+#define MASK_VECTOR_NANS_T(X,Y, TZ, FLAG) \
+ assert(X.AggregateVal.size() == Y.AggregateVal.size()); \
+ Dest.AggregateVal.resize( X.AggregateVal.size() ); \
+ for( uint32_t _i=0;_i<X.AggregateVal.size();_i++) { \
+ if (X.AggregateVal[_i].TZ##Val != X.AggregateVal[_i].TZ##Val || \
+ Y.AggregateVal[_i].TZ##Val != Y.AggregateVal[_i].TZ##Val) \
+ Dest.AggregateVal[_i].IntVal = APInt(1,FLAG); \
+ else { \
+ Dest.AggregateVal[_i].IntVal = APInt(1,!FLAG); \
+ } \
+ }
+
+#define MASK_VECTOR_NANS(TY, X,Y, FLAG) \
+ if (TY->isVectorTy()) { \
+ if (dyn_cast<VectorType>(TY)->getElementType()->isFloatTy()) { \
+ MASK_VECTOR_NANS_T(X, Y, Float, FLAG) \
+ } else { \
+ MASK_VECTOR_NANS_T(X, Y, Double, FLAG) \
+ } \
+ } \
+
+
+
static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2,
- Type *Ty) {
+ Type *Ty)
+{
GenericValue Dest;
+ // if input is scalar value and Src1 or Src2 is NaN return false
+ IMPLEMENT_SCALAR_NANS(Ty, Src1, Src2)
+ // if vector input detect NaNs and fill mask
+ MASK_VECTOR_NANS(Ty, Src1, Src2, false)
+ GenericValue DestMask = Dest;
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(!=, Float);
IMPLEMENT_FCMP(!=, Double);
-
- default:
- dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ IMPLEMENT_VECTOR_FCMP(!=);
+ default:
+ dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
}
+ // in vector case mask out NaN elements
+ if (Ty->isVectorTy())
+ for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++)
+ if (DestMask.AggregateVal[_i].IntVal == false)
+ Dest.AggregateVal[_i].IntVal = APInt(1,false);
+
return Dest;
}
@@ -318,6 +402,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2,
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(<=, Float);
IMPLEMENT_FCMP(<=, Double);
+ IMPLEMENT_VECTOR_FCMP(<=);
default:
dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n";
llvm_unreachable(0);
@@ -331,6 +416,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2,
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(>=, Float);
IMPLEMENT_FCMP(>=, Double);
+ IMPLEMENT_VECTOR_FCMP(>=);
default:
dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n";
llvm_unreachable(0);
@@ -344,6 +430,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2,
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(<, Float);
IMPLEMENT_FCMP(<, Double);
+ IMPLEMENT_VECTOR_FCMP(<);
default:
dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n";
llvm_unreachable(0);
@@ -357,6 +444,7 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
switch (Ty->getTypeID()) {
IMPLEMENT_FCMP(>, Float);
IMPLEMENT_FCMP(>, Double);
+ IMPLEMENT_VECTOR_FCMP(>);
default:
dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n";
llvm_unreachable(0);
@@ -375,18 +463,32 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
return Dest; \
}
+#define IMPLEMENT_VECTOR_UNORDERED(TY, X,Y, _FUNC) \
+ if (TY->isVectorTy()) { \
+ GenericValue DestMask = Dest; \
+ Dest = _FUNC(Src1, Src2, Ty); \
+ for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) \
+ if (DestMask.AggregateVal[_i].IntVal == true) \
+ Dest.AggregateVal[_i].IntVal = APInt(1,true); \
+ return Dest; \
+ }
static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+ IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OEQ)
return executeFCMP_OEQ(Src1, Src2, Ty);
+
}
static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+ IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_ONE)
return executeFCMP_ONE(Src1, Src2, Ty);
}
@@ -394,6 +496,8 @@ static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+ IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLE)
return executeFCMP_OLE(Src1, Src2, Ty);
}
@@ -401,6 +505,8 @@ static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+ IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGE)
return executeFCMP_OGE(Src1, Src2, Ty);
}
@@ -408,6 +514,8 @@ static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+ IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLT)
return executeFCMP_OLT(Src1, Src2, Ty);
}
@@ -415,33 +523,88 @@ static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+ MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+ IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGT)
return executeFCMP_OGT(Src1, Src2, Ty);
}
static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
- if (Ty->isFloatTy())
+ if(Ty->isVectorTy()) {
+ assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+ Dest.AggregateVal.resize( Src1.AggregateVal.size() );
+ if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+ for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
+ Dest.AggregateVal[_i].IntVal = APInt(1,
+ ( (Src1.AggregateVal[_i].FloatVal ==
+ Src1.AggregateVal[_i].FloatVal) &&
+ (Src2.AggregateVal[_i].FloatVal ==
+ Src2.AggregateVal[_i].FloatVal)));
+ } else {
+ for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
+ Dest.AggregateVal[_i].IntVal = APInt(1,
+ ( (Src1.AggregateVal[_i].DoubleVal ==
+ Src1.AggregateVal[_i].DoubleVal) &&
+ (Src2.AggregateVal[_i].DoubleVal ==
+ Src2.AggregateVal[_i].DoubleVal)));
+ }
+ } else if (Ty->isFloatTy())
Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal &&
Src2.FloatVal == Src2.FloatVal));
- else
+ else {
Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal &&
Src2.DoubleVal == Src2.DoubleVal));
+ }
return Dest;
}
static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
Type *Ty) {
GenericValue Dest;
- if (Ty->isFloatTy())
+ if(Ty->isVectorTy()) {
+ assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+ Dest.AggregateVal.resize( Src1.AggregateVal.size() );
+ if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+ for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
+ Dest.AggregateVal[_i].IntVal = APInt(1,
+ ( (Src1.AggregateVal[_i].FloatVal !=
+ Src1.AggregateVal[_i].FloatVal) ||
+ (Src2.AggregateVal[_i].FloatVal !=
+ Src2.AggregateVal[_i].FloatVal)));
+ } else {
+ for( size_t _i=0;_i<Src1.AggregateVal.size();_i++)
+ Dest.AggregateVal[_i].IntVal = APInt(1,
+ ( (Src1.AggregateVal[_i].DoubleVal !=
+ Src1.AggregateVal[_i].DoubleVal) ||
+ (Src2.AggregateVal[_i].DoubleVal !=
+ Src2.AggregateVal[_i].DoubleVal)));
+ }
+ } else if (Ty->isFloatTy())
Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal ||
Src2.FloatVal != Src2.FloatVal));
- else
+ else {
Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal ||
Src2.DoubleVal != Src2.DoubleVal));
+ }
return Dest;
}
+static GenericValue executeFCMP_BOOL(GenericValue Src1, GenericValue Src2,
+ const Type *Ty, const bool val) {
+ GenericValue Dest;
+ if(Ty->isVectorTy()) {
+ assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+ Dest.AggregateVal.resize( Src1.AggregateVal.size() );
+ for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++)
+ Dest.AggregateVal[_i].IntVal = APInt(1,val);
+ } else {
+ Dest.IntVal = APInt(1, val);
+ }
+
+ return Dest;
+}
+
void Interpreter::visitFCmpInst(FCmpInst &I) {
ExecutionContext &SF = ECStack.back();
Type *Ty = I.getOperand(0)->getType();
@@ -450,8 +613,14 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
GenericValue R; // Result
switch (I.getPredicate()) {
- case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break;
- case FCmpInst::FCMP_TRUE: R.IntVal = APInt(1,true); break;
+ default:
+ dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
+ llvm_unreachable(0);
+ break;
+ case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false);
+ break;
+ case FCmpInst::FCMP_TRUE: R = executeFCMP_BOOL(Src1, Src2, Ty, true);
+ break;
case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break;
case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break;
case FCmpInst::FCMP_UEQ: R = executeFCMP_UEQ(Src1, Src2, Ty); break;
@@ -466,9 +635,6 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
case FCmpInst::FCMP_OLE: R = executeFCMP_OLE(Src1, Src2, Ty); break;
case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break;
case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break;
- default:
- dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
- llvm_unreachable(0);
}
SetValue(&I, R, SF);
@@ -502,16 +668,8 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
case FCmpInst::FCMP_ULE: return executeFCMP_ULE(Src1, Src2, Ty);
case FCmpInst::FCMP_OGE: return executeFCMP_OGE(Src1, Src2, Ty);
case FCmpInst::FCMP_UGE: return executeFCMP_UGE(Src1, Src2, Ty);
- case FCmpInst::FCMP_FALSE: {
- GenericValue Result;
- Result.IntVal = APInt(1, false);
- return Result;
- }
- case FCmpInst::FCMP_TRUE: {
- GenericValue Result;
- Result.IntVal = APInt(1, true);
- return Result;
- }
+ case FCmpInst::FCMP_FALSE: return executeFCMP_BOOL(Src1, Src2, Ty, false);
+ case FCmpInst::FCMP_TRUE: return executeFCMP_BOOL(Src1, Src2, Ty, true);
default:
dbgs() << "Unhandled Cmp predicate\n";
llvm_unreachable(0);
@@ -525,27 +683,105 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
GenericValue R; // Result
- switch (I.getOpcode()) {
- case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break;
- case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break;
- case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break;
- case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break;
- case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break;
- case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break;
- case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break;
- case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break;
- case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
- case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
- case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
- case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break;
- case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break;
- case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break;
- case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
- default:
- dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
- llvm_unreachable(0);
+ // First process vector operation
+ if (Ty->isVectorTy()) {
+ assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+ R.AggregateVal.resize(Src1.AggregateVal.size());
+
+ // Macros to execute binary operation 'OP' over integer vectors
+#define INTEGER_VECTOR_OPERATION(OP) \
+ for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \
+ R.AggregateVal[i].IntVal = \
+ Src1.AggregateVal[i].IntVal OP Src2.AggregateVal[i].IntVal;
+
+ // Additional macros to execute binary operations udiv/sdiv/urem/srem since
+ // they have different notation.
+#define INTEGER_VECTOR_FUNCTION(OP) \
+ for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \
+ R.AggregateVal[i].IntVal = \
+ Src1.AggregateVal[i].IntVal.OP(Src2.AggregateVal[i].IntVal);
+
+ // Macros to execute binary operation 'OP' over floating point type TY
+ // (float or double) vectors
+#define FLOAT_VECTOR_FUNCTION(OP, TY) \
+ for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \
+ R.AggregateVal[i].TY = \
+ Src1.AggregateVal[i].TY OP Src2.AggregateVal[i].TY;
+
+ // Macros to choose appropriate TY: float or double and run operation
+ // execution
+#define FLOAT_VECTOR_OP(OP) { \
+ if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) \
+ FLOAT_VECTOR_FUNCTION(OP, FloatVal) \
+ else { \
+ if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \
+ FLOAT_VECTOR_FUNCTION(OP, DoubleVal) \
+ else { \
+ dbgs() << "Unhandled type for OP instruction: " << *Ty << "\n"; \
+ llvm_unreachable(0); \
+ } \
+ } \
+}
+
+ switch(I.getOpcode()){
+ default:
+ dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
+ llvm_unreachable(0);
+ break;
+ case Instruction::Add: INTEGER_VECTOR_OPERATION(+) break;
+ case Instruction::Sub: INTEGER_VECTOR_OPERATION(-) break;
+ case Instruction::Mul: INTEGER_VECTOR_OPERATION(*) break;
+ case Instruction::UDiv: INTEGER_VECTOR_FUNCTION(udiv) break;
+ case Instruction::SDiv: INTEGER_VECTOR_FUNCTION(sdiv) break;
+ case Instruction::URem: INTEGER_VECTOR_FUNCTION(urem) break;
+ case Instruction::SRem: INTEGER_VECTOR_FUNCTION(srem) break;
+ case Instruction::And: INTEGER_VECTOR_OPERATION(&) break;
+ case Instruction::Or: INTEGER_VECTOR_OPERATION(|) break;
+ case Instruction::Xor: INTEGER_VECTOR_OPERATION(^) break;
+ case Instruction::FAdd: FLOAT_VECTOR_OP(+) break;
+ case Instruction::FSub: FLOAT_VECTOR_OP(-) break;
+ case Instruction::FMul: FLOAT_VECTOR_OP(*) break;
+ case Instruction::FDiv: FLOAT_VECTOR_OP(/) break;
+ case Instruction::FRem:
+ if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy())
+ for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
+ R.AggregateVal[i].FloatVal =
+ fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal);
+ else {
+ if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy())
+ for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
+ R.AggregateVal[i].DoubleVal =
+ fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal);
+ else {
+ dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
+ llvm_unreachable(0);
+ }
+ }
+ break;
+ }
+ } else {
+ switch (I.getOpcode()) {
+ default:
+ dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
+ llvm_unreachable(0);
+ break;
+ case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break;
+ case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break;
+ case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break;
+ case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break;
+ case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break;
+ case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break;
+ case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break;
+ case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break;
+ case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
+ case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
+ case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
+ case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break;
+ case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break;
+ case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break;
+ case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
+ }
}
-
SetValue(&I, R, SF);
}
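Editor's note: the interpreter changes above extend the compare and binary-operator paths to vector operands: results become one i1 per lane, and the unordered predicates first build a NaN mask so any lane with a NaN operand is forced to true (or masked to false for the ordered ONE case). A minimal sketch of that per-lane scheme over plain std::vector<double>, independent of the GenericValue machinery:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // "Unordered or equal": a lane is true if either input is NaN or they compare equal.
    static std::vector<bool> cmpUEQ(const std::vector<double> &A,
                                    const std::vector<double> &B) {
      std::vector<bool> Mask(A.size());      // assumes A and B have equal length
      for (std::size_t I = 0; I != A.size(); ++I)
        Mask[I] = std::isnan(A[I]) || std::isnan(B[I]) || A[I] == B[I];
      return Mask;
    }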
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index fee10e1..38aa547 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -14,6 +14,7 @@
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/ExecutionEngine/ObjectBuffer.h"
#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -46,13 +47,14 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
// FIXME: Don't do this here.
sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
- return new MCJIT(M, TM, JMM, GVsWithCode);
+ return new MCJIT(M, TM, JMM ? JMM : new SectionMemoryManager(), GVsWithCode);
}
MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM,
bool AllocateGVsWithCode)
- : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM),
- isCompiled(false), M(m) {
+ : ExecutionEngine(m), TM(tm), Ctx(0),
+ MemMgr(MM ? MM : new SectionMemoryManager()), Dyld(MemMgr),
+ IsLoaded(false), M(m), ObjCache(0) {
setDataLayout(TM->getDataLayout());
}
@@ -64,7 +66,11 @@ MCJIT::~MCJIT() {
delete TM;
}
-void MCJIT::emitObject(Module *m) {
+void MCJIT::setObjectCache(ObjectCache* NewCache) {
+ ObjCache = NewCache;
+}
+
+ObjectBufferStream* MCJIT::emitObject(Module *m) {
/// Currently, MCJIT only supports a single module and the module passed to
/// this function call is expected to be the contained module. The module
/// is passed as a parameter here to prepare for multiple module support in
@@ -77,30 +83,66 @@ void MCJIT::emitObject(Module *m) {
// FIXME: Track compilation state on a per-module basis when multiple modules
// are supported.
// Re-compilation is not supported
- if (isCompiled)
- return;
+ assert(!IsLoaded);
PassManager PM;
PM.add(new DataLayout(*TM->getDataLayout()));
// The RuntimeDyld will take ownership of this shortly
- OwningPtr<ObjectBufferStream> Buffer(new ObjectBufferStream());
+ OwningPtr<ObjectBufferStream> CompiledObject(new ObjectBufferStream());
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM->addPassesToEmitMC(PM, Ctx, Buffer->getOStream(), false)) {
+ if (TM->addPassesToEmitMC(PM, Ctx, CompiledObject->getOStream(), false)) {
report_fatal_error("Target does not support MC emission!");
}
// Initialize passes.
PM.run(*m);
// Flush the output buffer to get the generated code into memory
- Buffer->flush();
+ CompiledObject->flush();
+
+ // If we have an object cache, tell it about the new object.
+ // Note that we're using the compiled image, not the loaded image (as below).
+ if (ObjCache) {
+ // MemoryBuffer is a thin wrapper around the actual memory, so it's OK
+ // to create a temporary object here and delete it after the call.
+ OwningPtr<MemoryBuffer> MB(CompiledObject->getMemBuffer());
+ ObjCache->notifyObjectCompiled(m, MB.get());
+ }
+
+ return CompiledObject.take();
+}
+
+void MCJIT::loadObject(Module *M) {
+
+ // Get a thread lock to make sure we aren't trying to load multiple times
+ MutexGuard locked(lock);
+
+ // FIXME: Track compilation state on a per-module basis when multiple modules
+ // are supported.
+ // Re-compilation is not supported
+ if (IsLoaded)
+ return;
+
+ OwningPtr<ObjectBuffer> ObjectToLoad;
+ // Try to load the pre-compiled object from cache if possible
+ if (0 != ObjCache) {
+ OwningPtr<MemoryBuffer> PreCompiledObject(ObjCache->getObjectCopy(M));
+ if (0 != PreCompiledObject.get())
+ ObjectToLoad.reset(new ObjectBuffer(PreCompiledObject.take()));
+ }
+
+ // If the cache did not contain a suitable object, compile the object
+ if (!ObjectToLoad) {
+ ObjectToLoad.reset(emitObject(M));
+ assert(ObjectToLoad.get() && "Compilation did not produce an object.");
+ }
// Load the object into the dynamic linker.
// handing off ownership of the buffer
- LoadedObject.reset(Dyld.loadObject(Buffer.take()));
+ LoadedObject.reset(Dyld.loadObject(ObjectToLoad.take()));
if (!LoadedObject)
report_fatal_error(Dyld.getErrorString());
@@ -113,7 +155,7 @@ void MCJIT::emitObject(Module *m) {
NotifyObjectEmitted(*LoadedObject);
// FIXME: Add support for per-module compilation state
- isCompiled = true;
+ IsLoaded = true;
}
// FIXME: Add a parameter to identify which object is being finalized when
@@ -122,19 +164,18 @@ void MCJIT::emitObject(Module *m) {
// protection in the interface.
void MCJIT::finalizeObject() {
// If the module hasn't been compiled, just do that.
- if (!isCompiled) {
- // If the call to Dyld.resolveRelocations() is removed from emitObject()
+ if (!IsLoaded) {
+ // If the call to Dyld.resolveRelocations() is removed from loadObject()
// we'll need to do that here.
- emitObject(M);
-
- // Set page permissions.
- MemMgr->applyPermissions();
-
- return;
+ loadObject(M);
+ } else {
+ // Resolve any relocations.
+ Dyld.resolveRelocations();
}
- // Resolve any relocations.
- Dyld.resolveRelocations();
+ StringRef EHData = Dyld.getEHFrameSection();
+ if (!EHData.empty())
+ MemMgr->registerEHFrames(EHData);
// Set page permissions.
MemMgr->applyPermissions();
@@ -151,8 +192,8 @@ void *MCJIT::getPointerToFunction(Function *F) {
// dies.
// FIXME: Add support for per-module compilation state
- if (!isCompiled)
- emitObject(M);
+ if (!IsLoaded)
+ loadObject(M);
if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
bool AbortOnFailure = !F->hasExternalWeakLinkage();
@@ -284,8 +325,8 @@ GenericValue MCJIT::runFunction(Function *F,
void *MCJIT::getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure) {
// FIXME: Add support for per-module compilation state
- if (!isCompiled)
- emitObject(M);
+ if (!IsLoaded)
+ loadObject(M);
if (!isSymbolSearchingDisabled() && MemMgr) {
void *ptr = MemMgr->getPointerToNamedFunction(Name, false);
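Editor's note: MCJIT now splits compilation (emitObject) from loading (loadObject) and threads an optional ObjectCache through both: loadObject first asks the cache for a previously compiled object via getObjectCopy, and emitObject hands anything freshly compiled to notifyObjectCompiled. A sketch of that look-aside flow with a hypothetical cache interface (not the real llvm::ObjectCache class):

    // Hypothetical stand-ins for a compiled-object blob and a cache.
    struct Blob { /* compiled object bytes */ };

    struct Cache {
      virtual Blob *lookup(const char *ModuleID) = 0;            // may return null
      virtual void store(const char *ModuleID, Blob *Obj) = 0;
      virtual ~Cache() {}
    };

    static Blob *getOrCompile(Cache *C, const char *ModuleID,
                              Blob *(*Compile)(const char *)) {
      if (C)
        if (Blob *Hit = C->lookup(ModuleID))
          return Hit;                    // cache hit: skip codegen entirely
      Blob *Fresh = Compile(ModuleID);   // expensive path
      if (C)
        C->store(ModuleID, Fresh);       // make the next run a hit
      return Fresh;
    }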
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
index 283a8e5..8c4bf6e 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/PassManager.h"
@@ -34,16 +35,23 @@ class MCJIT : public ExecutionEngine {
SmallVector<JITEventListener*, 2> EventListeners;
// FIXME: Add support for multiple modules
- bool isCompiled;
+ bool IsLoaded;
Module *M;
OwningPtr<ObjectImage> LoadedObject;
+ // An optional ObjectCache to be notified of compiled objects and used to
+ // perform lookup of pre-compiled code to avoid re-compilation.
+ ObjectCache *ObjCache;
+
public:
~MCJIT();
/// @name ExecutionEngine interface implementation
/// @{
+ /// Sets the object manager that MCJIT should use to avoid compilation.
+ virtual void setObjectCache(ObjectCache *manager);
+
virtual void finalizeObject();
virtual void *getPointerToBasicBlock(BasicBlock *BB);
@@ -102,7 +110,9 @@ protected:
/// this function call is expected to be the contained module. The module
/// is passed as a parameter here to prepare for multiple module support in
/// the future.
- void emitObject(Module *M);
+ ObjectBufferStream* emitObject(Module *M);
+
+ void loadObject(Module *M);
void NotifyObjectEmitted(const ObjectImage& Obj);
void NotifyFreeingObject(const ObjectImage& Obj);
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
index fa35acd..bac77ce 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
@@ -138,9 +138,46 @@ bool SectionMemoryManager::applyPermissions(std::string *ErrMsg)
// Read-write data memory already has the correct permissions
+ // Some platforms with separate data cache and instruction cache require
+ // explicit cache flush, otherwise JIT code manipulations (like resolved
+ // relocations) will get to the data cache but not to the instruction cache.
+ invalidateInstructionCache();
+
return false;
}
+// Determine whether we can register EH tables.
+#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \
+ !defined(__USING_SJLJ_EXCEPTIONS__))
+#define HAVE_EHTABLE_SUPPORT 1
+#else
+#define HAVE_EHTABLE_SUPPORT 0
+#endif
+
+#if HAVE_EHTABLE_SUPPORT
+extern "C" void __register_frame(void*);
+
+static const char *processFDE(const char *Entry) {
+ const char *P = Entry;
+ uint32_t Length = *((uint32_t*)P);
+ P += 4;
+ uint32_t Offset = *((uint32_t*)P);
+ if (Offset != 0)
+ __register_frame((void*)Entry);
+ return P + Length;
+}
+#endif
+
+void SectionMemoryManager::registerEHFrames(StringRef SectionData) {
+#if HAVE_EHTABLE_SUPPORT
+ const char *P = SectionData.data();
+ const char *End = SectionData.data() + SectionData.size();
+ do {
+ P = processFDE(P);
+ } while(P != End);
+#endif
+}
+
error_code SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup,
unsigned Permissions) {
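Editor's note: registerEHFrames lets the JIT hand the freshly loaded .eh_frame data to the runtime so C++ exceptions can unwind through JITed frames; processFDE walks the section as length-prefixed records and registers only real FDEs (records whose second word is nonzero), skipping CIEs. A sketch of that walk, assuming 32-bit record lengths as the patch does:

    #include <cstdint>
    #include <cstring>

    static const char *nextRecord(const char *Entry, bool &IsFDE) {
      uint32_t Length, CiePointer;
      std::memcpy(&Length, Entry, 4);          // record length (excludes this field)
      std::memcpy(&CiePointer, Entry + 4, 4);  // 0 marks a CIE, nonzero an FDE
      IsFDE = (CiePointer != 0);
      return Entry + 4 + Length;               // start of the following record
    }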
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 409b25f..a08b508 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -25,10 +25,15 @@ using namespace llvm::object;
// Empty out-of-line virtual destructor as the key function.
RTDyldMemoryManager::~RTDyldMemoryManager() {}
+void RTDyldMemoryManager::registerEHFrames(StringRef SectionData) {}
RuntimeDyldImpl::~RuntimeDyldImpl() {}
namespace llvm {
+StringRef RuntimeDyldImpl::getEHFrameSection() {
+ return StringRef();
+}
+
// Resolve the relocations for all symbols we currently know about.
void RuntimeDyldImpl::resolveRelocations() {
// First, resolve relocations associated with external symbols.
@@ -96,7 +101,8 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {
bool isCommon = flags & SymbolRef::SF_Common;
if (isCommon) {
// Add the common symbols to a list. We'll allocate them all below.
- uint64_t Align = getCommonSymbolAlignment(*i);
+ uint32_t Align;
+ Check(i->getAlignment(Align));
uint64_t Size = 0;
Check(i->getSize(Size));
CommonSize += Size + Align;
@@ -154,18 +160,8 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {
isFirstRelocation = false;
}
- ObjRelocationInfo RI;
- RI.SectionID = SectionID;
- Check(i->getAdditionalInfo(RI.AdditionalInfo));
- Check(i->getOffset(RI.Offset));
- Check(i->getSymbol(RI.Symbol));
- Check(i->getType(RI.Type));
-
- DEBUG(dbgs() << "\t\tAddend: " << RI.AdditionalInfo
- << " Offset: " << format("%p", (uintptr_t)RI.Offset)
- << " Type: " << (uint32_t)(RI.Type & 0xffffffffL)
- << "\n");
- processRelocationRef(RI, *obj, LocalSections, LocalSymbols, Stubs);
+ processRelocationRef(SectionID, *i, *obj, LocalSections, LocalSymbols,
+ Stubs);
}
}
@@ -183,7 +179,7 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
if (!Addr)
report_fatal_error("Unable to allocate memory for common symbols!");
uint64_t Offset = 0;
- Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, TotalSize, 0));
+ Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, 0));
memset(Addr, 0, TotalSize);
DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID
@@ -243,6 +239,12 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
Check(Section.isReadOnlyData(IsReadOnly));
Check(Section.getSize(DataSize));
Check(Section.getName(Name));
+ if (StubSize > 0) {
+ unsigned StubAlignment = getStubAlignment();
+ unsigned EndAlignment = (DataSize | Alignment) & -(DataSize | Alignment);
+ if (StubAlignment > EndAlignment)
+ StubBufSize += StubAlignment - EndAlignment;
+ }
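The padding computed above reserves room so that stubs appended after the section data can start on their required boundary: (DataSize | Alignment) & -(DataSize | Alignment) isolates the lowest set bit, i.e. the largest power of two guaranteed to divide the end of the data. A self-contained restatement of the idiom; the helper names are illustrative:

#include <stdint.h>

// Illustrative helpers mirroring the expression above.
static uint64_t lowestSetBit(uint64_t X) {
  return X & -X; // e.g. 0b1011000 -> 0b0001000
}

static uint64_t stubPadding(uint64_t DataSize, uint64_t Alignment,
                            uint64_t StubAlignment) {
  uint64_t EndAlignment = lowestSetBit(DataSize | Alignment);
  return StubAlignment > EndAlignment ? StubAlignment - EndAlignment : 0;
}

// Example: a 20-byte section aligned to 16 ends on a 4-byte boundary, so
// stubs needing 8-byte alignment require up to 4 bytes of padding:
//   stubPadding(20, 16, 8) == 4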
unsigned Allocate;
unsigned SectionID = Sections.size();
@@ -295,8 +297,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
<< "\n");
}
- Sections.push_back(SectionEntry(Name, Addr, Allocate, DataSize,
- (uintptr_t)pData));
+ Sections.push_back(SectionEntry(Name, Addr, DataSize, (uintptr_t)pData));
return SectionID;
}
@@ -339,7 +340,25 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE,
}
uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
- if (Arch == Triple::arm) {
+ if (Arch == Triple::aarch64) {
+ // This stub has to be able to access the full address space,
+ // since symbol lookup won't necessarily find a handy, in-range,
+ // PLT stub for functions which could be anywhere.
+ uint32_t *StubAddr = (uint32_t*)Addr;
+
+ // Stub can use ip0 (== x16) to calculate address
+ *StubAddr = 0xd2e00010; // movz ip0, #:abs_g3:<addr>
+ StubAddr++;
+ *StubAddr = 0xf2c00010; // movk ip0, #:abs_g2_nc:<addr>
+ StubAddr++;
+ *StubAddr = 0xf2a00010; // movk ip0, #:abs_g1_nc:<addr>
+ StubAddr++;
+ *StubAddr = 0xf2800010; // movk ip0, #:abs_g0_nc:<addr>
+ StubAddr++;
+ *StubAddr = 0xd61f0200; // br ip0
+
+ return Addr;
+ } else if (Arch == Triple::arm) {
// TODO: There is only an ARM far stub now. We should add the Thumb stub,
// and stubs for branches Thumb - ARM and ARM - Thumb.
uint32_t *StubAddr = (uint32_t*)Addr;
@@ -380,6 +399,13 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
writeInt32BE(Addr+40, 0x4E800420); // bctr
return Addr;
+ } else if (Arch == Triple::systemz) {
+ writeInt16BE(Addr, 0xC418); // lgrl %r1,.+8
+ writeInt16BE(Addr+2, 0x0000);
+ writeInt16BE(Addr+4, 0x0004);
+ writeInt16BE(Addr+6, 0x07F1); // brc 15,%r1
+ // 8-byte address stored at Addr + 8
+ return Addr;
}
return Addr;
}
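The SystemZ stub written above packs a PC-relative literal load and an indirect branch in front of an 8-byte literal slot; the R_390_64 relocation created later in processRelocationRef fills that slot with the target address. A standalone sketch of the 16-byte layout and of the big-endian patching it relies on; the helper names are illustrative and the real code defers the address patch to the relocation:

#include <stdint.h>

// Big-endian store helpers mirroring writeInt16BE / writeInt64BE.
static void put16BE(uint8_t *P, uint16_t V) { P[0] = V >> 8; P[1] = V & 0xFF; }
static void put64BE(uint8_t *P, uint64_t V) {
  for (int i = 0; i < 8; ++i)
    P[i] = (V >> (56 - 8 * i)) & 0xFF;
}

// Illustrative: emit the SystemZ stub and patch its literal with the target.
static void emitAndPatchSystemZStub(uint8_t *Stub, uint64_t Target) {
  put16BE(Stub + 0, 0xC418); // lgrl %r1, .+8 (load 8-byte address below)
  put16BE(Stub + 2, 0x0000);
  put16BE(Stub + 4, 0x0004); // offset of 4 halfwords = 8 bytes
  put16BE(Stub + 6, 0x07F1); // br %r1
  put64BE(Stub + 8, Target); // literal consumed by lgrl; R_390_64 in the diff
}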
@@ -401,26 +427,14 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID,
Sections[SectionID].LoadAddress = Addr;
}
-void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE,
- uint64_t Value) {
- // Ignore relocations for sections that were not loaded
- if (Sections[RE.SectionID].Address != 0) {
- DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
- << " + " << RE.Offset << " ("
- << format("%p", Sections[RE.SectionID].Address + RE.Offset) << ")"
- << " RelType: " << RE.RelType
- << " Addend: " << RE.Addend
- << "\n");
-
- resolveRelocation(Sections[RE.SectionID], RE.Offset,
- Value, RE.RelType, RE.Addend);
- }
-}
-
void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
uint64_t Value) {
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
- resolveRelocationEntry(Relocs[i], Value);
+ const RelocationEntry &RE = Relocs[i];
+ // Ignore relocations for sections that were not loaded
+ if (Sections[RE.SectionID].Address == 0)
+ continue;
+ resolveRelocation(RE, Value);
}
}
@@ -534,4 +548,8 @@ StringRef RuntimeDyld::getErrorString() {
return Dyld->getErrorString();
}
+StringRef RuntimeDyld::getEHFrameSection() {
+ return Dyld->getEHFrameSection();
+}
+
} // end namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index b8537b1..d4d84d3 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -41,7 +41,7 @@ error_code check(error_code Err) {
template<class ELFT>
class DyldELFObject
: public ELFObjectFile<ELFT> {
- LLVM_ELF_IMPORT_TYPES(ELFT)
+ LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
typedef Elf_Shdr_Impl<ELFT> Elf_Shdr;
typedef Elf_Sym_Impl<ELFT> Elf_Sym;
@@ -151,6 +151,14 @@ void DyldELFObject<ELFT>::updateSymbolAddress(const SymbolRef &SymRef,
namespace llvm {
+StringRef RuntimeDyldELF::getEHFrameSection() {
+ for (int i = 0, e = Sections.size(); i != e; ++i) {
+ if (Sections[i].Name == ".eh_frame")
+ return StringRef((const char*)Sections[i].Address, Sections[i].Size);
+ }
+ return StringRef();
+}
+
ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) {
if (Buffer->getBufferSize() < ELF::EI_NIDENT)
llvm_unreachable("Unexpected ELF object size");
@@ -269,6 +277,85 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section,
}
}
+void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ uint32_t *TargetPtr = reinterpret_cast<uint32_t*>(Section.Address + Offset);
+ uint64_t FinalAddress = Section.LoadAddress + Offset;
+
+ DEBUG(dbgs() << "resolveAArch64Relocation, LocalAddress: 0x"
+ << format("%llx", Section.Address + Offset)
+ << " FinalAddress: 0x" << format("%llx",FinalAddress)
+ << " Value: 0x" << format("%llx",Value)
+ << " Type: 0x" << format("%x",Type)
+ << " Addend: 0x" << format("%llx",Addend)
+ << "\n");
+
+ switch (Type) {
+ default:
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
+ case ELF::R_AARCH64_ABS64: {
+ uint64_t *TargetPtr = reinterpret_cast<uint64_t*>(Section.Address + Offset);
+ *TargetPtr = Value + Addend;
+ break;
+ }
+ case ELF::R_AARCH64_PREL32: { // test-shift.ll (.eh_frame)
+ uint64_t Result = Value + Addend - FinalAddress;
+ assert(static_cast<int64_t>(Result) >= INT32_MIN &&
+ static_cast<int64_t>(Result) <= UINT32_MAX);
+ *TargetPtr = static_cast<uint32_t>(Result & 0xffffffffU);
+ break;
+ }
+ case ELF::R_AARCH64_CALL26: // fallthrough
+ case ELF::R_AARCH64_JUMP26: {
+ // Operation: S+A-P. Set the Call or B immediate value to bits 0x0ffffffc of
+ // the result.
+ uint64_t BranchImm = Value + Addend - FinalAddress;
+
+ // "Check that -2^27 <= result < 2^27".
+ assert(-(1LL << 27) <= static_cast<int64_t>(BranchImm) &&
+ static_cast<int64_t>(BranchImm) < (1LL << 27));
+ // Immediate goes in bits 25:0 of B and BL.
+ *TargetPtr |= static_cast<uint32_t>(BranchImm & 0xffffffcU) >> 2;
+ break;
+ }
+ case ELF::R_AARCH64_MOVW_UABS_G3: {
+ uint64_t Result = Value + Addend;
+ // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
+ *TargetPtr |= Result >> (48 - 5);
+ // Shift is "lsl #48", in bits 22:21
+ *TargetPtr |= 3 << 21;
+ break;
+ }
+ case ELF::R_AARCH64_MOVW_UABS_G2_NC: {
+ uint64_t Result = Value + Addend;
+ // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
+ *TargetPtr |= ((Result & 0xffff00000000ULL) >> (32 - 5));
+ // Shift is "lsl #32", in bits 22:21
+ *TargetPtr |= 2 << 21;
+ break;
+ }
+ case ELF::R_AARCH64_MOVW_UABS_G1_NC: {
+ uint64_t Result = Value + Addend;
+ // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
+ *TargetPtr |= ((Result & 0xffff0000U) >> (16 - 5));
+ // Shift is "lsl #16", in bits 22:21
+ *TargetPtr |= 1 << 21;
+ break;
+ }
+ case ELF::R_AARCH64_MOVW_UABS_G0_NC: {
+ uint64_t Result = Value + Addend;
+ // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
+ *TargetPtr |= ((Result & 0xffffU) << 5);
+ // Shift is "lsl #0", in bits 22:21. No action needed.
+ break;
+ }
+ }
+}
+
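Each of the G0..G3 cases above drops one 16-bit slice of the absolute address into the imm16 field (bits 20:5) of the corresponding movz/movk and sets the shift selector in the hw field (bits 22:21). A standalone restatement of that arithmetic; the function name is illustrative:

#include <stdint.h>

// Illustrative helper: patch one movz/movk with the Group-th 16-bit slice.
static uint32_t patchMovWide(uint32_t Insn, uint64_t Target, unsigned Group) {
  uint64_t Chunk = (Target >> (16 * Group)) & 0xFFFFULL; // 16-bit slice
  Insn |= (uint32_t)(Chunk << 5);                        // imm16 in bits 20:5
  Insn |= Group << 21;                                   // lsl #(16*Group)
  return Insn;
}

// Composing an address with the stub's four instructions:
//   patchMovWide(0xd2e00010, Addr, 3); // movz ip0, #:abs_g3:
//   patchMovWide(0xf2c00010, Addr, 2); // movk ip0, #:abs_g2_nc:
//   patchMovWide(0xf2a00010, Addr, 1); // movk ip0, #:abs_g1_nc:
//   patchMovWide(0xf2800010, Addr, 0); // movk ip0, #:abs_g0_nc: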
void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
uint64_t Offset,
uint32_t Value,
@@ -541,6 +628,11 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
llvm_unreachable("Relocation R_PPC64_REL32 overflow");
writeInt32BE(LocalAddress, delta);
} break;
+ case ELF::R_PPC64_REL64: {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t Delta = Value - FinalAddress + Addend;
+ writeInt64BE(LocalAddress, Delta);
+ } break;
case ELF::R_PPC64_ADDR64 :
writeInt64BE(LocalAddress, Value + Addend);
break;
@@ -560,6 +652,48 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
}
}
+void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ uint8_t *LocalAddress = Section.Address + Offset;
+ switch (Type) {
+ default:
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
+ case ELF::R_390_PC16DBL:
+ case ELF::R_390_PLT16DBL: {
+ int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset);
+ assert(int16_t(Delta / 2) * 2 == Delta && "R_390_PC16DBL overflow");
+ writeInt16BE(LocalAddress, Delta / 2);
+ break;
+ }
+ case ELF::R_390_PC32DBL:
+ case ELF::R_390_PLT32DBL: {
+ int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset);
+ assert(int32_t(Delta / 2) * 2 == Delta && "R_390_PC32DBL overflow");
+ writeInt32BE(LocalAddress, Delta / 2);
+ break;
+ }
+ case ELF::R_390_PC32: {
+ int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset);
+ assert(int32_t(Delta) == Delta && "R_390_PC32 overflow");
+ writeInt32BE(LocalAddress, Delta);
+ break;
+ }
+ case ELF::R_390_64:
+ writeInt64BE(LocalAddress, Value + Addend);
+ break;
+ }
+}
+
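R_390_PC16DBL and R_390_PC32DBL store the PC-relative distance in halfwords, which is why the code above divides the delta by two and asserts that the truncated value round-trips. A standalone form of that computation with the evenness and range checks split out; the helper name is illustrative:

#include <assert.h>
#include <stdint.h>

// Illustrative helper: halfword distance between relocation site and target;
// Bits is the width of the instruction field being patched.
static int64_t pcRelHalfwords(uint64_t Target, int64_t Addend, uint64_t Place,
                              unsigned Bits) {
  int64_t Delta = (int64_t)(Target + Addend) - (int64_t)Place;
  assert((Delta & 1) == 0 && "SystemZ PC-relative distances are even");
  int64_t Half = Delta / 2;
  assert(Half >= -(1LL << (Bits - 1)) && Half < (1LL << (Bits - 1)) &&
         "relocation overflow");
  return Half; // written back with writeInt16BE / writeInt32BE
}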
+void RuntimeDyldELF::resolveRelocation(const RelocationEntry &RE,
+ uint64_t Value) {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend);
+}
+
void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
uint64_t Offset,
uint64_t Value,
@@ -574,6 +708,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
(uint32_t)(Value & 0xffffffffL), Type,
(uint32_t)(Addend & 0xffffffffL));
break;
+ case Triple::aarch64:
+ resolveAArch64Relocation(Section, Offset, Value, Type, Addend);
+ break;
case Triple::arm: // Fall through.
case Triple::thumb:
resolveARMRelocation(Section, Offset,
@@ -589,19 +726,25 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
case Triple::ppc64:
resolvePPC64Relocation(Section, Offset, Value, Type, Addend);
break;
+ case Triple::systemz:
+ resolveSystemZRelocation(Section, Offset, Value, Type, Addend);
+ break;
default: llvm_unreachable("Unsupported CPU type!");
}
}
-void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
+void RuntimeDyldELF::processRelocationRef(unsigned SectionID,
+ RelocationRef RelI,
ObjectImage &Obj,
ObjSectionToIDMap &ObjSectionToID,
const SymbolTableMap &Symbols,
StubMap &Stubs) {
-
- uint32_t RelType = (uint32_t)(Rel.Type & 0xffffffffL);
- intptr_t Addend = (intptr_t)Rel.AdditionalInfo;
- const SymbolRef &Symbol = Rel.Symbol;
+ uint64_t RelType;
+ Check(RelI.getType(RelType));
+ int64_t Addend;
+ Check(RelI.getAdditionalInfo(Addend));
+ SymbolRef Symbol;
+ Check(RelI.getSymbol(Symbol));
// Obtain the symbol name which is referenced in the relocation
StringRef TargetName;
@@ -617,14 +760,14 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
Symbol.getType(SymType);
if (lsi != Symbols.end()) {
Value.SectionID = lsi->second.first;
- Value.Addend = lsi->second.second;
+ Value.Addend = lsi->second.second + Addend;
} else {
// Search for the symbol in the global symbol table
SymbolTableMap::const_iterator gsi =
GlobalSymbolTable.find(TargetName.data());
if (gsi != GlobalSymbolTable.end()) {
Value.SectionID = gsi->second.first;
- Value.Addend = gsi->second.second;
+ Value.Addend = gsi->second.second + Addend;
} else {
switch (SymType) {
case SymbolRef::ST_Debug: {
@@ -657,21 +800,73 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
}
}
}
- DEBUG(dbgs() << "\t\tRel.SectionID: " << Rel.SectionID
- << " Rel.Offset: " << Rel.Offset
+ uint64_t Offset;
+ Check(RelI.getOffset(Offset));
+
+ DEBUG(dbgs() << "\t\tSectionID: " << SectionID
+ << " Offset: " << Offset
<< "\n");
- if (Arch == Triple::arm &&
+ if (Arch == Triple::aarch64 &&
+ (RelType == ELF::R_AARCH64_CALL26 ||
+ RelType == ELF::R_AARCH64_JUMP26)) {
+ // This is an AArch64 branch relocation, need to use a stub function.
+ DEBUG(dbgs() << "\t\tThis is an AArch64 branch relocation.");
+ SectionEntry &Section = Sections[SectionID];
+
+ // Look for an existing stub.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end()) {
+ resolveRelocation(Section, Offset,
+ (uint64_t)Section.Address + i->second, RelType, 0);
+ DEBUG(dbgs() << " Stub function found\n");
+ } else {
+ // Create a new stub function.
+ DEBUG(dbgs() << " Create a new stub function\n");
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+ Section.StubOffset);
+
+ RelocationEntry REmovz_g3(SectionID,
+ StubTargetAddr - Section.Address,
+ ELF::R_AARCH64_MOVW_UABS_G3, Value.Addend);
+ RelocationEntry REmovk_g2(SectionID,
+ StubTargetAddr - Section.Address + 4,
+ ELF::R_AARCH64_MOVW_UABS_G2_NC, Value.Addend);
+ RelocationEntry REmovk_g1(SectionID,
+ StubTargetAddr - Section.Address + 8,
+ ELF::R_AARCH64_MOVW_UABS_G1_NC, Value.Addend);
+ RelocationEntry REmovk_g0(SectionID,
+ StubTargetAddr - Section.Address + 12,
+ ELF::R_AARCH64_MOVW_UABS_G0_NC, Value.Addend);
+
+ if (Value.SymbolName) {
+ addRelocationForSymbol(REmovz_g3, Value.SymbolName);
+ addRelocationForSymbol(REmovk_g2, Value.SymbolName);
+ addRelocationForSymbol(REmovk_g1, Value.SymbolName);
+ addRelocationForSymbol(REmovk_g0, Value.SymbolName);
+ } else {
+ addRelocationForSection(REmovz_g3, Value.SectionID);
+ addRelocationForSection(REmovk_g2, Value.SectionID);
+ addRelocationForSection(REmovk_g1, Value.SectionID);
+ addRelocationForSection(REmovk_g0, Value.SectionID);
+ }
+ resolveRelocation(Section, Offset,
+ (uint64_t)Section.Address + Section.StubOffset,
+ RelType, 0);
+ Section.StubOffset += getMaxStubSize();
+ }
+ } else if (Arch == Triple::arm &&
(RelType == ELF::R_ARM_PC24 ||
RelType == ELF::R_ARM_CALL ||
RelType == ELF::R_ARM_JUMP24)) {
// This is an ARM branch relocation, need to use a stub function.
DEBUG(dbgs() << "\t\tThis is an ARM branch relocation.");
- SectionEntry &Section = Sections[Rel.SectionID];
+ SectionEntry &Section = Sections[SectionID];
// Look for an existing stub.
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end()) {
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + i->second, RelType, 0);
DEBUG(dbgs() << " Stub function found\n");
} else {
@@ -680,14 +875,14 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
Stubs[Value] = Section.StubOffset;
uint8_t *StubTargetAddr = createStubFunction(Section.Address +
Section.StubOffset);
- RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address,
+ RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
ELF::R_ARM_ABS32, Value.Addend);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
addRelocationForSection(RE, Value.SectionID);
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + Section.StubOffset,
RelType, 0);
Section.StubOffset += getMaxStubSize();
@@ -696,8 +891,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
RelType == ELF::R_MIPS_26) {
// This is a Mips branch relocation, need to use a stub function.
DEBUG(dbgs() << "\t\tThis is a Mips branch relocation.");
- SectionEntry &Section = Sections[Rel.SectionID];
- uint8_t *Target = Section.Address + Rel.Offset;
+ SectionEntry &Section = Sections[SectionID];
+ uint8_t *Target = Section.Address + Offset;
uint32_t *TargetAddress = (uint32_t *)Target;
// Extract the addend from the instruction.
@@ -708,7 +903,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
// Look for an existing stub.
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end()) {
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + i->second, RelType, 0);
DEBUG(dbgs() << " Stub function found\n");
} else {
@@ -719,10 +914,10 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
Section.StubOffset);
// Creating Hi and Lo relocations for the filled stub instructions.
- RelocationEntry REHi(Rel.SectionID,
+ RelocationEntry REHi(SectionID,
StubTargetAddr - Section.Address,
ELF::R_MIPS_HI16, Value.Addend);
- RelocationEntry RELo(Rel.SectionID,
+ RelocationEntry RELo(SectionID,
StubTargetAddr - Section.Address + 4,
ELF::R_MIPS_LO16, Value.Addend);
@@ -734,7 +929,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
addRelocationForSection(RELo, Value.SectionID);
}
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + Section.StubOffset,
RelType, 0);
Section.StubOffset += getMaxStubSize();
@@ -744,8 +939,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
// A PPC branch relocation will need a stub function if the target is
// an external symbol (Symbol::ST_Unknown) or if the target address
// is not within the signed 24-bit branch range.
- SectionEntry &Section = Sections[Rel.SectionID];
- uint8_t *Target = Section.Address + Rel.Offset;
+ SectionEntry &Section = Sections[SectionID];
+ uint8_t *Target = Section.Address + Offset;
bool RangeOverflow = false;
if (SymType != SymbolRef::ST_Unknown) {
// A function call may point to the .opd entry, so the final symbol value
@@ -755,7 +950,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
int32_t delta = static_cast<int32_t>(Target - RelocTarget);
// If it is within 24-bits branch range, just set the branch target
if (SignExtend32<24>(delta) == delta) {
- RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+ RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
@@ -770,7 +965,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end()) {
// Symbol function stub already created, just relocate to it
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + i->second, RelType, 0);
DEBUG(dbgs() << " Stub function found\n");
} else {
@@ -779,21 +974,21 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
Stubs[Value] = Section.StubOffset;
uint8_t *StubTargetAddr = createStubFunction(Section.Address +
Section.StubOffset);
- RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address,
+ RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
ELF::R_PPC64_ADDR64, Value.Addend);
// Generates the 64-bit address loads as exemplified in section
// 4.5.1 in PPC64 ELF ABI.
- RelocationEntry REhst(Rel.SectionID,
+ RelocationEntry REhst(SectionID,
StubTargetAddr - Section.Address + 2,
ELF::R_PPC64_ADDR16_HIGHEST, Value.Addend);
- RelocationEntry REhr(Rel.SectionID,
+ RelocationEntry REhr(SectionID,
StubTargetAddr - Section.Address + 6,
ELF::R_PPC64_ADDR16_HIGHER, Value.Addend);
- RelocationEntry REh(Rel.SectionID,
+ RelocationEntry REh(SectionID,
StubTargetAddr - Section.Address + 14,
ELF::R_PPC64_ADDR16_HI, Value.Addend);
- RelocationEntry REl(Rel.SectionID,
+ RelocationEntry REl(SectionID,
StubTargetAddr - Section.Address + 18,
ELF::R_PPC64_ADDR16_LO, Value.Addend);
@@ -809,7 +1004,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
addRelocationForSection(REl, Value.SectionID);
}
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + Section.StubOffset,
RelType, 0);
if (SymType == SymbolRef::ST_Unknown)
@@ -819,7 +1014,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
}
}
} else {
- RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+ RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
// Extra check to avoid relocation against empty symbols (usually
// the R_PPC64_TOC).
if (Value.SymbolName && !TargetName.empty())
@@ -827,8 +1022,55 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
else
addRelocationForSection(RE, Value.SectionID);
}
+ } else if (Arch == Triple::systemz &&
+ (RelType == ELF::R_390_PLT32DBL ||
+ RelType == ELF::R_390_GOTENT)) {
+ // Create function stubs for both PLT and GOT references, regardless of
+ // whether the GOT reference is to data or code. The stub contains the
+ // full address of the symbol, as needed by GOT references, and the
+ // executable part only adds an overhead of 8 bytes.
+ //
+ // We could try to conserve space by allocating the code and data
+ // parts of the stub separately. However, as things stand, we allocate
+ // a stub for every relocation, so using a GOT in JIT code should be
+ // no less space efficient than using an explicit constant pool.
+ DEBUG(dbgs() << "\t\tThis is a SystemZ indirect relocation.");
+ SectionEntry &Section = Sections[SectionID];
+
+ // Look for an existing stub.
+ StubMap::const_iterator i = Stubs.find(Value);
+ uintptr_t StubAddress;
+ if (i != Stubs.end()) {
+ StubAddress = uintptr_t(Section.Address) + i->second;
+ DEBUG(dbgs() << " Stub function found\n");
+ } else {
+ // Create a new stub function.
+ DEBUG(dbgs() << " Create a new stub function\n");
+
+ uintptr_t BaseAddress = uintptr_t(Section.Address);
+ uintptr_t StubAlignment = getStubAlignment();
+ StubAddress = (BaseAddress + Section.StubOffset +
+ StubAlignment - 1) & -StubAlignment;
+ unsigned StubOffset = StubAddress - BaseAddress;
+
+ Stubs[Value] = StubOffset;
+ createStubFunction((uint8_t *)StubAddress);
+ RelocationEntry RE(SectionID, StubOffset + 8,
+ ELF::R_390_64, Value.Addend - Addend);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ Section.StubOffset = StubOffset + getMaxStubSize();
+ }
+
+ if (RelType == ELF::R_390_GOTENT)
+ resolveRelocation(Section, Offset, StubAddress + 8,
+ ELF::R_390_PC32DBL, Addend);
+ else
+ resolveRelocation(Section, Offset, StubAddress, RelType, Addend);
} else {
- RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+ RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
@@ -836,13 +1078,6 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
}
}
-unsigned RuntimeDyldELF::getCommonSymbolAlignment(const SymbolRef &Sym) {
- // In ELF, the value of an SHN_COMMON symbol is its alignment requirement.
- uint64_t Align;
- Check(Sym.getValue(Align));
- return Align;
-}
-
bool RuntimeDyldELF::isCompatibleFormat(const ObjectBuffer *Buffer) const {
if (Buffer->getBufferSize() < strlen(ELF::ElfMagic))
return false;
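Stub placement in the SystemZ path above rounds the next free stub offset up to getStubAlignment() with the usual (X + A - 1) & -A idiom; on targets whose stub alignment is 1 the rounding is a no-op. A standalone version with the power-of-two precondition made explicit; the helper name is illustrative:

#include <assert.h>
#include <stdint.h>

// Illustrative helper: round X up to a power-of-two alignment A.
static uintptr_t alignUp(uintptr_t X, uintptr_t A) {
  assert(A && (A & (A - 1)) == 0 && "alignment must be a power of two");
  return (X + A - 1) & ~(A - 1); // same as & -A when A is a power of two
}

// e.g. alignUp(BaseAddress + StubOffset, 8) picks the first 8-byte-aligned
// stub address on SystemZ; with A == 1 the offset is returned unchanged.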
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 07e704b..794c7ec 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -31,7 +31,12 @@ namespace {
} // end anonymous namespace
class RuntimeDyldELF : public RuntimeDyldImpl {
-protected:
+ void resolveRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
void resolveX86_64Relocation(const SectionEntry &Section,
uint64_t Offset,
uint64_t Value,
@@ -44,6 +49,12 @@ protected:
uint32_t Type,
int32_t Addend);
+ void resolveAArch64Relocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
void resolveARMRelocation(const SectionEntry &Section,
uint64_t Offset,
uint32_t Value,
@@ -62,21 +73,11 @@ protected:
uint32_t Type,
int64_t Addend);
- virtual void resolveRelocation(const SectionEntry &Section,
- uint64_t Offset,
- uint64_t Value,
- uint32_t Type,
- int64_t Addend);
-
- virtual void processRelocationRef(const ObjRelocationInfo &Rel,
- ObjectImage &Obj,
- ObjSectionToIDMap &ObjSectionToID,
- const SymbolTableMap &Symbols,
- StubMap &Stubs);
-
- unsigned getCommonSymbolAlignment(const SymbolRef &Sym);
-
- virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer);
+ void resolveSystemZRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
uint64_t findPPC64TOC() const;
void findOPDEntrySection(ObjectImage &Obj,
@@ -84,12 +85,19 @@ protected:
RelocationValueRef &Rel);
public:
- RuntimeDyldELF(RTDyldMemoryManager *mm)
- : RuntimeDyldImpl(mm) {}
+ RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+ virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value);
+ virtual void processRelocationRef(unsigned SectionID,
+ RelocationRef RelI,
+ ObjectImage &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ const SymbolTableMap &Symbols,
+ StubMap &Stubs);
+ virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
+ virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer);
+ virtual StringRef getEHFrameSection();
virtual ~RuntimeDyldELF();
-
- bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index f100994..383ffab 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -49,7 +49,7 @@ public:
/// Address - address in the linker's memory where the section resides.
uint8_t *Address;
- /// Size - section size.
+ /// Size - section size. Doesn't include the stubs.
size_t Size;
/// LoadAddress - the address of the section in the target process's memory.
@@ -67,9 +67,9 @@ public:
uintptr_t ObjAddress;
SectionEntry(StringRef name, uint8_t *address, size_t size,
- uintptr_t stubOffset, uintptr_t objAddress)
+ uintptr_t objAddress)
: Name(name), Address(address), Size(size), LoadAddress((uintptr_t)address),
- StubOffset(stubOffset), ObjAddress(objAddress) {}
+ StubOffset(size), ObjAddress(objAddress) {}
};
/// RelocationEntry - used to represent relocations internally in the dynamic
@@ -89,20 +89,20 @@ public:
/// used to make a relocation section relative instead of symbol relative.
intptr_t Addend;
+ /// True if this is a PCRel relocation (MachO specific).
+ bool IsPCRel;
+
+ /// The size of this relocation (MachO specific).
+ unsigned Size;
+
RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend)
- : SectionID(id), Offset(offset), RelType(type), Addend(addend) {}
-};
+ : SectionID(id), Offset(offset), RelType(type), Addend(addend),
+ IsPCRel(false), Size(0) {}
-/// ObjRelocationInfo - relocation information as read from the object file.
-/// Used to pass around data taken from object::RelocationRef, together with
-/// the section to which the relocation points (represented by a SectionID).
-class ObjRelocationInfo {
-public:
- unsigned SectionID;
- uint64_t Offset;
- SymbolRef Symbol;
- uint64_t Type;
- int64_t AdditionalInfo;
+ RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend,
+ bool IsPCRel, unsigned Size)
+ : SectionID(id), Offset(offset), RelType(type), Addend(addend),
+ IsPCRel(IsPCRel), Size(Size) {}
};
class RelocationValueRef {
@@ -166,16 +166,29 @@ protected:
Triple::ArchType Arch;
inline unsigned getMaxStubSize() {
+ if (Arch == Triple::aarch64)
+ return 20; // movz; movk; movk; movk; br
if (Arch == Triple::arm || Arch == Triple::thumb)
return 8; // 32-bit instruction and 32-bit address
else if (Arch == Triple::mipsel || Arch == Triple::mips)
return 16;
else if (Arch == Triple::ppc64)
return 44;
+ else if (Arch == Triple::x86_64)
+ return 8; // GOT
+ else if (Arch == Triple::systemz)
+ return 16;
else
return 0;
}
+ inline unsigned getStubAlignment() {
+ if (Arch == Triple::systemz)
+ return 8;
+ else
+ return 1;
+ }
+
bool HasError;
std::string ErrorStr;
@@ -194,22 +207,15 @@ protected:
return (uint8_t*)Sections[SectionID].Address;
}
- // Subclasses can override this method to get the alignment requirement of
- // a common symbol. Returns no alignment requirement if not implemented.
- virtual unsigned getCommonSymbolAlignment(const SymbolRef &Sym) {
- return 0;
- }
-
-
void writeInt16BE(uint8_t *Addr, uint16_t Value) {
- if (sys::isLittleEndianHost())
+ if (sys::IsLittleEndianHost)
Value = sys::SwapByteOrder(Value);
*Addr = (Value >> 8) & 0xFF;
*(Addr+1) = Value & 0xFF;
}
void writeInt32BE(uint8_t *Addr, uint32_t Value) {
- if (sys::isLittleEndianHost())
+ if (sys::IsLittleEndianHost)
Value = sys::SwapByteOrder(Value);
*Addr = (Value >> 24) & 0xFF;
*(Addr+1) = (Value >> 16) & 0xFF;
@@ -218,7 +224,7 @@ protected:
}
void writeInt64BE(uint8_t *Addr, uint64_t Value) {
- if (sys::isLittleEndianHost())
+ if (sys::IsLittleEndianHost)
Value = sys::SwapByteOrder(Value);
*Addr = (Value >> 56) & 0xFF;
*(Addr+1) = (Value >> 48) & 0xFF;
@@ -269,24 +275,16 @@ protected:
/// \brief Resolves relocations from Relocs list with address from Value.
void resolveRelocationList(const RelocationList &Relocs, uint64_t Value);
- void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value);
/// \brief An object file specific relocation resolver
- /// \param Section The section where the relocation is being applied
- /// \param Offset The offset into the section for this relocation
+ /// \param RE The relocation to be resolved
/// \param Value Target symbol address to apply the relocation action
- /// \param Type object file specific relocation type
- /// \param Addend A constant addend used to compute the value to be stored
- /// into the relocatable field
- virtual void resolveRelocation(const SectionEntry &Section,
- uint64_t Offset,
- uint64_t Value,
- uint32_t Type,
- int64_t Addend) = 0;
+ virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value) = 0;
/// \brief Parses the object file relocation and stores it to Relocations
/// or SymbolRelocations (this depends on the object file type).
- virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ virtual void processRelocationRef(unsigned SectionID,
+ RelocationRef RelI,
ObjectImage &Obj,
ObjSectionToIDMap &ObjSectionToID,
const SymbolTableMap &Symbols,
@@ -336,6 +334,8 @@ public:
StringRef getErrorString() { return ErrorStr; }
virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const = 0;
+
+ virtual StringRef getEHFrameSection();
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index bcc3df1..01a3fd9 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -21,16 +21,87 @@ using namespace llvm::object;
namespace llvm {
+static unsigned char *processFDE(unsigned char *P, intptr_t DeltaForText, intptr_t DeltaForEH) {
+ uint32_t Length = *((uint32_t*)P);
+ P += 4;
+ unsigned char *Ret = P + Length;
+ uint32_t Offset = *((uint32_t*)P);
+ if (Offset == 0) // is a CIE
+ return Ret;
+
+ P += 4;
+ intptr_t FDELocation = *((intptr_t*)P);
+ intptr_t NewLocation = FDELocation - DeltaForText;
+ *((intptr_t*)P) = NewLocation;
+ P += sizeof(intptr_t);
+
+ // Skip the FDE address range
+ P += sizeof(intptr_t);
+
+ uint8_t Augmentationsize = *P;
+ P += 1;
+ if (Augmentationsize != 0) {
+ intptr_t LSDA = *((intptr_t*)P);
+ intptr_t NewLSDA = LSDA - DeltaForEH;
+ *((intptr_t*)P) = NewLSDA;
+ }
+
+ return Ret;
+}
+
+static intptr_t computeDelta(SectionEntry *A, SectionEntry *B) {
+ intptr_t ObjDistance = A->ObjAddress - B->ObjAddress;
+ intptr_t MemDistance = A->LoadAddress - B->LoadAddress;
+ return ObjDistance - MemDistance;
+}
+
+StringRef RuntimeDyldMachO::getEHFrameSection() {
+ SectionEntry *Text = NULL;
+ SectionEntry *EHFrame = NULL;
+ SectionEntry *ExceptTab = NULL;
+ for (int i = 0, e = Sections.size(); i != e; ++i) {
+ if (Sections[i].Name == "__eh_frame")
+ EHFrame = &Sections[i];
+ else if (Sections[i].Name == "__text")
+ Text = &Sections[i];
+ else if (Sections[i].Name == "__gcc_except_tab")
+ ExceptTab = &Sections[i];
+ }
+ if (Text == NULL || EHFrame == NULL)
+ return StringRef();
+
+ intptr_t DeltaForText = computeDelta(Text, EHFrame);
+ intptr_t DeltaForEH = 0;
+ if (ExceptTab)
+ DeltaForEH = computeDelta(ExceptTab, EHFrame);
+
+ unsigned char *P = EHFrame->Address;
+ unsigned char *End = P + EHFrame->Size;
+ do {
+ P = processFDE(P, DeltaForText, DeltaForEH);
+ } while(P != End);
+
+ return StringRef((char*)EHFrame->Address, EHFrame->Size);
+}
+
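computeDelta() above captures why the Mach-O FDE fields need rewriting: they were laid down against the section layout of the object file, so once __text, __gcc_except_tab and __eh_frame are placed independently in JIT memory, each stored pointer must be shifted by the change in inter-section distance. A standalone restatement; the struct and function names are illustrative:

#include <stdint.h>

// Illustrative mirror of SectionEntry's ObjAddress / LoadAddress pair.
struct SectionAddrs {
  intptr_t ObjAddress;  // where the section sat in the object file
  intptr_t LoadAddress; // where it sits in the target process
};

// Illustrative mirror of computeDelta(): the correction to subtract from a
// pointer in the EH frame that refers into the Referenced section.
static intptr_t rebaseDelta(const SectionAddrs &Referenced,
                            const SectionAddrs &EHFrame) {
  intptr_t ObjDistance = Referenced.ObjAddress - EHFrame.ObjAddress;
  intptr_t MemDistance = Referenced.LoadAddress - EHFrame.LoadAddress;
  return ObjDistance - MemDistance;
}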
+void RuntimeDyldMachO::resolveRelocation(const RelocationEntry &RE,
+ uint64_t Value) {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend,
+ RE.IsPCRel, RE.Size);
+}
+
void RuntimeDyldMachO::resolveRelocation(const SectionEntry &Section,
uint64_t Offset,
uint64_t Value,
uint32_t Type,
- int64_t Addend) {
+ int64_t Addend,
+ bool isPCRel,
+ unsigned LogSize) {
uint8_t *LocalAddress = Section.Address + Offset;
uint64_t FinalAddress = Section.LoadAddress + Offset;
- bool isPCRel = (Type >> 24) & 1;
- unsigned MachoType = (Type >> 28) & 0xf;
- unsigned Size = 1 << ((Type >> 25) & 3);
+ unsigned MachoType = Type;
+ unsigned Size = 1 << LogSize;
DEBUG(dbgs() << "resolveRelocation LocalAddress: "
<< format("%p", LocalAddress)
@@ -205,89 +276,111 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress,
return false;
}
-void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel,
+void RuntimeDyldMachO::processRelocationRef(unsigned SectionID,
+ RelocationRef RelI,
ObjectImage &Obj,
ObjSectionToIDMap &ObjSectionToID,
const SymbolTableMap &Symbols,
StubMap &Stubs) {
+ const ObjectFile *OF = Obj.getObjectFile();
+ const MachOObjectFile *MachO = static_cast<const MachOObjectFile*>(OF);
+ macho::RelocationEntry RE = MachO->getRelocation(RelI.getRawDataRefImpl());
- uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL);
+ uint32_t RelType = MachO->getAnyRelocationType(RE);
RelocationValueRef Value;
- SectionEntry &Section = Sections[Rel.SectionID];
+ SectionEntry &Section = Sections[SectionID];
+
+ bool isExtern = MachO->getPlainRelocationExternal(RE);
+ bool IsPCRel = MachO->getAnyRelocationPCRel(RE);
+ unsigned Size = MachO->getAnyRelocationLength(RE);
+ uint64_t Offset;
+ RelI.getOffset(Offset);
+ uint8_t *LocalAddress = Section.Address + Offset;
+ unsigned NumBytes = 1 << Size;
+ uint64_t Addend = 0;
+ memcpy(&Addend, LocalAddress, NumBytes);
- bool isExtern = (RelType >> 27) & 1;
if (isExtern) {
// Obtain the symbol name which is referenced in the relocation
+ SymbolRef Symbol;
+ RelI.getSymbol(Symbol);
StringRef TargetName;
- const SymbolRef &Symbol = Rel.Symbol;
Symbol.getName(TargetName);
// First search for the symbol in the local symbol table
SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data());
if (lsi != Symbols.end()) {
Value.SectionID = lsi->second.first;
- Value.Addend = lsi->second.second;
+ Value.Addend = lsi->second.second + Addend;
} else {
// Search for the symbol in the global symbol table
SymbolTableMap::const_iterator gsi = GlobalSymbolTable.find(TargetName.data());
if (gsi != GlobalSymbolTable.end()) {
Value.SectionID = gsi->second.first;
- Value.Addend = gsi->second.second;
- } else
+ Value.Addend = gsi->second.second + Addend;
+ } else {
Value.SymbolName = TargetName.data();
+ Value.Addend = Addend;
+ }
}
} else {
- error_code err;
- uint8_t sectionIndex = static_cast<uint8_t>(RelType & 0xFF);
- section_iterator si = Obj.begin_sections(),
- se = Obj.end_sections();
- for (uint8_t i = 1; i < sectionIndex; i++) {
- error_code err;
- si.increment(err);
- if (si == se)
- break;
- }
- assert(si != se && "No section containing relocation!");
- Value.SectionID = findOrEmitSection(Obj, *si, true, ObjSectionToID);
- Value.Addend = 0;
- // FIXME: The size and type of the relocation determines if we can
- // encode an Addend in the target location itself, and if so, how many
- // bytes we should read in order to get it. We don't yet support doing
- // that, and just assuming it's sizeof(intptr_t) is blatantly wrong.
- //Value.Addend = *(const intptr_t *)Target;
- if (Value.Addend) {
- // The MachO addend is an offset from the current section. We need it
- // to be an offset from the destination section
- Value.Addend += Section.ObjAddress - Sections[Value.SectionID].ObjAddress;
- }
+ SectionRef Sec = MachO->getRelocationSection(RE);
+ Value.SectionID = findOrEmitSection(Obj, Sec, true, ObjSectionToID);
+ uint64_t Addr;
+ Sec.getAddress(Addr);
+ Value.Addend = Addend - Addr;
}
- if (Arch == Triple::arm && (RelType & 0xf) == macho::RIT_ARM_Branch24Bit) {
+ if (Arch == Triple::x86_64 && RelType == macho::RIT_X86_64_GOT) {
+ assert(IsPCRel);
+ assert(Size == 2);
+ StubMap::const_iterator i = Stubs.find(Value);
+ uint8_t *Addr;
+ if (i != Stubs.end()) {
+ Addr = Section.Address + i->second;
+ } else {
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *GOTEntry = Section.Address + Section.StubOffset;
+ RelocationEntry RE(SectionID, Section.StubOffset,
+ macho::RIT_X86_64_Unsigned, Value.Addend - 4, false,
+ 3);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ Section.StubOffset += 8;
+ Addr = GOTEntry;
+ }
+ resolveRelocation(Section, Offset, (uint64_t)Addr,
+ macho::RIT_X86_64_Unsigned, 4, true, 2);
+ } else if (Arch == Triple::arm &&
+ (RelType & 0xf) == macho::RIT_ARM_Branch24Bit) {
// This is an ARM branch relocation, need to use a stub function.
// Look up for existing stub.
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end())
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + i->second,
- RelType, 0);
+ RelType, 0, IsPCRel, Size);
else {
// Create a new stub function.
Stubs[Value] = Section.StubOffset;
uint8_t *StubTargetAddr = createStubFunction(Section.Address +
Section.StubOffset);
- RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address,
+ RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
macho::RIT_Vanilla, Value.Addend);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
addRelocationForSection(RE, Value.SectionID);
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + Section.StubOffset,
- RelType, 0);
+ RelType, 0, IsPCRel, Size);
Section.StubOffset += getMaxStubSize();
}
} else {
- RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+ RelocationEntry RE(SectionID, Offset, RelType, Value.Addend,
+ IsPCRel, Size);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index 62d8487..df8d3bb 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -16,7 +16,7 @@
#include "RuntimeDyldImpl.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/MachO.h"
#include "llvm/Support/Format.h"
using namespace llvm;
@@ -25,7 +25,6 @@ using namespace llvm::object;
namespace llvm {
class RuntimeDyldMachO : public RuntimeDyldImpl {
-protected:
bool resolveI386Relocation(uint8_t *LocalAddress,
uint64_t FinalAddress,
uint64_t Value,
@@ -48,22 +47,25 @@ protected:
unsigned Size,
int64_t Addend);
- virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ void resolveRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend,
+ bool isPCRel,
+ unsigned Size);
+public:
+ RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+
+ virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value);
+ virtual void processRelocationRef(unsigned SectionID,
+ RelocationRef RelI,
ObjectImage &Obj,
ObjSectionToIDMap &ObjSectionToID,
const SymbolTableMap &Symbols,
StubMap &Stubs);
-
-public:
- virtual void resolveRelocation(const SectionEntry &Section,
- uint64_t Offset,
- uint64_t Value,
- uint32_t Type,
- int64_t Addend);
-
- RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
-
- bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
+ virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
+ virtual StringRef getEHFrameSection();
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp
index fb591a8..7761127d 100644
--- a/contrib/llvm/lib/IR/AsmWriter.cpp
+++ b/contrib/llvm/lib/IR/AsmWriter.cpp
@@ -1605,6 +1605,29 @@ void AssemblyWriter::printFunction(const Function *F) {
if (F->isMaterializable())
Out << "; Materializable\n";
+ const AttributeSet &Attrs = F->getAttributes();
+ if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) {
+ AttributeSet AS = Attrs.getFnAttributes();
+ std::string AttrStr;
+
+ unsigned Idx = 0;
+ for (unsigned E = AS.getNumSlots(); Idx != E; ++Idx)
+ if (AS.getSlotIndex(Idx) == AttributeSet::FunctionIndex)
+ break;
+
+ for (AttributeSet::iterator I = AS.begin(Idx), E = AS.end(Idx);
+ I != E; ++I) {
+ Attribute Attr = *I;
+ if (!Attr.isStringAttribute()) {
+ if (!AttrStr.empty()) AttrStr += ' ';
+ AttrStr += Attr.getAsString();
+ }
+ }
+
+ if (!AttrStr.empty())
+ Out << "; Function Attrs: " << AttrStr << '\n';
+ }
+
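The block above collects the enum function attributes from the AttributeSet::FunctionIndex slot and prints them as a leading comment, leaving string attributes to the trailing attribute group. Illustrative output only; the exact attribute list depends on the function being printed:

; Function Attrs: nounwind readonly uwtable
define i32 @get(i32* %p) #0 {
  ...
}

attributes #0 = { nounwind readonly uwtable }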
if (F->isDeclaration())
Out << "declare ";
else
@@ -1620,7 +1643,6 @@ void AssemblyWriter::printFunction(const Function *F) {
}
FunctionType *FT = F->getFunctionType();
- const AttributeSet &Attrs = F->getAttributes();
if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
Out << Attrs.getAsString(AttributeSet::ReturnIndex) << ' ';
TypePrinter.print(F->getReturnType(), Out);
@@ -1761,10 +1783,8 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
/// which slot it occupies.
///
void AssemblyWriter::printInfoComment(const Value &V) {
- if (AnnotationWriter) {
+ if (AnnotationWriter)
AnnotationWriter->printInfoComment(V, Out);
- return;
- }
}
// This member is called for each Instruction in a function..
diff --git a/contrib/llvm/lib/IR/AttributeImpl.h b/contrib/llvm/lib/IR/AttributeImpl.h
index ad2670d..0b6228b 100644
--- a/contrib/llvm/lib/IR/AttributeImpl.h
+++ b/contrib/llvm/lib/IR/AttributeImpl.h
@@ -228,7 +228,7 @@ public:
/// is the index of the return, parameter, or function object that the
/// attributes are applied to, not the index into the AttrNodes list where the
/// attributes reside.
- uint64_t getSlotIndex(unsigned Slot) const {
+ unsigned getSlotIndex(unsigned Slot) const {
return AttrNodes[Slot].first;
}
@@ -248,15 +248,15 @@ public:
typedef AttributeSetNode::iterator iterator;
typedef AttributeSetNode::const_iterator const_iterator;
- iterator begin(unsigned Idx)
- { return AttrNodes[Idx].second->begin(); }
- iterator end(unsigned Idx)
- { return AttrNodes[Idx].second->end(); }
+ iterator begin(unsigned Slot)
+ { return AttrNodes[Slot].second->begin(); }
+ iterator end(unsigned Slot)
+ { return AttrNodes[Slot].second->end(); }
- const_iterator begin(unsigned Idx) const
- { return AttrNodes[Idx].second->begin(); }
- const_iterator end(unsigned Idx) const
- { return AttrNodes[Idx].second->end(); }
+ const_iterator begin(unsigned Slot) const
+ { return AttrNodes[Slot].second->begin(); }
+ const_iterator end(unsigned Slot) const
+ { return AttrNodes[Slot].second->end(); }
void Profile(FoldingSetNodeID &ID) const {
Profile(ID, AttrNodes);
@@ -270,7 +270,7 @@ public:
}
// FIXME: This atrocity is temporary.
- uint64_t Raw(uint64_t Index) const;
+ uint64_t Raw(unsigned Index) const;
};
} // end llvm namespace
diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp
index 2d82891..4fe6f9d 100644
--- a/contrib/llvm/lib/IR/Attributes.cpp
+++ b/contrib/llvm/lib/IR/Attributes.cpp
@@ -195,6 +195,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "readnone";
if (hasAttribute(Attribute::ReadOnly))
return "readonly";
+ if (hasAttribute(Attribute::Returned))
+ return "returned";
if (hasAttribute(Attribute::ReturnsTwice))
return "returns_twice";
if (hasAttribute(Attribute::SExt))
@@ -393,6 +395,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
case Attribute::SanitizeThread: return 1ULL << 36;
case Attribute::SanitizeMemory: return 1ULL << 37;
case Attribute::NoBuiltin: return 1ULL << 38;
+ case Attribute::Returned: return 1ULL << 39;
}
llvm_unreachable("Unsupported attribute type");
}
@@ -481,11 +484,12 @@ unsigned AttributeSetNode::getStackAlignment() const {
}
std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
- std::string Str = "";
+ std::string Str;
for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
- E = AttrList.end(); I != E; ) {
+ E = AttrList.end(); I != E; ++I) {
+ if (I != AttrList.begin())
+ Str += ' ';
Str += I->getAsString(InAttrGrp);
- if (++I != E) Str += " ";
}
return Str;
}
@@ -494,7 +498,7 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
// AttributeSetImpl Definition
//===----------------------------------------------------------------------===//
-uint64_t AttributeSetImpl::Raw(uint64_t Index) const {
+uint64_t AttributeSetImpl::Raw(unsigned Index) const {
for (unsigned I = 0, E = getNumAttributes(); I != E; ++I) {
if (getSlotIndex(I) != Index) continue;
const AttributeSetNode *ASN = AttrNodes[I].second;
@@ -592,7 +596,7 @@ AttributeSet AttributeSet::get(LLVMContext &C,
return getImpl(C, Attrs);
}
-AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) {
+AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, AttrBuilder &B) {
if (!B.hasAttributes())
return AttributeSet();
@@ -604,29 +608,29 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) {
continue;
if (Kind == Attribute::Alignment)
- Attrs.push_back(std::make_pair(Idx, Attribute::
+ Attrs.push_back(std::make_pair(Index, Attribute::
getWithAlignment(C, B.getAlignment())));
else if (Kind == Attribute::StackAlignment)
- Attrs.push_back(std::make_pair(Idx, Attribute::
+ Attrs.push_back(std::make_pair(Index, Attribute::
getWithStackAlignment(C, B.getStackAlignment())));
else
- Attrs.push_back(std::make_pair(Idx, Attribute::get(C, Kind)));
+ Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind)));
}
// Add target-dependent (string) attributes.
for (AttrBuilder::td_iterator I = B.td_begin(), E = B.td_end();
I != E; ++I)
- Attrs.push_back(std::make_pair(Idx, Attribute::get(C, I->first,I->second)));
+ Attrs.push_back(std::make_pair(Index, Attribute::get(C, I->first,I->second)));
return get(C, Attrs);
}
-AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
ArrayRef<Attribute::AttrKind> Kind) {
SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
for (ArrayRef<Attribute::AttrKind>::iterator I = Kind.begin(),
E = Kind.end(); I != E; ++I)
- Attrs.push_back(std::make_pair(Idx, Attribute::get(C, *I)));
+ Attrs.push_back(std::make_pair(Index, Attribute::get(C, *I)));
return get(C, Attrs);
}
@@ -643,20 +647,20 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) {
return getImpl(C, AttrNodeVec);
}
-AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index,
Attribute::AttrKind Attr) const {
- if (hasAttribute(Idx, Attr)) return *this;
- return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
+ if (hasAttribute(Index, Attr)) return *this;
+ return addAttributes(C, Index, AttributeSet::get(C, Index, Attr));
}
-AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index,
StringRef Kind) const {
llvm::AttrBuilder B;
B.addAttribute(Kind);
- return addAttributes(C, Idx, AttributeSet::get(C, Idx, B));
+ return addAttributes(C, Index, AttributeSet::get(C, Index, B));
}
-AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Index,
AttributeSet Attrs) const {
if (!pImpl) return Attrs;
if (!Attrs.pImpl) return *this;
@@ -664,8 +668,8 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
#ifndef NDEBUG
// FIXME it is not obvious how this should work for alignment. For now, say
// we can't change a known alignment.
- unsigned OldAlign = getParamAlignment(Idx);
- unsigned NewAlign = Attrs.getParamAlignment(Idx);
+ unsigned OldAlign = getParamAlignment(Index);
+ unsigned NewAlign = Attrs.getParamAlignment(Index);
assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
"Attempt to change alignment!");
#endif
@@ -676,8 +680,8 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
AttributeSet AS;
uint64_t LastIndex = 0;
for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
- if (getSlotIndex(I) >= Idx) {
- if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++);
+ if (getSlotIndex(I) >= Index) {
+ if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++);
break;
}
LastIndex = I + 1;
@@ -686,17 +690,17 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
// Now add the attribute into the correct slot. There may already be an
// AttributeSet there.
- AttrBuilder B(AS, Idx);
+ AttrBuilder B(AS, Index);
for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I)
- if (Attrs.getSlotIndex(I) == Idx) {
+ if (Attrs.getSlotIndex(I) == Index) {
for (AttributeSetImpl::const_iterator II = Attrs.pImpl->begin(I),
IE = Attrs.pImpl->end(I); II != IE; ++II)
B.addAttribute(*II);
break;
}
- AttrSet.push_back(AttributeSet::get(C, Idx, B));
+ AttrSet.push_back(AttributeSet::get(C, Index, B));
// Add the remaining attribute slots.
for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
@@ -705,13 +709,13 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
return get(C, AttrSet);
}
-AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Index,
Attribute::AttrKind Attr) const {
- if (!hasAttribute(Idx, Attr)) return *this;
- return removeAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
+ if (!hasAttribute(Index, Attr)) return *this;
+ return removeAttributes(C, Index, AttributeSet::get(C, Index, Attr));
}
-AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
AttributeSet Attrs) const {
if (!pImpl) return AttributeSet();
if (!Attrs.pImpl) return *this;
@@ -719,7 +723,7 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
#ifndef NDEBUG
// FIXME it is not obvious how this should work for alignment.
// For now, say we can't pass in alignment, which no current use does.
- assert(!Attrs.hasAttribute(Idx, Attribute::Alignment) &&
+ assert(!Attrs.hasAttribute(Index, Attribute::Alignment) &&
"Attempt to change alignment!");
#endif
@@ -729,8 +733,8 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
AttributeSet AS;
uint64_t LastIndex = 0;
for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
- if (getSlotIndex(I) >= Idx) {
- if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++);
+ if (getSlotIndex(I) >= Index) {
+ if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++);
break;
}
LastIndex = I + 1;
@@ -739,15 +743,15 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
// Now remove the attribute from the correct slot. There may already be an
// AttributeSet there.
- AttrBuilder B(AS, Idx);
+ AttrBuilder B(AS, Index);
for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I)
- if (Attrs.getSlotIndex(I) == Idx) {
- B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Idx);
+ if (Attrs.getSlotIndex(I) == Index) {
+ B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Index);
break;
}
- AttrSet.push_back(AttributeSet::get(C, Idx, B));
+ AttrSet.push_back(AttributeSet::get(C, Index, B));
// Add the remaining attribute slots.
for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
@@ -764,11 +768,11 @@ LLVMContext &AttributeSet::getContext() const {
return pImpl->getContext();
}
-AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const {
- return pImpl && hasAttributes(Idx) ?
+AttributeSet AttributeSet::getParamAttributes(unsigned Index) const {
+ return pImpl && hasAttributes(Index) ?
AttributeSet::get(pImpl->getContext(),
ArrayRef<std::pair<unsigned, AttributeSetNode*> >(
- std::make_pair(Idx, getAttributes(Idx)))) :
+ std::make_pair(Index, getAttributes(Index)))) :
AttributeSet();
}
@@ -848,27 +852,27 @@ std::string AttributeSet::getAsString(unsigned Index,
}
/// \brief The attributes for the specified index are returned.
-AttributeSetNode *AttributeSet::getAttributes(unsigned Idx) const {
+AttributeSetNode *AttributeSet::getAttributes(unsigned Index) const {
if (!pImpl) return 0;
// Loop through to find the attribute node we want.
for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I)
- if (pImpl->getSlotIndex(I) == Idx)
+ if (pImpl->getSlotIndex(I) == Index)
return pImpl->getSlotNode(I);
return 0;
}
-AttributeSet::iterator AttributeSet::begin(unsigned Idx) const {
+AttributeSet::iterator AttributeSet::begin(unsigned Slot) const {
if (!pImpl)
return ArrayRef<Attribute>().begin();
- return pImpl->begin(Idx);
+ return pImpl->begin(Slot);
}
-AttributeSet::iterator AttributeSet::end(unsigned Idx) const {
+AttributeSet::iterator AttributeSet::end(unsigned Slot) const {
if (!pImpl)
return ArrayRef<Attribute>().end();
- return pImpl->end(Idx);
+ return pImpl->end(Slot);
}
//===----------------------------------------------------------------------===//
@@ -882,7 +886,7 @@ unsigned AttributeSet::getNumSlots() const {
return pImpl ? pImpl->getNumAttributes() : 0;
}
-uint64_t AttributeSet::getSlotIndex(unsigned Slot) const {
+unsigned AttributeSet::getSlotIndex(unsigned Slot) const {
assert(pImpl && Slot < pImpl->getNumAttributes() &&
"Slot # out of range!");
return pImpl->getSlotIndex(Slot);
@@ -919,13 +923,13 @@ void AttributeSet::dump() const {
// AttrBuilder Method Implementations
//===----------------------------------------------------------------------===//
-AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx)
+AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index)
: Attrs(0), Alignment(0), StackAlignment(0) {
AttributeSetImpl *pImpl = AS.pImpl;
if (!pImpl) return;
for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) {
- if (pImpl->getSlotIndex(I) != Idx) continue;
+ if (pImpl->getSlotIndex(I) != Index) continue;
for (AttributeSetImpl::const_iterator II = pImpl->begin(I),
IE = pImpl->end(I); II != IE; ++II)
@@ -982,16 +986,16 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
}
AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
- unsigned Idx = ~0U;
+ unsigned Slot = ~0U;
for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
if (A.getSlotIndex(I) == Index) {
- Idx = I;
+ Slot = I;
break;
}
- assert(Idx != ~0U && "Couldn't find index in AttributeSet!");
+ assert(Slot != ~0U && "Couldn't find index in AttributeSet!");
- for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); I != E; ++I) {
+ for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) {
Attribute Attr = *I;
if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) {
Attribute::AttrKind Kind = I->getKindAsEnum();
@@ -1069,16 +1073,16 @@ bool AttrBuilder::hasAttributes() const {
}
bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const {
- unsigned Idx = ~0U;
+ unsigned Slot = ~0U;
for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
if (A.getSlotIndex(I) == Index) {
- Idx = I;
+ Slot = I;
break;
}
- assert(Idx != ~0U && "Couldn't find the index!");
+ assert(Slot != ~0U && "Couldn't find the index!");
- for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx);
+ for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot);
I != E; ++I) {
Attribute Attr = *I;
if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) {
@@ -1109,33 +1113,6 @@ bool AttrBuilder::operator==(const AttrBuilder &B) {
return Alignment == B.Alignment && StackAlignment == B.StackAlignment;
}
-void AttrBuilder::removeFunctionOnlyAttrs() {
- removeAttribute(Attribute::NoReturn)
- .removeAttribute(Attribute::NoUnwind)
- .removeAttribute(Attribute::ReadNone)
- .removeAttribute(Attribute::ReadOnly)
- .removeAttribute(Attribute::NoInline)
- .removeAttribute(Attribute::AlwaysInline)
- .removeAttribute(Attribute::OptimizeForSize)
- .removeAttribute(Attribute::StackProtect)
- .removeAttribute(Attribute::StackProtectReq)
- .removeAttribute(Attribute::StackProtectStrong)
- .removeAttribute(Attribute::NoRedZone)
- .removeAttribute(Attribute::NoImplicitFloat)
- .removeAttribute(Attribute::Naked)
- .removeAttribute(Attribute::InlineHint)
- .removeAttribute(Attribute::StackAlignment)
- .removeAttribute(Attribute::UWTable)
- .removeAttribute(Attribute::NonLazyBind)
- .removeAttribute(Attribute::ReturnsTwice)
- .removeAttribute(Attribute::SanitizeAddress)
- .removeAttribute(Attribute::SanitizeThread)
- .removeAttribute(Attribute::SanitizeMemory)
- .removeAttribute(Attribute::MinSize)
- .removeAttribute(Attribute::NoDuplicate)
- .removeAttribute(Attribute::NoBuiltin);
-}
-
AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
// FIXME: Remove this in 4.0.
if (!Val) return *this;
diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp
index 1abb656..2c6971c 100644
--- a/contrib/llvm/lib/IR/Constants.cpp
+++ b/contrib/llvm/lib/IR/Constants.cpp
@@ -237,18 +237,21 @@ void Constant::destroyConstantImpl() {
delete this;
}
-/// canTrap - Return true if evaluation of this constant could trap. This is
-/// true for things like constant expressions that could divide by zero.
-bool Constant::canTrap() const {
- assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
+static bool canTrapImpl(const Constant *C,
+ SmallPtrSet<const ConstantExpr *, 4> &NonTrappingOps) {
+ assert(C->getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
// The only thing that could possibly trap are constant exprs.
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(this);
- if (!CE) return false;
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE)
+ return false;
// ConstantExpr traps if any operands can trap.
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (CE->getOperand(i)->canTrap())
- return true;
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ if (ConstantExpr *Op = dyn_cast<ConstantExpr>(CE->getOperand(i))) {
+ if (NonTrappingOps.insert(Op) && canTrapImpl(Op, NonTrappingOps))
+ return true;
+ }
+ }
// Otherwise, only specific operations can trap.
switch (CE->getOpcode()) {
@@ -267,6 +270,13 @@ bool Constant::canTrap() const {
}
}
+/// canTrap - Return true if evaluation of this constant could trap. This is
+/// true for things like constant expressions that could divide by zero.
+bool Constant::canTrap() const {
+ SmallPtrSet<const ConstantExpr *, 4> NonTrappingOps;
+ return canTrapImpl(this, NonTrappingOps);
+}
+
/// isThreadDependent - Return true if the value can vary between threads.
bool Constant::isThreadDependent() const {
SmallPtrSet<const Constant*, 64> Visited;
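The refactor above threads a visited set through the recursion so that ConstantExpr operands shared between several expressions are examined only once. A minimal standalone sketch of the same memoization pattern, using a hypothetical Expr type rather than the LLVM classes (illustration only, not part of the patch):

#include <unordered_set>
#include <vector>

struct Expr {
  bool TrapsDirectly;                   // e.g. an integer division by zero
  std::vector<const Expr *> Operands;   // shared sub-expressions form a DAG
};

static bool canTrapImpl(const Expr *E,
                        std::unordered_set<const Expr *> &NonTrapping) {
  // Descend only into operands not already proven non-trapping; without the
  // set, a diamond-shaped DAG is re-walked an exponential number of times.
  for (const Expr *Op : E->Operands)
    if (NonTrapping.insert(Op).second && canTrapImpl(Op, NonTrapping))
      return true;
  return E->TrapsDirectly;
}

bool canTrap(const Expr *E) {
  std::unordered_set<const Expr *> NonTrapping;
  return canTrapImpl(E, NonTrapping);
}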
diff --git a/contrib/llvm/lib/IR/ConstantsContext.h b/contrib/llvm/lib/IR/ConstantsContext.h
index e995858..32bed95 100644
--- a/contrib/llvm/lib/IR/ConstantsContext.h
+++ b/contrib/llvm/lib/IR/ConstantsContext.h
@@ -318,7 +318,7 @@ struct ExprMapKeyType {
ArrayRef<Constant*> ops,
unsigned short flags = 0,
unsigned short optionalflags = 0,
- ArrayRef<unsigned> inds = ArrayRef<unsigned>())
+ ArrayRef<unsigned> inds = None)
: opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags),
operands(ops.begin(), ops.end()), indices(inds.begin(), inds.end()) {}
uint8_t opcode;
diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp
index 983b49c..889d574 100644
--- a/contrib/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm/lib/IR/Core.cpp
@@ -21,7 +21,9 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
@@ -1301,6 +1303,53 @@ void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) {
unwrap<GlobalVariable>(GlobalVar)->setConstant(IsConstant != 0);
}
+LLVMThreadLocalMode LLVMGetThreadLocalMode(LLVMValueRef GlobalVar) {
+ switch (unwrap<GlobalVariable>(GlobalVar)->getThreadLocalMode()) {
+ case GlobalVariable::NotThreadLocal:
+ return LLVMNotThreadLocal;
+ case GlobalVariable::GeneralDynamicTLSModel:
+ return LLVMGeneralDynamicTLSModel;
+ case GlobalVariable::LocalDynamicTLSModel:
+ return LLVMLocalDynamicTLSModel;
+ case GlobalVariable::InitialExecTLSModel:
+ return LLVMInitialExecTLSModel;
+ case GlobalVariable::LocalExecTLSModel:
+ return LLVMLocalExecTLSModel;
+ }
+
+ llvm_unreachable("Invalid GlobalVariable thread local mode");
+}
+
+void LLVMSetThreadLocalMode(LLVMValueRef GlobalVar, LLVMThreadLocalMode Mode) {
+ GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+
+ switch (Mode) {
+ case LLVMNotThreadLocal:
+ GV->setThreadLocalMode(GlobalVariable::NotThreadLocal);
+ break;
+ case LLVMGeneralDynamicTLSModel:
+ GV->setThreadLocalMode(GlobalVariable::GeneralDynamicTLSModel);
+ break;
+ case LLVMLocalDynamicTLSModel:
+ GV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
+ break;
+ case LLVMInitialExecTLSModel:
+ GV->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
+ break;
+ case LLVMLocalExecTLSModel:
+ GV->setThreadLocalMode(GlobalVariable::LocalExecTLSModel);
+ break;
+ }
+}
+
+LLVMBool LLVMIsExternallyInitialized(LLVMValueRef GlobalVar) {
+ return unwrap<GlobalVariable>(GlobalVar)->isExternallyInitialized();
+}
+
+void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) {
+ unwrap<GlobalVariable>(GlobalVar)->setExternallyInitialized(IsExtInit);
+}
+
/*--.. Operations on aliases ......................................--*/
LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
@@ -1396,6 +1445,18 @@ void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Func->setAttributes(PALnew);
}
+void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A,
+ const char *V) {
+ Function *Func = unwrap<Function>(Fn);
+ AttributeSet::AttrIndex Idx =
+ AttributeSet::AttrIndex(AttributeSet::FunctionIndex);
+ AttrBuilder B;
+
+ B.addAttribute(A, V);
+ AttributeSet Set = AttributeSet::get(Func->getContext(), Idx, B);
+ Func->addAttributes(Idx, Set);
+}
+
void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Function *Func = unwrap<Function>(Fn);
const AttributeSet PAL = Func->getAttributes();
@@ -2331,6 +2392,42 @@ LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS,
return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op,
+ LLVMValueRef PTR, LLVMValueRef Val,
+ LLVMAtomicOrdering ordering,
+ LLVMBool singleThread) {
+ AtomicRMWInst::BinOp intop;
+ switch (op) {
+ case LLVMAtomicRMWBinOpXchg: intop = AtomicRMWInst::Xchg; break;
+ case LLVMAtomicRMWBinOpAdd: intop = AtomicRMWInst::Add; break;
+ case LLVMAtomicRMWBinOpSub: intop = AtomicRMWInst::Sub; break;
+ case LLVMAtomicRMWBinOpAnd: intop = AtomicRMWInst::And; break;
+ case LLVMAtomicRMWBinOpNand: intop = AtomicRMWInst::Nand; break;
+ case LLVMAtomicRMWBinOpOr: intop = AtomicRMWInst::Or; break;
+ case LLVMAtomicRMWBinOpXor: intop = AtomicRMWInst::Xor; break;
+ case LLVMAtomicRMWBinOpMax: intop = AtomicRMWInst::Max; break;
+ case LLVMAtomicRMWBinOpMin: intop = AtomicRMWInst::Min; break;
+ case LLVMAtomicRMWBinOpUMax: intop = AtomicRMWInst::UMax; break;
+ case LLVMAtomicRMWBinOpUMin: intop = AtomicRMWInst::UMin; break;
+ }
+ AtomicOrdering intordering;
+ switch (ordering) {
+ case LLVMAtomicOrderingNotAtomic: intordering = NotAtomic; break;
+ case LLVMAtomicOrderingUnordered: intordering = Unordered; break;
+ case LLVMAtomicOrderingMonotonic: intordering = Monotonic; break;
+ case LLVMAtomicOrderingAcquire: intordering = Acquire; break;
+ case LLVMAtomicOrderingRelease: intordering = Release; break;
+ case LLVMAtomicOrderingAcquireRelease:
+ intordering = AcquireRelease;
+ break;
+ case LLVMAtomicOrderingSequentiallyConsistent:
+ intordering = SequentiallyConsistent;
+ break;
+ }
+ return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val),
+ intordering, singleThread ? SingleThread : CrossThread));
+}
+
/*===-- Module providers --------------------------------------------------===*/
@@ -2397,6 +2494,13 @@ LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy(
StringRef(BufferName)));
}
+const char *LLVMGetBufferStart(LLVMMemoryBufferRef MemBuf) {
+ return unwrap(MemBuf)->getBufferStart();
+}
+
+size_t LLVMGetBufferSize(LLVMMemoryBufferRef MemBuf) {
+ return unwrap(MemBuf)->getBufferSize();
+}
void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
delete unwrap(MemBuf);
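The Core.cpp hunks above are thin C wrappers over existing C++ functionality. A hedged usage sketch of the new entry points follows; only the signatures visible in this patch plus the long-standing LLVMGetNamedFunction are relied on, the header name is the conventional llvm-c/Core.h, and the function name "main" and the attribute strings are placeholders (illustration only, not part of the patch):

#include <llvm-c/Core.h>

/* Assumes M, G (a global variable), B (a builder positioned inside a
   function), Ptr (an i32*) and One (an i32 constant) were created elsewhere. */
void exerciseNewCAPI(LLVMModuleRef M, LLVMValueRef G, LLVMBuilderRef B,
                     LLVMValueRef Ptr, LLVMValueRef One) {
  /* New global-variable accessors. */
  LLVMSetThreadLocalMode(G, LLVMInitialExecTLSModel);
  LLVMSetExternallyInitialized(G, 1);

  /* Atomic fetch-and-add, sequentially consistent, cross-thread. */
  LLVMBuildAtomicRMW(B, LLVMAtomicRMWBinOpAdd, Ptr, One,
                     LLVMAtomicOrderingSequentiallyConsistent,
                     /*singleThread=*/0);

  /* Target-dependent string attribute on a function, if it exists. */
  LLVMValueRef F = LLVMGetNamedFunction(M, "main");
  if (F)
    LLVMAddTargetDependentFunctionAttr(F, "no-frame-pointer-elim", "true");
}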
diff --git a/contrib/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm/lib/IR/DIBuilder.cpp
index 9d6e840..0980e80 100644
--- a/contrib/llvm/lib/IR/DIBuilder.cpp
+++ b/contrib/llvm/lib/IR/DIBuilder.cpp
@@ -61,6 +61,9 @@ void DIBuilder::finalize() {
DIArray GVs = getOrCreateArray(AllGVs);
DIType(TempGVs).replaceAllUsesWith(GVs);
+
+ DIArray IMs = getOrCreateArray(AllImportedModules);
+ DIType(TempImportedModules).replaceAllUsesWith(IMs);
}
/// getNonCompileUnitScope - If N is compile unit return NULL otherwise return
@@ -101,6 +104,8 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
TempGVs = MDNode::getTemporary(VMContext, TElts);
+ TempImportedModules = MDNode::getTemporary(VMContext, TElts);
+
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
createFilePathPair(VMContext, Filename, Directory),
@@ -113,6 +118,7 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
TempRetainTypes,
TempSubprograms,
TempGVs,
+ TempImportedModules,
MDString::get(VMContext, SplitName)
};
TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
@@ -122,6 +128,21 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
NMD->addOperand(TheCU);
}
+DIImportedModule DIBuilder::createImportedModule(DIScope Context,
+ DINameSpace NS,
+ unsigned Line) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_imported_module),
+ Context,
+ NS,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+ };
+ DIImportedModule M(MDNode::get(VMContext, Elts));
+ assert(M.Verify() && "Imported module should be valid");
+ AllImportedModules.push_back(M);
+ return M;
+}
+
/// createFile - Create a file descriptor to hold debugging information
/// for a file.
DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
@@ -225,7 +246,8 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
return DIDerivedType(MDNode::get(VMContext, Elts));
}
-DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base) {
+DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy,
+ DIType Base) {
// Pointer types are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type),
@@ -427,7 +449,7 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
DIObjCProperty DIBuilder::createObjCProperty(StringRef Name,
DIFile File, unsigned LineNumber,
StringRef GetterName,
- StringRef SetterName,
+ StringRef SetterName,
unsigned PropertyAttributes,
DIType Ty) {
Value *Elts[] = {
@@ -601,7 +623,7 @@ DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
DICompositeType DIBuilder::createEnumerationType(
DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
- DIType ClassType) {
+ DIType UnderlyingType) {
// TAG_enumeration_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
@@ -613,7 +635,7 @@ DICompositeType DIBuilder::createEnumerationType(
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ClassType,
+ UnderlyingType,
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
Constant::getNullValue(Type::getInt32Ty(VMContext))
diff --git a/contrib/llvm/lib/IR/DataLayout.cpp b/contrib/llvm/lib/IR/DataLayout.cpp
index ecd5216..5658f56 100644
--- a/contrib/llvm/lib/IR/DataLayout.cpp
+++ b/contrib/llvm/lib/IR/DataLayout.cpp
@@ -41,7 +41,7 @@ char DataLayout::ID = 0;
// Support for StructLayout
//===----------------------------------------------------------------------===//
-StructLayout::StructLayout(StructType *ST, const DataLayout &TD) {
+StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
StructAlignment = 0;
StructSize = 0;
@@ -50,7 +50,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &TD) {
// Loop over each of the elements, placing them in memory.
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Type *Ty = ST->getElementType(i);
- unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
+ unsigned TyAlign = ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty);
// Add padding if necessary to align the data element properly.
if ((StructSize & (TyAlign-1)) != 0)
@@ -60,7 +60,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &TD) {
StructAlignment = std::max(TyAlign, StructAlignment);
MemberOffsets[i] = StructSize;
- StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
+ StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item
}
// Empty structures have alignment of 1 byte.
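The layout loop above (only renamed here from TD to DL) is the standard align-then-advance computation. A standalone sketch of the same arithmetic with hand-supplied sizes and alignments, assuming power-of-two alignments (illustration only, not part of the patch):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Field { uint64_t Size; uint64_t Align; };

// Offsets mirror MemberOffsets; TotalSize and MaxAlign mirror StructSize and
// StructAlignment. Tail padding of the final struct size is omitted.
std::vector<uint64_t> layoutStruct(const std::vector<Field> &Fields,
                                   uint64_t &TotalSize, uint64_t &MaxAlign) {
  std::vector<uint64_t> Offsets;
  TotalSize = 0;
  MaxAlign = 1;
  for (std::vector<Field>::const_iterator I = Fields.begin(), E = Fields.end();
       I != E; ++I) {
    if (TotalSize & (I->Align - 1))                 // pad to the field's alignment
      TotalSize = (TotalSize + I->Align - 1) & ~(I->Align - 1);
    MaxAlign = std::max(I->Align, MaxAlign);
    Offsets.push_back(TotalSize);
    TotalSize += I->Size;                           // consume space for this field
  }
  return Offsets;
}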
diff --git a/contrib/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm/lib/IR/DebugInfo.cpp
index 0ffe99d..ec83dca 100644
--- a/contrib/llvm/lib/IR/DebugInfo.cpp
+++ b/contrib/llvm/lib/IR/DebugInfo.cpp
@@ -64,7 +64,8 @@ bool DIDescriptor::Verify() const {
DISubrange(DbgNode).Verify() || DIEnumerator(DbgNode).Verify() ||
DIObjCProperty(DbgNode).Verify() ||
DITemplateTypeParameter(DbgNode).Verify() ||
- DITemplateValueParameter(DbgNode).Verify());
+ DITemplateValueParameter(DbgNode).Verify() ||
+ DIImportedModule(DbgNode).Verify());
}
static Value *getField(const MDNode *DbgNode, unsigned Elt) {
@@ -336,6 +337,12 @@ bool DIDescriptor::isEnumerator() const {
bool DIDescriptor::isObjCProperty() const {
return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
}
+
+/// \brief Return true if the specified tag is DW_TAG_imported_module.
+bool DIDescriptor::isImportedModule() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_imported_module;
+}
+
//===----------------------------------------------------------------------===//
// Simple Descriptor Constructors and other Methods
//===----------------------------------------------------------------------===//
@@ -418,7 +425,7 @@ bool DICompileUnit::Verify() const {
if (N.empty())
return false;
// It is possible that the directory and producer strings are empty.
- return DbgNode->getNumOperands() == 12;
+ return DbgNode->getNumOperands() == 13;
}
/// Verify - Verify that an ObjC property is well formed.
@@ -580,6 +587,11 @@ bool DITemplateValueParameter::Verify() const {
return isTemplateValueParameter() && DbgNode->getNumOperands() == 8;
}
+/// \brief Verify that the imported module descriptor is well formed.
+bool DIImportedModule::Verify() const {
+ return isImportedModule() && DbgNode->getNumOperands() == 4;
+}
+
/// getOriginalTypeSize - If this type is derived from a base type then
/// return base type size.
uint64_t DIDerivedType::getOriginalTypeSize() const {
@@ -694,7 +706,7 @@ StringRef DIScope::getDirectory() const {
}
DIArray DICompileUnit::getEnumTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 12)
+ if (!DbgNode || DbgNode->getNumOperands() < 13)
return DIArray();
if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(7)))
@@ -703,7 +715,7 @@ DIArray DICompileUnit::getEnumTypes() const {
}
DIArray DICompileUnit::getRetainedTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 12)
+ if (!DbgNode || DbgNode->getNumOperands() < 13)
return DIArray();
if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8)))
@@ -712,7 +724,7 @@ DIArray DICompileUnit::getRetainedTypes() const {
}
DIArray DICompileUnit::getSubprograms() const {
- if (!DbgNode || DbgNode->getNumOperands() < 12)
+ if (!DbgNode || DbgNode->getNumOperands() < 13)
return DIArray();
if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9)))
@@ -722,7 +734,7 @@ DIArray DICompileUnit::getSubprograms() const {
DIArray DICompileUnit::getGlobalVariables() const {
- if (!DbgNode || DbgNode->getNumOperands() < 12)
+ if (!DbgNode || DbgNode->getNumOperands() < 13)
return DIArray();
if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
@@ -730,6 +742,15 @@ DIArray DICompileUnit::getGlobalVariables() const {
return DIArray();
}
+DIArray DICompileUnit::getImportedModules() const {
+ if (!DbgNode || DbgNode->getNumOperands() < 13)
+ return DIArray();
+
+ if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11)))
+ return DIArray(N);
+ return DIArray();
+}
+
/// fixupObjcLikeName - Replace special characters used in typical
/// Objective-C names with '.' in a given string.
static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
@@ -1054,8 +1075,13 @@ void DIScope::printInternal(raw_ostream &OS) const {
void DICompileUnit::printInternal(raw_ostream &OS) const {
DIScope::printInternal(OS);
- if (const char *Lang = dwarf::LanguageString(getLanguage()))
- OS << " [" << Lang << ']';
+ OS << " [";
+ unsigned Lang = getLanguage();
+ if (const char *LangStr = dwarf::LanguageString(Lang))
+ OS << LangStr;
+ else
+ (OS << "lang 0x").write_hex(Lang);
+ OS << ']';
}
void DIEnumerator::printInternal(raw_ostream &OS) const {
diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp
index 1e72b90..7f7efab 100644
--- a/contrib/llvm/lib/IR/Function.cpp
+++ b/contrib/llvm/lib/IR/Function.cpp
@@ -124,6 +124,13 @@ bool Argument::hasStructRetAttr() const {
hasAttribute(1, Attribute::StructRet);
}
+/// hasReturnedAttr - Return true if this argument has the returned attribute on
+/// it in its containing function.
+bool Argument::hasReturnedAttr() const {
+ return getParent()->getAttributes().
+ hasAttribute(getArgNo()+1, Attribute::Returned);
+}
+
/// addAttr - Add attributes to an argument.
void Argument::addAttr(AttributeSet AS) {
assert(AS.getNumSlots() <= 1 &&
diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp
index 0228aeb..6a6b7af 100644
--- a/contrib/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm/lib/IR/Metadata.cpp
@@ -403,42 +403,6 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
}
}
-MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
- if (!A || !B)
- return NULL;
-
- if (A == B)
- return A;
-
- SmallVector<MDNode *, 4> PathA;
- MDNode *T = A;
- while (T) {
- PathA.push_back(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
- }
-
- SmallVector<MDNode *, 4> PathB;
- T = B;
- while (T) {
- PathB.push_back(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
- }
-
- int IA = PathA.size() - 1;
- int IB = PathB.size() - 1;
-
- MDNode *Ret = 0;
- while (IA >= 0 && IB >=0) {
- if (PathA[IA] == PathB[IB])
- Ret = PathA[IA];
- else
- break;
- --IA;
- --IB;
- }
- return Ret;
-}
-
MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
if (!A || !B)
return NULL;
diff --git a/contrib/llvm/lib/IR/PassManager.cpp b/contrib/llvm/lib/IR/PassManager.cpp
index 3c968aa..387094a 100644
--- a/contrib/llvm/lib/IR/PassManager.cpp
+++ b/contrib/llvm/lib/IR/PassManager.cpp
@@ -42,14 +42,14 @@ namespace llvm {
// Different debug levels that can be enabled...
enum PassDebugLevel {
- None, Arguments, Structure, Executions, Details
+ Disabled, Arguments, Structure, Executions, Details
};
static cl::opt<enum PassDebugLevel>
PassDebugging("debug-pass", cl::Hidden,
cl::desc("Print PassManager debugging information"),
cl::values(
- clEnumVal(None , "disable debug output"),
+ clEnumVal(Disabled , "disable debug output"),
clEnumVal(Arguments , "print pass arguments to pass to 'opt'"),
clEnumVal(Structure , "print pass structure before run()"),
clEnumVal(Executions, "print pass name before it is executed"),
diff --git a/contrib/llvm/lib/IR/Type.cpp b/contrib/llvm/lib/IR/Type.cpp
index 1e6a51a..46c61fc 100644
--- a/contrib/llvm/lib/IR/Type.cpp
+++ b/contrib/llvm/lib/IR/Type.cpp
@@ -380,7 +380,7 @@ FunctionType *FunctionType::get(Type *ReturnType,
}
FunctionType *FunctionType::get(Type *Result, bool isVarArg) {
- return get(Result, ArrayRef<Type *>(), isVarArg);
+ return get(Result, None, isVarArg);
}
/// isValidReturnType - Return true if the specified type is valid as a return
@@ -499,7 +499,7 @@ StructType *StructType::create(LLVMContext &Context, StringRef Name) {
}
StructType *StructType::get(LLVMContext &Context, bool isPacked) {
- return get(Context, llvm::ArrayRef<Type*>(), isPacked);
+ return get(Context, None, isPacked);
}
StructType *StructType::get(Type *type, ...) {
diff --git a/contrib/llvm/lib/IR/Value.cpp b/contrib/llvm/lib/IR/Value.cpp
index adc702e..89a3c05 100644
--- a/contrib/llvm/lib/IR/Value.cpp
+++ b/contrib/llvm/lib/IR/Value.cpp
@@ -118,7 +118,7 @@ bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
if (std::find(I->op_begin(), I->op_end(), this) != I->op_end())
return true;
- if (MaxBlockSize-- == 0) // If the block is larger fall back to use_iterator
+ if (--MaxBlockSize == 0) // If the block is larger fall back to use_iterator
break;
}
@@ -333,6 +333,7 @@ namespace {
// Various metrics for how much to strip off of pointers.
enum PointerStripKind {
PSK_ZeroIndices,
+ PSK_ZeroIndicesAndAliases,
PSK_InBoundsConstantIndices,
PSK_InBounds
};
@@ -350,6 +351,7 @@ static Value *stripPointerCastsAndOffsets(Value *V) {
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
switch (StripKind) {
+ case PSK_ZeroIndicesAndAliases:
case PSK_ZeroIndices:
if (!GEP->hasAllZeroIndices())
return V;
@@ -367,7 +369,7 @@ static Value *stripPointerCastsAndOffsets(Value *V) {
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
V = cast<Operator>(V)->getOperand(0);
} else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (GA->mayBeOverridden())
+ if (StripKind == PSK_ZeroIndices || GA->mayBeOverridden())
return V;
V = GA->getAliasee();
} else {
@@ -381,6 +383,10 @@ static Value *stripPointerCastsAndOffsets(Value *V) {
} // namespace
Value *Value::stripPointerCasts() {
+ return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndAliases>(this);
+}
+
+Value *Value::stripPointerCastsNoFollowAliases() {
return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
}
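Two notes on the Value.cpp hunks above. The decrement fix matters because the old post-decrement form tested the counter before decrementing, so one instruction beyond the intended cap was scanned; the sketch below reproduces both counts. Separately, stripPointerCasts() now also looks through non-overridable aliases, while the new stripPointerCastsNoFollowAliases() preserves the old stop-at-alias behaviour. (Illustration only, not part of the patch.)

#include <cstdio>

// Visit up to three elements of a ten-element sequence, with the cap tested
// either by pre-decrement (the fixed form) or post-decrement (the old form).
static unsigned scan(bool PreDecrement) {
  unsigned Budget = 3, Visited = 0;
  for (unsigned I = 0; I < 10; ++I) {
    ++Visited;
    if (PreDecrement ? (--Budget == 0) : (Budget-- == 0))
      break;
  }
  return Visited;
}

int main() {
  std::printf("pre-decrement:  %u\n", scan(true));   // 3, the intended cap
  std::printf("post-decrement: %u\n", scan(false));  // 4, one element too many
  return 0;
}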
diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp
index 8bfbb32..d106173 100644
--- a/contrib/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm/lib/IR/Verifier.cpp
@@ -301,9 +301,12 @@ namespace {
bool VerifyIntrinsicType(Type *Ty,
ArrayRef<Intrinsic::IITDescriptor> &Infos,
SmallVectorImpl<Type*> &ArgTys);
- void VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
+ bool VerifyAttributeCount(AttributeSet Attrs, unsigned Params);
+ void VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
+ bool isFunction, const Value *V);
+ void VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
bool isReturnValue, const Value *V);
- void VerifyFunctionAttrs(FunctionType *FT, const AttributeSet &Attrs,
+ void VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
const Value *V);
void WriteValue(const Value *V) {
@@ -446,6 +449,30 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) {
}
}
+ if (GV.hasName() && (GV.getName() == "llvm.used" ||
+ GV.getName() == "llvm.compiler_used")) {
+ Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+ "invalid linkage for intrinsic global variable", &GV);
+ Type *GVType = GV.getType()->getElementType();
+ if (ArrayType *ATy = dyn_cast<ArrayType>(GVType)) {
+ PointerType *PTy = dyn_cast<PointerType>(ATy->getElementType());
+ Assert1(PTy, "wrong type for intrinsic global variable", &GV);
+ if (GV.hasInitializer()) {
+ Constant *Init = GV.getInitializer();
+ ConstantArray *InitArray = dyn_cast<ConstantArray>(Init);
+ Assert1(InitArray, "wrong initializer for intrinsic global variable",
+ Init);
+ for (unsigned i = 0, e = InitArray->getNumOperands(); i != e; ++i) {
+ Value *V = Init->getOperand(i)->stripPointerCasts();
+ // stripPointerCasts strips aliases, so we only need to check for
+ // variables and functions.
+ Assert1(isa<GlobalVariable>(V) || isa<Function>(V),
+ "invalid llvm.used member", V);
+ }
+ }
+ }
+ }
+
visitGlobalValue(GV);
}
@@ -626,44 +653,74 @@ void Verifier::visitModuleFlag(MDNode *Op, DenseMap<MDString*, MDNode*>&SeenIDs,
}
}
+void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
+ bool isFunction, const Value* V) {
+ unsigned Slot = ~0U;
+ for (unsigned I = 0, E = Attrs.getNumSlots(); I != E; ++I)
+ if (Attrs.getSlotIndex(I) == Idx) {
+ Slot = I;
+ break;
+ }
+
+ assert(Slot != ~0U && "Attribute set inconsistency!");
+
+ for (AttributeSet::iterator I = Attrs.begin(Slot), E = Attrs.end(Slot);
+ I != E; ++I) {
+ if (I->isStringAttribute())
+ continue;
+
+ if (I->getKindAsEnum() == Attribute::NoReturn ||
+ I->getKindAsEnum() == Attribute::NoUnwind ||
+ I->getKindAsEnum() == Attribute::ReadNone ||
+ I->getKindAsEnum() == Attribute::ReadOnly ||
+ I->getKindAsEnum() == Attribute::NoInline ||
+ I->getKindAsEnum() == Attribute::AlwaysInline ||
+ I->getKindAsEnum() == Attribute::OptimizeForSize ||
+ I->getKindAsEnum() == Attribute::StackProtect ||
+ I->getKindAsEnum() == Attribute::StackProtectReq ||
+ I->getKindAsEnum() == Attribute::StackProtectStrong ||
+ I->getKindAsEnum() == Attribute::NoRedZone ||
+ I->getKindAsEnum() == Attribute::NoImplicitFloat ||
+ I->getKindAsEnum() == Attribute::Naked ||
+ I->getKindAsEnum() == Attribute::InlineHint ||
+ I->getKindAsEnum() == Attribute::StackAlignment ||
+ I->getKindAsEnum() == Attribute::UWTable ||
+ I->getKindAsEnum() == Attribute::NonLazyBind ||
+ I->getKindAsEnum() == Attribute::ReturnsTwice ||
+ I->getKindAsEnum() == Attribute::SanitizeAddress ||
+ I->getKindAsEnum() == Attribute::SanitizeThread ||
+ I->getKindAsEnum() == Attribute::SanitizeMemory ||
+ I->getKindAsEnum() == Attribute::MinSize ||
+ I->getKindAsEnum() == Attribute::NoDuplicate ||
+ I->getKindAsEnum() == Attribute::NoBuiltin) {
+ if (!isFunction)
+ CheckFailed("Attribute '" + I->getKindAsString() +
+ "' only applies to functions!", V);
+ return;
+ } else if (isFunction) {
+ CheckFailed("Attribute '" + I->getKindAsString() +
+ "' does not apply to functions!", V);
+ return;
+ }
+ }
+}
+
// VerifyParameterAttrs - Check the given attributes for an argument or return
// value of the specified type. The value V is printed in error messages.
-void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
+void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
bool isReturnValue, const Value *V) {
if (!Attrs.hasAttributes(Idx))
return;
- Assert1(!Attrs.hasAttribute(Idx, Attribute::NoReturn) &&
- !Attrs.hasAttribute(Idx, Attribute::NoUnwind) &&
- !Attrs.hasAttribute(Idx, Attribute::ReadNone) &&
- !Attrs.hasAttribute(Idx, Attribute::ReadOnly) &&
- !Attrs.hasAttribute(Idx, Attribute::NoInline) &&
- !Attrs.hasAttribute(Idx, Attribute::AlwaysInline) &&
- !Attrs.hasAttribute(Idx, Attribute::OptimizeForSize) &&
- !Attrs.hasAttribute(Idx, Attribute::StackProtect) &&
- !Attrs.hasAttribute(Idx, Attribute::StackProtectReq) &&
- !Attrs.hasAttribute(Idx, Attribute::NoRedZone) &&
- !Attrs.hasAttribute(Idx, Attribute::NoImplicitFloat) &&
- !Attrs.hasAttribute(Idx, Attribute::Naked) &&
- !Attrs.hasAttribute(Idx, Attribute::InlineHint) &&
- !Attrs.hasAttribute(Idx, Attribute::StackAlignment) &&
- !Attrs.hasAttribute(Idx, Attribute::UWTable) &&
- !Attrs.hasAttribute(Idx, Attribute::NonLazyBind) &&
- !Attrs.hasAttribute(Idx, Attribute::ReturnsTwice) &&
- !Attrs.hasAttribute(Idx, Attribute::SanitizeAddress) &&
- !Attrs.hasAttribute(Idx, Attribute::SanitizeThread) &&
- !Attrs.hasAttribute(Idx, Attribute::SanitizeMemory) &&
- !Attrs.hasAttribute(Idx, Attribute::MinSize) &&
- !Attrs.hasAttribute(Idx, Attribute::NoBuiltin),
- "Some attributes in '" + Attrs.getAsString(Idx) +
- "' only apply to functions!", V);
+ VerifyAttributeTypes(Attrs, Idx, false, V);
if (isReturnValue)
Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
!Attrs.hasAttribute(Idx, Attribute::Nest) &&
!Attrs.hasAttribute(Idx, Attribute::StructRet) &&
- !Attrs.hasAttribute(Idx, Attribute::NoCapture),
- "Attribute 'byval', 'nest', 'sret', and 'nocapture' "
+ !Attrs.hasAttribute(Idx, Attribute::NoCapture) &&
+ !Attrs.hasAttribute(Idx, Attribute::Returned),
+ "Attribute 'byval', 'nest', 'sret', 'nocapture', and 'returned' "
"do not apply to return values!", V);
// Check for mutually incompatible attributes.
@@ -683,6 +740,10 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
Attrs.hasAttribute(Idx, Attribute::InReg))), "Attributes "
"'byval, nest, and inreg' are incompatible!", V);
+ Assert1(!(Attrs.hasAttribute(Idx, Attribute::StructRet) &&
+ Attrs.hasAttribute(Idx, Attribute::Returned)), "Attributes "
+ "'sret and returned' are incompatible!", V);
+
Assert1(!(Attrs.hasAttribute(Idx, Attribute::ZExt) &&
Attrs.hasAttribute(Idx, Attribute::SExt)), "Attributes "
"'zeroext and signext' are incompatible!", V);
@@ -712,81 +773,51 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
// VerifyFunctionAttrs - Check parameter attributes against a function type.
// The value V is printed in error messages.
-void Verifier::VerifyFunctionAttrs(FunctionType *FT,
- const AttributeSet &Attrs,
+void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
const Value *V) {
if (Attrs.isEmpty())
return;
bool SawNest = false;
+ bool SawReturned = false;
for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
- unsigned Index = Attrs.getSlotIndex(i);
+ unsigned Idx = Attrs.getSlotIndex(i);
Type *Ty;
- if (Index == 0)
+ if (Idx == 0)
Ty = FT->getReturnType();
- else if (Index-1 < FT->getNumParams())
- Ty = FT->getParamType(Index-1);
+ else if (Idx-1 < FT->getNumParams())
+ Ty = FT->getParamType(Idx-1);
else
break; // VarArgs attributes, verified elsewhere.
- VerifyParameterAttrs(Attrs, Index, Ty, Index == 0, V);
+ VerifyParameterAttrs(Attrs, Idx, Ty, Idx == 0, V);
- if (Attrs.hasAttribute(i, Attribute::Nest)) {
+ if (Idx == 0)
+ continue;
+
+ if (Attrs.hasAttribute(Idx, Attribute::Nest)) {
Assert1(!SawNest, "More than one parameter has attribute nest!", V);
SawNest = true;
}
- if (Attrs.hasAttribute(Index, Attribute::StructRet))
- Assert1(Index == 1, "Attribute sret is not on first parameter!", V);
+ if (Attrs.hasAttribute(Idx, Attribute::Returned)) {
+ Assert1(!SawReturned, "More than one parameter has attribute returned!",
+ V);
+ Assert1(Ty->canLosslesslyBitCastTo(FT->getReturnType()), "Incompatible "
+ "argument and return types for 'returned' attribute", V);
+ SawReturned = true;
+ }
+
+ if (Attrs.hasAttribute(Idx, Attribute::StructRet))
+ Assert1(Idx == 1, "Attribute sret is not on first parameter!", V);
}
if (!Attrs.hasAttributes(AttributeSet::FunctionIndex))
return;
- AttrBuilder NotFn(Attrs, AttributeSet::FunctionIndex);
- NotFn.removeFunctionOnlyAttrs();
- Assert1(NotFn.empty(), "Attributes '" +
- AttributeSet::get(V->getContext(),
- AttributeSet::FunctionIndex,
- NotFn).getAsString(AttributeSet::FunctionIndex) +
- "' do not apply to the function!", V);
-
- // Check for mutually incompatible attributes.
- Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ByVal) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Nest)) ||
- (Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ByVal) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StructRet)) ||
- (Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Nest) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StructRet))),
- "Attributes 'byval, nest, and sret' are incompatible!", V);
-
- Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ByVal) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Nest)) ||
- (Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ByVal) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::InReg)) ||
- (Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Nest) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::InReg))),
- "Attributes 'byval, nest, and inreg' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ZExt) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::SExt)),
- "Attributes 'zeroext and signext' are incompatible!", V);
+ VerifyAttributeTypes(Attrs, AttributeSet::FunctionIndex, true, V);
Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::ReadNone) &&
@@ -801,7 +832,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT,
"Attributes 'noinline and alwaysinline' are incompatible!", V);
}
-static bool VerifyAttributeCount(const AttributeSet &Attrs, unsigned Params) {
+bool Verifier::VerifyAttributeCount(AttributeSet Attrs, unsigned Params) {
if (Attrs.getNumSlots() == 0)
return true;
@@ -837,7 +868,7 @@ void Verifier::visitFunction(Function &F) {
Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
"Invalid struct return type!", &F);
- const AttributeSet &Attrs = F.getAttributes();
+ AttributeSet Attrs = F.getAttributes();
Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()),
"Attribute after last parameter!", &F);
@@ -1350,7 +1381,7 @@ void Verifier::VerifyCallSite(CallSite CS) {
"Call parameter type does not match function signature!",
CS.getArgument(i), FTy->getParamType(i), I);
- const AttributeSet &Attrs = CS.getAttributes();
+ AttributeSet Attrs = CS.getAttributes();
Assert1(VerifyAttributeCount(Attrs, CS.arg_size()),
"Attribute after last parameter!", I);
@@ -1358,15 +1389,41 @@ void Verifier::VerifyCallSite(CallSite CS) {
// Verify call attributes.
VerifyFunctionAttrs(FTy, Attrs, I);
- if (FTy->isVarArg())
+ if (FTy->isVarArg()) {
+ // FIXME? is 'nest' even legal here?
+ bool SawNest = false;
+ bool SawReturned = false;
+
+ for (unsigned Idx = 1; Idx < 1 + FTy->getNumParams(); ++Idx) {
+ if (Attrs.hasAttribute(Idx, Attribute::Nest))
+ SawNest = true;
+ if (Attrs.hasAttribute(Idx, Attribute::Returned))
+ SawReturned = true;
+ }
+
// Check attributes on the varargs part.
for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) {
- VerifyParameterAttrs(Attrs, Idx, CS.getArgument(Idx-1)->getType(),
- false, I);
+ Type *Ty = CS.getArgument(Idx-1)->getType();
+ VerifyParameterAttrs(Attrs, Idx, Ty, false, I);
+
+ if (Attrs.hasAttribute(Idx, Attribute::Nest)) {
+ Assert1(!SawNest, "More than one parameter has attribute nest!", I);
+ SawNest = true;
+ }
+
+ if (Attrs.hasAttribute(Idx, Attribute::Returned)) {
+ Assert1(!SawReturned, "More than one parameter has attribute returned!",
+ I);
+ Assert1(Ty->canLosslesslyBitCastTo(FTy->getReturnType()),
+ "Incompatible argument and return types for 'returned' "
+ "attribute", I);
+ SawReturned = true;
+ }
Assert1(!Attrs.hasAttribute(Idx, Attribute::StructRet),
"Attribute 'sret' cannot be used for vararg call arguments!", I);
}
+ }
// Verify that there's no metadata unless it's a direct call to an intrinsic.
if (CS.getCalledFunction() == 0 ||
diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp
index 74cbdad..d2e13c9 100644
--- a/contrib/llvm/lib/Linker/LinkModules.cpp
+++ b/contrib/llvm/lib/Linker/LinkModules.cpp
@@ -13,21 +13,15 @@
#include "llvm/Linker.h"
#include "llvm-c/Linker.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <cctype>
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -35,6 +29,8 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
namespace {
+ typedef SmallPtrSet<StructType*, 32> TypeSet;
+
class TypeMapTy : public ValueMapTypeRemapper {
/// MappedTypes - This is a mapping from a source type to a destination type
/// to use.
@@ -55,6 +51,9 @@ class TypeMapTy : public ValueMapTypeRemapper {
SmallPtrSet<StructType*, 16> DstResolvedOpaqueTypes;
public:
+ TypeMapTy(TypeSet &Set) : DstStructTypesSet(Set) {}
+
+ TypeSet &DstStructTypesSet;
/// addTypeMapping - Indicate that the specified type in the destination
/// module is conceptually equivalent to the specified type in the source
/// module.
@@ -331,13 +330,20 @@ Type *TypeMapTy::getImpl(Type *Ty) {
StructType *STy = cast<StructType>(Ty);
// If the type is opaque, we can just use it directly.
- if (STy->isOpaque())
+ if (STy->isOpaque()) {
+ // A named structure type from src module is used. Add it to the Set of
+ // identified structs in the destination module.
+ DstStructTypesSet.insert(STy);
return *Entry = STy;
+ }
// Otherwise we create a new type and resolve its body later. This will be
// resolved by the top level of get().
SrcDefinitionsToResolve.push_back(STy);
StructType *DTy = StructType::create(STy->getContext());
+ // A new identified structure type was created. Add it to the set of
+ // identified structs in the destination module.
+ DstStructTypesSet.insert(DTy);
DstResolvedOpaqueTypes.insert(DTy);
return *Entry = DTy;
}
@@ -379,8 +385,8 @@ namespace {
public:
std::string ErrorMsg;
- ModuleLinker(Module *dstM, Module *srcM, unsigned mode)
- : DstM(dstM), SrcM(srcM), Mode(mode) { }
+ ModuleLinker(Module *dstM, TypeSet &Set, Module *srcM, unsigned mode)
+ : DstM(dstM), SrcM(srcM), TypeMap(Set), Mode(mode) { }
bool run();
@@ -594,11 +600,6 @@ void ModuleLinker::computeTypeMapping() {
SmallPtrSet<StructType*, 32> SrcStructTypesSet(SrcStructTypes.begin(),
SrcStructTypes.end());
- TypeFinder DstStructTypes;
- DstStructTypes.run(*DstM, true);
- SmallPtrSet<StructType*, 32> DstStructTypesSet(DstStructTypes.begin(),
- DstStructTypes.end());
-
for (unsigned i = 0, e = SrcStructTypes.size(); i != e; ++i) {
StructType *ST = SrcStructTypes[i];
if (!ST->hasName()) continue;
@@ -629,7 +630,7 @@ void ModuleLinker::computeTypeMapping() {
// we prefer to take the '%C' version. So we are then left with both
// '%C.1' and '%C' being used for the same types. This leads to some
// variables using one type and some using the other.
- if (!SrcStructTypesSet.count(DST) && DstStructTypesSet.count(DST))
+ if (!SrcStructTypesSet.count(DST) && TypeMap.DstStructTypesSet.count(DST))
TypeMap.addTypeMapping(DST, ST);
}
@@ -1287,6 +1288,25 @@ bool ModuleLinker::run() {
return false;
}
+Linker::Linker(Module *M) : Composite(M) {
+ TypeFinder StructTypes;
+ StructTypes.run(*M, true);
+ IdentifiedStructTypes.insert(StructTypes.begin(), StructTypes.end());
+}
+
+Linker::~Linker() {
+}
+
+bool Linker::linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg) {
+ ModuleLinker TheLinker(Composite, IdentifiedStructTypes, Src, Mode);
+ if (TheLinker.run()) {
+ if (ErrorMsg)
+ *ErrorMsg = TheLinker.ErrorMsg;
+ return true;
+ }
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// LinkModules entrypoint.
//===----------------------------------------------------------------------===//
@@ -1298,13 +1318,8 @@ bool ModuleLinker::run() {
/// and shouldn't be relied on to be consistent.
bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode,
std::string *ErrorMsg) {
- ModuleLinker TheLinker(Dest, Src, Mode);
- if (TheLinker.run()) {
- if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg;
- return true;
- }
-
- return false;
+ Linker L(Dest);
+ return L.linkInModule(Src, Mode, ErrorMsg);
}
//===----------------------------------------------------------------------===//
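With the Linker class now owning the identified-struct set, clients can link several modules incrementally instead of calling the static LinkModules() repeatedly. A hedged sketch; Linker::DestroySource is assumed from the header of this LLVM version and is not shown in the patch (illustration only, not part of the patch):

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Linker.h"
#include <string>
#include <vector>

// Link every module in Srcs into a fresh composite; on failure Err holds the
// linker's message and null is returned.
llvm::Module *linkAll(llvm::LLVMContext &Ctx,
                      const std::vector<llvm::Module *> &Srcs,
                      std::string &Err) {
  llvm::Module *Composite = new llvm::Module("composite", Ctx);
  llvm::Linker L(Composite);  // seeds the identified-struct set from Composite
  for (unsigned i = 0, e = Srcs.size(); i != e; ++i)
    if (L.linkInModule(Srcs[i], llvm::Linker::DestroySource, &Err))
      return 0;
  return Composite;
}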
diff --git a/contrib/llvm/lib/Linker/Linker.cpp b/contrib/llvm/lib/Linker/Linker.cpp
deleted file mode 100644
index 74d24f2..0000000
--- a/contrib/llvm/lib/Linker/Linker.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-//===- lib/Linker/Linker.cpp - Basic Linker functionality ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains basic Linker functionality that all usages will need.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Linker.h"
-#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-using namespace llvm;
-
-Linker::Linker(StringRef progname, StringRef modname,
- LLVMContext& C, unsigned flags):
- Context(C),
- Composite(new Module(modname, C)),
- Flags(flags),
- Error(),
- ProgramName(progname) { }
-
-Linker::Linker(StringRef progname, Module* aModule, unsigned flags) :
- Context(aModule->getContext()),
- Composite(aModule),
- Flags(flags),
- Error(),
- ProgramName(progname) { }
-
-Linker::~Linker() {
- delete Composite;
-}
-
-bool
-Linker::error(StringRef message) {
- Error = message;
- if (!(Flags&QuietErrors))
- errs() << ProgramName << ": error: " << message << "\n";
- return true;
-}
-
-bool
-Linker::warning(StringRef message) {
- Error = message;
- if (!(Flags&QuietWarnings))
- errs() << ProgramName << ": warning: " << message << "\n";
- return false;
-}
-
-void
-Linker::verbose(StringRef message) {
- if (Flags&Verbose)
- errs() << " " << message << "\n";
-}
-
-Module*
-Linker::releaseModule() {
- Module* result = Composite;
- Error.clear();
- Composite = 0;
- Flags = 0;
- return result;
-}
diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp
index 51bb435..9e60884 100644
--- a/contrib/llvm/lib/MC/MCAsmInfo.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp
@@ -87,10 +87,10 @@ MCAsmInfo::MCAsmInfo() {
SupportsDebugInformation = false;
ExceptionsType = ExceptionHandling::None;
DwarfUsesInlineInfoSection = false;
- DwarfSectionOffsetDirective = 0;
DwarfUsesRelocationsAcrossSections = true;
DwarfRegNumForCFI = false;
HasMicrosoftFastStdCallMangling = false;
+ NeedsDwarfSectionOffsetDirective = false;
}
MCAsmInfo::~MCAsmInfo() {
diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
index fd79193..33350d9 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
@@ -36,8 +36,8 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
// Set up DWARF directives
HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
SupportsDebugInformation = true;
- DwarfSectionOffsetDirective = "\t.secrel32\t";
HasMicrosoftFastStdCallMangling = true;
+ NeedsDwarfSectionOffsetDirective = true;
}
void MCAsmInfoMicrosoft::anchor() { }
diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
index 35613b4..9e86785 100644
--- a/contrib/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
@@ -124,19 +124,15 @@ public:
/// @name MCStreamer Interface
/// @{
- virtual void ChangeSection(const MCSection *Section);
+ virtual void ChangeSection(const MCSection *Section,
+ const MCExpr *Subsection);
virtual void InitSections() {
InitToTextSection();
}
virtual void InitToTextSection() {
- // FIXME, this is MachO specific, but the testsuite
- // expects this.
- SwitchSection(getContext().getMachOSection(
- "__TEXT", "__text",
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- 0, SectionKind::getText()));
+ SwitchSection(getContext().getObjectFileInfo()->getTextSection());
}
virtual void EmitLabel(MCSymbol *Symbol);
@@ -333,9 +329,10 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
}
-void MCAsmStreamer::ChangeSection(const MCSection *Section) {
+void MCAsmStreamer::ChangeSection(const MCSection *Section,
+ const MCExpr *Subsection) {
assert(Section && "Cannot switch to a null section!");
- Section->PrintSwitchToSection(MAI, OS);
+ Section->PrintSwitchToSection(MAI, OS, Subsection);
}
void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
@@ -642,7 +639,8 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
- assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ assert(getCurrentSection().first &&
+ "Cannot emit contents before setting section!");
if (Data.empty()) return;
if (Data.size() == 1) {
@@ -673,7 +671,8 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size,
void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
unsigned AddrSpace) {
- assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ assert(getCurrentSection().first &&
+ "Cannot emit contents before setting section!");
const char *Directive = 0;
switch (Size) {
default: break;
@@ -1368,7 +1367,8 @@ void MCAsmStreamer::EmitTCEntry(const MCSymbol &S) {
}
void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
- assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ assert(getCurrentSection().first &&
+ "Cannot emit contents before setting section!");
// Show the encoding in a comment if we have a code emitter.
if (Emitter)
diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp
index 1829266..fb5ab28 100644
--- a/contrib/llvm/lib/MC/MCAssembler.cpp
+++ b/contrib/llvm/lib/MC/MCAssembler.cpp
@@ -243,6 +243,36 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
A->getSectionList().push_back(this);
}
+MCSectionData::iterator
+MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) {
+ if (Subsection == 0 && SubsectionFragmentMap.empty())
+ return end();
+
+ SmallVectorImpl<std::pair<unsigned, MCFragment *> >::iterator MI =
+ std::lower_bound(SubsectionFragmentMap.begin(), SubsectionFragmentMap.end(),
+ std::make_pair(Subsection, (MCFragment *)0));
+ bool ExactMatch = false;
+ if (MI != SubsectionFragmentMap.end()) {
+ ExactMatch = MI->first == Subsection;
+ if (ExactMatch)
+ ++MI;
+ }
+ iterator IP;
+ if (MI == SubsectionFragmentMap.end())
+ IP = end();
+ else
+ IP = MI->second;
+ if (!ExactMatch && Subsection != 0) {
+ // The GNU as documentation claims that subsections have an alignment of 4,
+ // although this appears not to be the case.
+ MCFragment *F = new MCDataFragment();
+ SubsectionFragmentMap.insert(MI, std::make_pair(Subsection, F));
+ getFragmentList().insert(IP, F);
+ F->setParent(this);
+ }
+ return IP;
+}
+
/* *** */
MCSymbolData::MCSymbolData() : Symbol(0) {}
diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp
index 0f8f074..18982e9 100644
--- a/contrib/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@@ -197,6 +197,8 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS,
// actually a DW_LNE_end_sequence.
// Switch to the section to be able to create a symbol at its end.
+ // TODO: keep track of the last subsection so that this symbol appears in the
+ // correct place.
MCOS->SwitchSection(Section);
MCContext &context = MCOS->getContext();
@@ -787,7 +789,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
if (Symbol->isTemporary())
return;
MCContext &context = MCOS->getContext();
- if (context.getGenDwarfSection() != MCOS->getCurrentSection())
+ if (context.getGenDwarfSection() != MCOS->getCurrentSection().first)
return;
// The dwarf label's name does not have the symbol name's leading
@@ -899,7 +901,7 @@ namespace {
/// EmitCompactUnwind - Emit the unwind information in a compact way. If
/// we're successful, return 'true'. Otherwise, return 'false' and it will
/// emit the normal CIE and FDE.
- bool EmitCompactUnwind(MCStreamer &streamer,
+ void EmitCompactUnwind(MCStreamer &streamer,
const MCDwarfFrameInfo &frame);
const MCSymbol &EmitCIE(MCStreamer &streamer,
@@ -1139,7 +1141,7 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
/// EmitCompactUnwind - Emit the unwind information in a compact way. If we're
/// successful, return 'true'. Otherwise, return 'false' and it will emit the
/// normal CIE and FDE.
-bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
+void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
const MCDwarfFrameInfo &Frame) {
MCContext &Context = Streamer.getContext();
const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
@@ -1168,14 +1170,13 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
// .quad except_tab1
uint32_t Encoding = Frame.CompactUnwindEncoding;
- if (!Encoding) return false;
+ if (!Encoding) return;
+ bool DwarfEHFrameOnly = (Encoding == MOFI->getCompactUnwindDwarfEHFrameOnly());
// The encoding needs to know we have an LSDA.
- if (Frame.Lsda)
+ if (!DwarfEHFrameOnly && Frame.Lsda)
Encoding |= 0x40000000;
- Streamer.SwitchSection(MOFI->getCompactUnwindSection());
-
// Range Start
unsigned FDEEncoding = MOFI->getFDEEncoding(UsingCFI);
unsigned Size = getSizeForEncoding(Streamer, FDEEncoding);
@@ -1194,11 +1195,10 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
Twine::utohexstr(Encoding));
Streamer.EmitIntValue(Encoding, Size);
-
// Personality Function
Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr);
if (VerboseAsm) Streamer.AddComment("Personality Function");
- if (Frame.Personality)
+ if (!DwarfEHFrameOnly && Frame.Personality)
Streamer.EmitSymbolValue(Frame.Personality, Size);
else
Streamer.EmitIntValue(0, Size); // No personality fn
@@ -1206,12 +1206,10 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
// LSDA
Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding);
if (VerboseAsm) Streamer.AddComment("LSDA");
- if (Frame.Lsda)
+ if (!DwarfEHFrameOnly && Frame.Lsda)
Streamer.EmitSymbolValue(Frame.Lsda, Size);
else
Streamer.EmitIntValue(0, Size); // No LSDA
-
- return true;
}
const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
@@ -1421,7 +1419,6 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
}
// Call Frame Instructions
-
EmitCFIInstructions(streamer, frame.Instructions, frame.Begin);
// Padding
@@ -1482,12 +1479,23 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos();
// Emit the compact unwind info if available.
- if (IsEH && MOFI->getCompactUnwindSection())
- for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
- const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
- if (Frame.CompactUnwindEncoding)
+ if (IsEH && MOFI->getCompactUnwindSection()) {
+ unsigned NumFrameInfos = Streamer.getNumFrameInfos();
+ bool SectionEmitted = false;
+
+ if (NumFrameInfos) {
+ for (unsigned i = 0; i < NumFrameInfos; ++i) {
+ const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
+ if (Frame.CompactUnwindEncoding == 0) continue;
+ if (!SectionEmitted) {
+ Streamer.SwitchSection(MOFI->getCompactUnwindSection());
+ Streamer.EmitValueToAlignment(Context.getAsmInfo().getPointerSize());
+ SectionEmitted = true;
+ }
Emitter.EmitCompactUnwind(Streamer, Frame);
+ }
}
+ }
const MCSection &Section = IsEH ? *MOFI->getEHFrameSection() :
*MOFI->getDwarfFrameSection();
diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp
index 7f5f1b6..116f86f 100644
--- a/contrib/llvm/lib/MC/MCELFStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -108,14 +109,15 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
llvm_unreachable("invalid assembler flag!");
}
-void MCELFStreamer::ChangeSection(const MCSection *Section) {
+void MCELFStreamer::ChangeSection(const MCSection *Section,
+ const MCExpr *Subsection) {
MCSectionData *CurSection = getCurrentSectionData();
if (CurSection && CurSection->isBundleLocked())
report_fatal_error("Unterminated .bundle_lock when changing a section");
const MCSymbol *Grp = static_cast<const MCSectionELF *>(Section)->getGroup();
if (Grp)
getAssembler().getOrCreateSymbolData(*Grp);
- this->MCObjectStreamer::ChangeSection(Section);
+ this->MCObjectStreamer::ChangeSection(Section, Subsection);
}
void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
@@ -126,6 +128,26 @@ void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
Alias->setVariableValue(Value);
}
+// When GNU as encounters more than one .type declaration for an object it seems
+// to use a mechanism similar to the one below to decide which type is actually
+// used in the object file. The greater of T1 and T2 is selected based on the
+// following ordering:
+// STT_NOTYPE < STT_OBJECT < STT_FUNC < STT_GNU_IFUNC < STT_TLS < anything else
+// If neither T1 < T2 nor T2 < T1 according to this ordering, use T2 (the user
+// provided type).
+static unsigned CombineSymbolTypes(unsigned T1, unsigned T2) {
+ unsigned TypeOrdering[] = {ELF::STT_NOTYPE, ELF::STT_OBJECT, ELF::STT_FUNC,
+ ELF::STT_GNU_IFUNC, ELF::STT_TLS};
+ for (unsigned i = 0; i != array_lengthof(TypeOrdering); ++i) {
+ if (T1 == TypeOrdering[i])
+ return T2;
+ if (T2 == TypeOrdering[i])
+ return T1;
+ }
+
+ return T2;
+}
+
void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
// Indirect symbols are handled differently, to match how 'as' handles
@@ -187,27 +209,34 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
break;
case MCSA_ELF_TypeFunction:
- MCELF::SetType(SD, ELF::STT_FUNC);
+ MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+ ELF::STT_FUNC));
break;
case MCSA_ELF_TypeIndFunction:
- MCELF::SetType(SD, ELF::STT_GNU_IFUNC);
+ MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+ ELF::STT_GNU_IFUNC));
break;
case MCSA_ELF_TypeObject:
- MCELF::SetType(SD, ELF::STT_OBJECT);
+ MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+ ELF::STT_OBJECT));
break;
case MCSA_ELF_TypeTLS:
- MCELF::SetType(SD, ELF::STT_TLS);
+ MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+ ELF::STT_TLS));
break;
case MCSA_ELF_TypeCommon:
- MCELF::SetType(SD, ELF::STT_COMMON);
+ // TODO: Emit these as a common symbol.
+ MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+ ELF::STT_OBJECT));
break;
case MCSA_ELF_TypeNoType:
- MCELF::SetType(SD, ELF::STT_NOTYPE);
+ MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+ ELF::STT_NOTYPE));
break;
case MCSA_Protected:
@@ -290,7 +319,7 @@ void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
// entry in the module's symbol table (the first being the null symbol).
void MCELFStreamer::EmitFileDirective(StringRef Filename) {
MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename);
- Symbol->setSection(*getCurrentSection());
+ Symbol->setSection(*getCurrentSection().first);
Symbol->setAbsolute();
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
@@ -406,11 +435,13 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
// Optimize memory usage by emitting the instruction to a
// MCCompactEncodedInstFragment when not in a bundle-locked group and
// there are no fixups registered.
- MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment(SD);
+ MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment();
+ insert(CEIF);
CEIF->getContents().append(Code.begin(), Code.end());
return;
} else {
- DF = new MCDataFragment(SD);
+ DF = new MCDataFragment();
+ insert(DF);
if (SD->getBundleLockState() == MCSectionData::BundleLockedAlignToEnd) {
// If this is a new fragment created for a bundle-locked group, and the
// group was marked as "align_to_end", set a flag in the fragment.
diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp
index cd4d144..06bc72f 100644
--- a/contrib/llvm/lib/MC/MCExpr.cpp
+++ b/contrib/llvm/lib/MC/MCExpr.cpp
@@ -250,6 +250,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_Mips_GOT_LO16: return "GOT_LO16";
case VK_Mips_CALL_HI16: return "CALL_HI16";
case VK_Mips_CALL_LO16: return "CALL_LO16";
+ case VK_COFF_IMGREL32: return "IMGREL32";
}
llvm_unreachable("Invalid variant kind");
}
@@ -285,6 +286,44 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("dtpoff", VK_DTPOFF)
.Case("TLVP", VK_TLVP)
.Case("tlvp", VK_TLVP)
+ .Case("IMGREL", VK_COFF_IMGREL32)
+ .Case("imgrel", VK_COFF_IMGREL32)
+ .Case("SECREL32", VK_SECREL)
+ .Case("secrel32", VK_SECREL)
+ .Case("HA", VK_PPC_GAS_HA16)
+ .Case("ha", VK_PPC_GAS_HA16)
+ .Case("L", VK_PPC_GAS_LO16)
+ .Case("l", VK_PPC_GAS_LO16)
+ .Case("TOCBASE", VK_PPC_TOC)
+ .Case("tocbase", VK_PPC_TOC)
+ .Case("TOC", VK_PPC_TOC_ENTRY)
+ .Case("toc", VK_PPC_TOC_ENTRY)
+ .Case("TOC@HA", VK_PPC_TOC16_HA)
+ .Case("toc@ha", VK_PPC_TOC16_HA)
+ .Case("TOC@L", VK_PPC_TOC16_LO)
+ .Case("toc@l", VK_PPC_TOC16_LO)
+ .Case("TLS", VK_PPC_TLS)
+ .Case("tls", VK_PPC_TLS)
+ .Case("TPREL@HA", VK_PPC_TPREL16_HA)
+ .Case("tprel@ha", VK_PPC_TPREL16_HA)
+ .Case("TPREL@L", VK_PPC_TPREL16_LO)
+ .Case("tprel@l", VK_PPC_TPREL16_LO)
+ .Case("DTPREL@HA", VK_PPC_DTPREL16_HA)
+ .Case("dtprel@ha", VK_PPC_DTPREL16_HA)
+ .Case("DTPREL@L", VK_PPC_DTPREL16_LO)
+ .Case("dtprel@l", VK_PPC_DTPREL16_LO)
+ .Case("GOT@TPREL@HA", VK_PPC_GOT_TPREL16_HA)
+ .Case("got@tprel@ha", VK_PPC_GOT_TPREL16_HA)
+ .Case("GOT@TPREL@L", VK_PPC_GOT_TPREL16_LO)
+ .Case("got@tprel@l", VK_PPC_GOT_TPREL16_LO)
+ .Case("GOT@TLSGD@HA", VK_PPC_GOT_TLSGD16_HA)
+ .Case("got@tlsgd@ha", VK_PPC_GOT_TLSGD16_HA)
+ .Case("GOT@TLSGD@L", VK_PPC_GOT_TLSGD16_LO)
+ .Case("got@tlsgd@l", VK_PPC_GOT_TLSGD16_LO)
+ .Case("GOT@TLSLD@HA", VK_PPC_GOT_TLSLD16_HA)
+ .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD16_HA)
+ .Case("GOT@TLSLD@L", VK_PPC_GOT_TLSLD16_LO)
+ .Case("got@tlsld@l", VK_PPC_GOT_TLSLD16_LO)
.Default(VK_Invalid);
}
diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
index 7d08d0e..e08b01b 100644
--- a/contrib/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp
@@ -122,11 +122,11 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
// isSymbolLinkerVisible uses the section.
- Symbol->setSection(*getCurrentSection());
+ Symbol->setSection(*getCurrentSection().first);
// We have to create a new fragment if this is an atom defining symbol,
// fragments cannot span atoms.
if (getAssembler().isSymbolLinkerVisible(*Symbol))
- new MCDataFragment(getCurrentSectionData());
+ insert(new MCDataFragment());
MCObjectStreamer::EmitLabel(Symbol);
diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp
index c872b22..659706a 100644
--- a/contrib/llvm/lib/MC/MCNullStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp
@@ -30,13 +30,14 @@ namespace {
virtual void InitSections() {
}
- virtual void ChangeSection(const MCSection *Section) {
+ virtual void ChangeSection(const MCSection *Section,
+ const MCExpr *Subsection) {
}
virtual void EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- assert(getCurrentSection() && "Cannot emit before setting section!");
- Symbol->setSection(*getCurrentSection());
+ assert(getCurrentSection().first &&"Cannot emit before setting section!");
+ Symbol->setSection(*getCurrentSection().first);
}
virtual void EmitDebugLabel(MCSymbol *Symbol) {
EmitLabel(Symbol);
diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
index d19e79a..96b62f1 100644
--- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -145,12 +145,16 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
LSDASection = Ctx->getMachOSection("__TEXT", "__gcc_except_tab", 0,
SectionKind::getReadOnlyWithRel());
- if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
+ if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) {
CompactUnwindSection =
Ctx->getMachOSection("__LD", "__compact_unwind",
MCSectionMachO::S_ATTR_DEBUG,
SectionKind::getReadOnly());
+ if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
+ CompactUnwindDwarfEHFrameOnly = 0x04000000;
+ }
+
// Debug Information.
DwarfAccelNamesSection =
Ctx->getMachOSection("__DWARF", "__apple_names",
@@ -291,6 +295,22 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_udata8;
+ } else if (T.getArch() == Triple::systemz) {
+ // All currently-defined code models guarantee that 4-byte PC-relative
+ // values will be in range.
+ if (RelocM == Reloc::PIC_) {
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ } else {
+ PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+ LSDAEncoding = dwarf::DW_EH_PE_absptr;
+ FDEEncoding = dwarf::DW_EH_PE_absptr;
+ TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ }
}
// Solaris requires different flags for .eh_frame to seemingly every other
@@ -629,6 +649,8 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
PersonalityEncoding = LSDAEncoding = FDEEncoding = FDECFIEncoding =
TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ CompactUnwindDwarfEHFrameOnly = 0;
+
EHFrameSection = 0; // Created on demand.
CompactUnwindSection = 0; // Used only by selected targets.
DwarfAccelNamesSection = 0; // Used only by selected targets.
diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
index 0d2ce83..d21ce8d 100644
--- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
@@ -45,14 +46,15 @@ void MCObjectStreamer::reset() {
if (Assembler)
Assembler->reset();
CurSectionData = 0;
+ CurInsertionPoint = MCSectionData::iterator();
MCStreamer::reset();
}
MCFragment *MCObjectStreamer::getCurrentFragment() const {
assert(getCurrentSectionData() && "No current section!");
- if (!getCurrentSectionData()->empty())
- return &getCurrentSectionData()->getFragmentList().back();
+ if (CurInsertionPoint != getCurrentSectionData()->getFragmentList().begin())
+ return prior(CurInsertionPoint);
return 0;
}
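getCurrentFragment() now returns the fragment just before CurInsertionPoint rather than the back of the section's fragment list, so data is appended at the right place even when the active subsection is not the last one. The insert(...) calls used throughout the rest of this patch presumably splice new fragments in at that same point; a rough sketch of the assumed behaviour (not taken from the patch):

  // Assumed shape of MCObjectStreamer::insert(): place the new fragment at the
  // current subsection's insertion point and parent it to the current section.
  void insert(MCFragment *F) const {
    CurSectionData->getFragmentList().insert(CurInsertionPoint, F);
    F->setParent(CurSectionData);
  }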
@@ -61,8 +63,10 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
// When bundling is enabled, we don't want to add data to a fragment that
// already has instructions (see MCELFStreamer::EmitInstToData for details)
- if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions()))
- F = new MCDataFragment(getCurrentSectionData());
+ if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) {
+ F = new MCDataFragment();
+ insert(F);
+ }
return F;
}
@@ -145,7 +149,7 @@ void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
return;
}
Value = ForceExpAbs(Value);
- new MCLEBFragment(*Value, false, getCurrentSectionData());
+ insert(new MCLEBFragment(*Value, false));
}
void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) {
@@ -155,7 +159,7 @@ void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) {
return;
}
Value = ForceExpAbs(Value);
- new MCLEBFragment(*Value, true, getCurrentSectionData());
+ insert(new MCLEBFragment(*Value, true));
}
void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
@@ -163,10 +167,20 @@ void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
report_fatal_error("This file format doesn't support weak aliases.");
}
-void MCObjectStreamer::ChangeSection(const MCSection *Section) {
+void MCObjectStreamer::ChangeSection(const MCSection *Section,
+ const MCExpr *Subsection) {
assert(Section && "Cannot switch to a null section!");
CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
+
+ int64_t IntSubsection = 0;
+ if (Subsection &&
+ !Subsection->EvaluateAsAbsolute(IntSubsection, getAssembler()))
+ report_fatal_error("Cannot evaluate subsection number");
+ if (IntSubsection < 0 || IntSubsection > 8192)
+ report_fatal_error("Subsection number out of range");
+ CurInsertionPoint =
+ CurSectionData->getSubsectionInsertionPoint(unsigned(IntSubsection));
}
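The subsection argument must evaluate to an absolute value between 0 and 8192 inclusive; anything else is a fatal error, and the resulting number selects the insertion point that the insert() calls above write to. A hypothetical caller (Ctx and Streamer are assumed names, not part of the patch) could switch to subsection 2 of the text section like this:

  const MCSection *Text = Ctx.getObjectFileInfo()->getTextSection();
  const MCExpr *Two = MCConstantExpr::Create(2, Ctx);   // absolute and in range
  Streamer.SwitchSection(Text, Two);  // later fragments land in subsection 2
  Streamer.SwitchSection(Text);       // no expression, presumably subsection 0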
void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
@@ -185,7 +199,7 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
// Now that a machine instruction has been assembled into this section, make
// a line entry for any .loc directive that has been seen.
- MCLineEntry::Make(this, getCurrentSection());
+ MCLineEntry::Make(this, getCurrentSection().first);
// If this instruction doesn't need relaxation, just emit it as data.
MCAssembler &Assembler = getAssembler();
@@ -216,8 +230,8 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) {
// Always create a new, separate fragment here, because its size can change
// during relaxation.
- MCRelaxableFragment *IF =
- new MCRelaxableFragment(Inst, getCurrentSectionData());
+ MCRelaxableFragment *IF = new MCRelaxableFragment(Inst);
+ insert(IF);
SmallString<128> Code;
raw_svector_ostream VecOS(Code);
@@ -258,7 +272,7 @@ void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
return;
}
AddrDelta = ForceExpAbs(AddrDelta);
- new MCDwarfLineAddrFragment(LineDelta, *AddrDelta, getCurrentSectionData());
+ insert(new MCDwarfLineAddrFragment(LineDelta, *AddrDelta));
}
void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
@@ -270,7 +284,7 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
return;
}
AddrDelta = ForceExpAbs(AddrDelta);
- new MCDwarfCallFrameFragment(*AddrDelta, getCurrentSectionData());
+ insert(new MCDwarfCallFrameFragment(*AddrDelta));
}
void MCObjectStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
@@ -284,8 +298,7 @@ void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
- new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
- getCurrentSectionData());
+ insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit));
// Update the maximum alignment on the current section if necessary.
if (ByteAlignment > getCurrentSectionData()->getAlignment())
@@ -302,7 +315,7 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
int64_t Res;
if (Offset->EvaluateAsAbsolute(Res, getAssembler())) {
- new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+ insert(new MCOrgFragment(*Offset, Value));
return false;
}
diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
index 804734c..edefdb4 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -201,9 +201,9 @@ public:
}
virtual bool Warning(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
+ ArrayRef<SMRange> Ranges = None);
virtual bool Error(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
+ ArrayRef<SMRange> Ranges = None);
virtual const AsmToken &Lex();
@@ -221,6 +221,7 @@ public:
bool parseExpression(const MCExpr *&Res);
virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc);
+ virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
virtual bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
virtual bool parseAbsoluteExpression(int64_t &Res);
@@ -285,7 +286,7 @@ private:
void PrintMacroInstantiations();
void PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) const {
+ ArrayRef<SMRange> Ranges = None) const {
SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
}
static void DiagHandler(const SMDiagnostic &Diag, void *Context);
@@ -601,7 +602,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// If we are generating dwarf for assembly source files save the initial text
// section and generate a .file directive.
if (getContext().getGenDwarfForAssembly()) {
- getContext().setGenDwarfSection(getStreamer().getCurrentSection());
+ getContext().setGenDwarfSection(getStreamer().getCurrentSection().first);
MCSymbol *SectionStartSym = getContext().CreateTempSymbol();
getStreamer().EmitLabel(SectionStartSym);
getContext().setGenDwarfSectionStartSym(SectionStartSym);
@@ -666,7 +667,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
}
void AsmParser::checkForValidSection() {
- if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) {
+ if (!ParsingInlineAsm && !getStreamer().getCurrentSection().first) {
TokError("expected section directive before assembly directive");
Out.InitToTextSection();
}
@@ -869,6 +870,10 @@ bool AsmParser::parseExpression(const MCExpr *&Res) {
return parseExpression(Res, EndLoc);
}
+bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+ return ParsePrimaryExpr(Res, EndLoc);
+}
+
const MCExpr *
AsmParser::ApplyModifierToExpr(const MCExpr *E,
MCSymbolRefExpr::VariantKind Variant) {
@@ -1087,7 +1092,7 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
MCBinaryExpr::Opcode Dummy;
unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
if (TokPrec < NextTokPrec) {
- if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true;
+ if (ParseBinOpRHS(TokPrec+1, RHS, EndLoc)) return true;
}
// Merge LHS and RHS according to operator.
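The one-token change above (recursing with TokPrec+1 instead of Precedence+1) restores the standard precedence-climbing recurrence: the bound for the nested call must come from the operator just consumed, not from the caller's minimum, otherwise a trailing lower-precedence operator can be folded into the right-hand side at the wrong depth. A self-contained sketch (hypothetical helper names, deliberately simplified, not AsmParser code) that shows the difference:

  // Precedence-climbing evaluator over a token vector.
  #include <cassert>
  #include <cstdio>
  #include <vector>

  struct Tok { char Op; long Val; };        // Op == 0 marks a number token

  static int prec(char Op) {
    switch (Op) {
    case '*': case '/': return 2;
    case '+': case '-': return 1;
    default:            return -1;          // not a binary operator
    }
  }

  static long apply(char Op, long L, long R) {
    switch (Op) {
    case '*': return L * R;
    case '/': return L / R;
    case '+': return L + R;
    default:  return L - R;
    }
  }

  static long parseBinOpRHS(const std::vector<Tok> &T, size_t &I,
                            int MinPrec, long LHS) {
    while (I < T.size() && prec(T[I].Op) >= MinPrec) {
      char Op = T[I++].Op;
      int TokPrec = prec(Op);
      long RHS = T[I++].Val;                 // primary expression
      // Fold tighter-binding operators into RHS first. The bound must be
      // TokPrec + 1 (the fix); with MinPrec + 1 the nested call would also
      // swallow the trailing "+ 1" below, giving 8 - (2*3 + 1) = 1 instead
      // of (8 - 2*3) + 1 = 3.
      if (I < T.size() && prec(T[I].Op) > TokPrec)
        RHS = parseBinOpRHS(T, I, TokPrec + 1, RHS);
      LHS = apply(Op, LHS, RHS);
    }
    return LHS;
  }

  int main() {
    // 8 - 2 * 3 + 1  ==>  3
    std::vector<Tok> T = {{0,8},{'-',0},{0,2},{'*',0},{0,3},{'+',0},{0,1}};
    size_t I = 1;                            // start after the first primary
    long Result = parseBinOpRHS(T, I, 0, T[0].Val);
    std::printf("%ld\n", Result);
    assert(Result == 3);
    return 0;
  }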
@@ -1488,7 +1493,8 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
// section is the initial text section then generate a .loc directive for
// the instruction.
if (!HadError && getContext().getGenDwarfForAssembly() &&
- getContext().getGenDwarfSection() == getStreamer().getCurrentSection()) {
+ getContext().getGenDwarfSection() ==
+ getStreamer().getCurrentSection().first) {
unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
@@ -1978,7 +1984,6 @@ static bool IsUsedIn(const MCSymbol *Sym, const MCExpr *Value) {
case MCExpr::Binary: {
const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Value);
return IsUsedIn(Sym, BE->getLHS()) || IsUsedIn(Sym, BE->getRHS());
- break;
}
case MCExpr::Target:
case MCExpr::Constant:
@@ -2479,7 +2484,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
// Check whether we should use optimal code alignment for this .align
// directive.
- bool UseCodeAlign = getStreamer().getCurrentSection()->UseCodeAlign();
+ bool UseCodeAlign = getStreamer().getCurrentSection().first->UseCodeAlign();
if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
ValueSize == 1 && UseCodeAlign) {
getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
@@ -2631,12 +2636,10 @@ bool AsmParser::ParseDirectiveLoc() {
Flags |= DWARF2_FLAG_IS_STMT;
else
return Error(Loc, "is_stmt value not 0 or 1");
- }
- else {
+ } else {
return Error(Loc, "is_stmt value not the constant value of 0 or 1");
}
- }
- else if (Name == "isa") {
+ } else if (Name == "isa") {
Loc = getTok().getLoc();
const MCExpr *Value;
if (parseExpression(Value))
@@ -2647,16 +2650,13 @@ bool AsmParser::ParseDirectiveLoc() {
if (Value < 0)
return Error(Loc, "isa number less than zero");
Isa = Value;
- }
- else {
+ } else {
return Error(Loc, "isa number not a constant value");
}
- }
- else if (Name == "discriminator") {
+ } else if (Name == "discriminator") {
if (parseAbsoluteExpression(Discriminator))
return true;
- }
- else {
+ } else {
return Error(Loc, "unknown sub-directive in '.loc' directive");
}
@@ -3615,18 +3615,17 @@ bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
if (TheCondState.TheCond != AsmCond::IfCond &&
TheCondState.TheCond != AsmCond::ElseIfCond)
- Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
- " an .elseif");
+ Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
+ " an .elseif");
TheCondState.TheCond = AsmCond::ElseIfCond;
bool LastIgnoreState = false;
if (!TheCondStack.empty())
- LastIgnoreState = TheCondStack.back().Ignore;
+ LastIgnoreState = TheCondStack.back().Ignore;
if (LastIgnoreState || TheCondState.CondMet) {
TheCondState.Ignore = true;
eatToEndOfStatement();
- }
- else {
+ } else {
int64_t ExprValue;
if (parseAbsoluteExpression(ExprValue))
return true;
@@ -3652,8 +3651,8 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
if (TheCondState.TheCond != AsmCond::IfCond &&
TheCondState.TheCond != AsmCond::ElseIfCond)
- Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
- ".elseif");
+ Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
+ ".elseif");
TheCondState.TheCond = AsmCond::ElseCond;
bool LastIgnoreState = false;
if (!TheCondStack.empty())
@@ -4046,19 +4045,17 @@ static int RewritesSort(const void *A, const void *B) {
if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
return 1;
- // It's possible to have a SizeDirective rewrite and an Input/Output rewrite
- // to the same location. Make sure the SizeDirective rewrite is performed
- // first. This also ensure the sort algorithm is stable.
- if (AsmRewriteA->Kind == AOK_SizeDirective) {
- assert ((AsmRewriteB->Kind == AOK_Input || AsmRewriteB->Kind == AOK_Output) &&
- "Expected an Input/Output rewrite!");
+ // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
+ // rewrite to the same location. Make sure the SizeDirective rewrite is
+ // performed first, then the Imm/ImmPrefix and finally the Input/Output. This
+ // ensures the sort algorithm is stable.
+ if (AsmRewritePrecedence [AsmRewriteA->Kind] >
+ AsmRewritePrecedence [AsmRewriteB->Kind])
return -1;
- }
- if (AsmRewriteB->Kind == AOK_SizeDirective) {
- assert ((AsmRewriteA->Kind == AOK_Input || AsmRewriteA->Kind == AOK_Output) &&
- "Expected an Input/Output rewrite!");
+
+ if (AsmRewritePrecedence [AsmRewriteA->Kind] <
+ AsmRewritePrecedence [AsmRewriteB->Kind])
return 1;
- }
llvm_unreachable ("Unstable rewrite sort.");
}
@@ -4118,28 +4115,27 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
}
// Expr/Input or Output.
- bool IsVarDecl;
- unsigned Length, Size, Type;
- void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc,
- Length, Size, Type,
- IsVarDecl);
+ StringRef SymName = Operand->getSymName();
+ if (SymName.empty())
+ continue;
+
+ void *OpDecl = Operand->getOpDecl();
if (!OpDecl)
continue;
bool isOutput = (i == 1) && Desc.mayStore();
+ SMLoc Start = SMLoc::getFromPointer(SymName.data());
if (isOutput) {
++InputIdx;
OutputDecls.push_back(OpDecl);
OutputDeclsAddressOf.push_back(Operand->needAddressOf());
OutputConstraints.push_back('=' + Operand->getConstraint().str());
- AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Operand->getStartLoc(),
- Operand->getNameLen()));
+ AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size()));
} else {
InputDecls.push_back(OpDecl);
InputDeclsAddressOf.push_back(Operand->needAddressOf());
InputConstraints.push_back(Operand->getConstraint().str());
- AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Operand->getStartLoc(),
- Operand->getNameLen()));
+ AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Start, SymName.size()));
}
}
}
@@ -4182,20 +4178,17 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(),
E = AsmStrRewrites.end();
I != E; ++I) {
+ AsmRewriteKind Kind = (*I).Kind;
+ if (Kind == AOK_Delete)
+ continue;
+
const char *Loc = (*I).Loc.getPointer();
assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
- unsigned AdditionalSkip = 0;
- AsmRewriteKind Kind = (*I).Kind;
-
// Emit everything up to the immediate/expression.
unsigned Len = Loc - AsmStart;
- if (Len) {
- // For Input/Output operands we need to remove the brackets, if present.
- if ((Kind == AOK_Input || Kind == AOK_Output) && Loc[-1] == '[')
- --Len;
+ if (Len)
OS << StringRef(AsmStart, Len);
- }
// Skip the original expression.
if (Kind == AOK_Skip) {
@@ -4203,6 +4196,7 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
continue;
}
+ unsigned AdditionalSkip = 0;
// Rewrite expressions in $N notation.
switch (Kind) {
default: break;
@@ -4249,11 +4243,6 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
// Skip the original expression.
AsmStart = Loc + (*I).Len + AdditionalSkip;
-
- // For Input/Output operands we need to remove the brackets, if present.
- if ((Kind == AOK_Input || Kind == AOK_Output) && AsmStart != AsmEnd &&
- *AsmStart == ']')
- ++AsmStart;
}
// Emit the remainder of the asm string.
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index 6d6409f..7eb8b74 100644
--- a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -566,10 +566,10 @@ bool DarwinAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
/// ParseDirectivePrevious:
/// ::= .previous
bool DarwinAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
- const MCSection *PreviousSection = getStreamer().getPreviousSection();
- if (PreviousSection == NULL)
+ MCSectionSubPair PreviousSection = getStreamer().getPreviousSection();
+ if (PreviousSection.first == NULL)
return TokError(".previous without corresponding .section");
- getStreamer().SwitchSection(PreviousSection);
+ getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second);
return false;
}
diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 4c45e08..3134fc3 100644
--- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -76,6 +76,7 @@ public:
&ELFAsmParser::ParseDirectiveSymbolAttribute>(".internal");
addDirectiveHandler<
&ELFAsmParser::ParseDirectiveSymbolAttribute>(".hidden");
+ addDirectiveHandler<&ELFAsmParser::ParseDirectiveSubsection>(".subsection");
}
// FIXME: Part of this logic is duplicated in the MCELFStreamer. What is
@@ -147,9 +148,11 @@ public:
bool ParseDirectiveVersion(StringRef, SMLoc);
bool ParseDirectiveWeakref(StringRef, SMLoc);
bool ParseDirectiveSymbolAttribute(StringRef, SMLoc);
+ bool ParseDirectiveSubsection(StringRef, SMLoc);
private:
bool ParseSectionName(StringRef &SectionName);
+ bool ParseSectionArguments(bool IsPush);
};
}
@@ -191,12 +194,15 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
unsigned Flags, SectionKind Kind) {
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in section switching directive");
- Lex();
+ const MCExpr *Subsection = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getParser().parseExpression(Subsection))
+ return true;
+ }
getStreamer().SwitchSection(getContext().getELFSection(
- Section, Type, Flags, Kind));
+ Section, Type, Flags, Kind),
+ Subsection);
return false;
}
@@ -316,7 +322,7 @@ static int parseSectionFlags(StringRef flagsStr) {
bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) {
getStreamer().PushSection();
- if (ParseDirectiveSection(s, loc)) {
+ if (ParseSectionArguments(/*IsPush=*/true)) {
getStreamer().PopSection();
return true;
}
@@ -332,6 +338,10 @@ bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
// FIXME: This is a work in progress.
bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+ return ParseSectionArguments(/*IsPush=*/false);
+}
+
+bool ELFAsmParser::ParseSectionArguments(bool IsPush) {
StringRef SectionName;
if (ParseSectionName(SectionName))
@@ -341,6 +351,7 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
int64_t Size = 0;
StringRef GroupName;
unsigned Flags = 0;
+ const MCExpr *Subsection = 0;
// Set the defaults first.
if (SectionName == ".fini" || SectionName == ".init" ||
@@ -352,6 +363,14 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
if (getLexer().is(AsmToken::Comma)) {
Lex();
+ if (IsPush && getLexer().isNot(AsmToken::String)) {
+ if (getParser().parseExpression(Subsection))
+ return true;
+ if (getLexer().isNot(AsmToken::Comma))
+ goto EndStmt;
+ Lex();
+ }
+
if (getLexer().isNot(AsmToken::String))
return TokError("expected string in directive");
@@ -408,6 +427,7 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
}
}
+EndStmt:
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
@@ -444,15 +464,16 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
SectionKind Kind = computeSectionKind(Flags);
getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type,
Flags, Kind, Size,
- GroupName));
+ GroupName),
+ Subsection);
return false;
}
bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
- const MCSection *PreviousSection = getStreamer().getPreviousSection();
- if (PreviousSection == NULL)
+ MCSectionSubPair PreviousSection = getStreamer().getPreviousSection();
+ if (PreviousSection.first == NULL)
return TokError(".previous without corresponding .section");
- getStreamer().SwitchSection(PreviousSection);
+ getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second);
return false;
}
@@ -613,6 +634,20 @@ bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) {
return false;
}
+bool ELFAsmParser::ParseDirectiveSubsection(StringRef, SMLoc) {
+ const MCExpr *Subsection = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getParser().parseExpression(Subsection))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ getStreamer().SubSection(Subsection);
+ return false;
+}
+
namespace llvm {
MCAsmParserExtension *createELFAsmParser() {
diff --git a/contrib/llvm/lib/MC/MCPureStreamer.cpp b/contrib/llvm/lib/MC/MCPureStreamer.cpp
index 0e04c55..8ae724f 100644
--- a/contrib/llvm/lib/MC/MCPureStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCPureStreamer.cpp
@@ -12,9 +12,8 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
-// FIXME: Remove this.
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
@@ -113,25 +112,22 @@ void MCPureStreamer::InitSections() {
}
void MCPureStreamer::InitToTextSection() {
- // FIMXE: To what!?
- SwitchSection(getContext().getMachOSection("__TEXT", "__text",
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- 0, SectionKind::getText()));
+ SwitchSection(getContext().getObjectFileInfo()->getTextSection());
}
void MCPureStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
- assert(getCurrentSection() && "Cannot emit before setting section!");
+ assert(getCurrentSection().first && "Cannot emit before setting section!");
- Symbol->setSection(*getCurrentSection());
+ Symbol->setSection(*getCurrentSection().first);
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
// We have to create a new fragment if this is an atom defining symbol,
// fragments cannot span atoms.
if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()))
- new MCDataFragment(getCurrentSectionData());
+ insert(new MCDataFragment());
// FIXME: This is wasteful, we don't necessarily need to create a data
// fragment. Instead, we should mark the symbol as pointing into the data
@@ -166,8 +162,7 @@ void MCPureStreamer::EmitValueToAlignment(unsigned ByteAlignment,
// MCObjectStreamer.
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
- new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
- getCurrentSectionData());
+ insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit));
// Update the maximum alignment on the current section if necessary.
if (ByteAlignment > getCurrentSectionData()->getAlignment())
@@ -180,8 +175,8 @@ void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment,
// MCObjectStreamer.
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
- MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
- getCurrentSectionData());
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit);
+ insert(F);
F->setEmitNops(true);
// Update the maximum alignment on the current section if necessary.
@@ -191,13 +186,13 @@ void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment,
bool MCPureStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
- new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+ insert(new MCOrgFragment(*Offset, Value));
return false;
}
void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) {
- MCRelaxableFragment *IF =
- new MCRelaxableFragment(Inst, getCurrentSectionData());
+ MCRelaxableFragment *IF = new MCRelaxableFragment(Inst);
+ insert(IF);
// Add the fixups and data.
//
diff --git a/contrib/llvm/lib/MC/MCSectionCOFF.cpp b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
index aac9377..6cedf06 100644
--- a/contrib/llvm/lib/MC/MCSectionCOFF.cpp
+++ b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
@@ -29,7 +29,8 @@ bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
}
void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
- raw_ostream &OS) const {
+ raw_ostream &OS,
+ const MCExpr *Subsection) const {
// standard sections don't require the '.section'
if (ShouldOmitSectionDirective(SectionName, MAI)) {
diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp
index 0775cfa..bf1a984 100644
--- a/contrib/llvm/lib/MC/MCSectionELF.cpp
+++ b/contrib/llvm/lib/MC/MCSectionELF.cpp
@@ -10,6 +10,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/raw_ostream.h"
@@ -32,10 +33,14 @@ bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
}
void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
- raw_ostream &OS) const {
+ raw_ostream &OS,
+ const MCExpr *Subsection) const {
if (ShouldOmitSectionDirective(SectionName, MAI)) {
- OS << '\t' << getSectionName() << '\n';
+ OS << '\t' << getSectionName();
+ if (Subsection)
+ OS << '\t' << *Subsection;
+ OS << '\n';
return;
}
@@ -129,6 +134,9 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
if (Flags & ELF::SHF_GROUP)
OS << "," << Group->getName() << ",comdat";
OS << '\n';
+
+ if (Subsection)
+ OS << "\t.subsection\t" << *Subsection << '\n';
}
bool MCSectionELF::UseCodeAlign() const {
diff --git a/contrib/llvm/lib/MC/MCSectionMachO.cpp b/contrib/llvm/lib/MC/MCSectionMachO.cpp
index fc32315..8704513 100644
--- a/contrib/llvm/lib/MC/MCSectionMachO.cpp
+++ b/contrib/llvm/lib/MC/MCSectionMachO.cpp
@@ -91,7 +91,8 @@ MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
}
void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
- raw_ostream &OS) const {
+ raw_ostream &OS,
+ const MCExpr *Subsection) const {
OS << "\t.section\t" << getSegmentName() << ',' << getSectionName();
// Get the section type and attributes.
diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp
index d02e553..8f1895e 100644
--- a/contrib/llvm/lib/MC/MCStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCStreamer.cpp
@@ -24,8 +24,7 @@ using namespace llvm;
MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx)
: Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false),
CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) {
- const MCSection *section = 0;
- SectionStack.push_back(std::make_pair(section, section));
+ SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
}
MCStreamer::~MCStreamer() {
@@ -36,13 +35,13 @@ MCStreamer::~MCStreamer() {
void MCStreamer::reset() {
for (unsigned i = 0; i < getNumW64UnwindInfos(); ++i)
delete W64UnwindInfos[i];
+ W64UnwindInfos.clear();
EmitEHFrame = true;
EmitDebugFrame = false;
CurrentW64UnwindInfo = 0;
LastSymbol = 0;
- const MCSection *section = 0;
SectionStack.clear();
- SectionStack.push_back(std::make_pair(section, section));
+ SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
}
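The many getCurrentSection().first changes throughout this patch follow from the streamer's section stack now holding MCSectionSubPair entries, which appear to pair a section pointer (.first) with its subsection expression (.second), as the DarwinAsmParser and ELFAsmParser hunks suggest. A sketch of the resulting caller pattern (Streamer and Sym are assumed names):

  MCSectionSubPair Cur = Streamer.getCurrentSection();
  if (Cur.first)                                   // the section itself
    Sym->setSection(*Cur.first);
  Streamer.SwitchSection(Cur.first, Cur.second);   // section plus subsection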
const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context,
@@ -188,15 +187,15 @@ void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
void MCStreamer::EmitLabel(MCSymbol *Symbol) {
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
- assert(getCurrentSection() && "Cannot emit before setting section!");
- Symbol->setSection(*getCurrentSection());
+ assert(getCurrentSection().first && "Cannot emit before setting section!");
+ Symbol->setSection(*getCurrentSection().first);
LastSymbol = Symbol;
}
void MCStreamer::EmitDebugLabel(MCSymbol *Symbol) {
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
- assert(getCurrentSection() && "Cannot emit before setting section!");
- Symbol->setSection(*getCurrentSection());
+ assert(getCurrentSection().first && "Cannot emit before setting section!");
+ Symbol->setSection(*getCurrentSection().first);
LastSymbol = Symbol;
}
diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 6dffed7..518b59e 100644
--- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -147,8 +147,7 @@ public:
object_t *createCOFFEntity(StringRef Name, list_t &List);
void DefineSection(MCSectionData const &SectionData);
- void DefineSymbol(MCSymbol const &Symbol,
- MCSymbolData const &SymbolData,
+ void DefineSymbol(MCSymbolData const &SymbolData,
MCAssembler &Assembler);
void MakeSymbolReal(COFFSymbol &S, size_t Index);
@@ -410,25 +409,23 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
/// This function takes a section data object from the assembler
/// and creates the associated COFF symbol staging object.
-void WinCOFFObjectWriter::DefineSymbol(MCSymbol const &Symbol,
- MCSymbolData const &SymbolData,
+void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
MCAssembler &Assembler) {
+ MCSymbol const &Symbol = SymbolData.getSymbol();
COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol);
-
- coff_symbol->Data.Type = (SymbolData.getFlags() & 0x0000FFFF) >> 0;
- coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16;
+ SymbolMap[&Symbol] = coff_symbol;
if (SymbolData.getFlags() & COFF::SF_WeakExternal) {
coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
if (Symbol.isVariable()) {
- coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+ const MCSymbolRefExpr *SymRef =
+ dyn_cast<MCSymbolRefExpr>(Symbol.getVariableValue());
- // FIXME: This assert message isn't very good.
- assert(Symbol.getVariableValue()->getKind() == MCExpr::SymbolRef &&
- "Value must be a SymbolRef!");
+ if (!SymRef)
+ report_fatal_error("Weak externals may only alias symbols");
- coff_symbol->Other = GetOrCreateCOFFSymbol(&Symbol);
+ coff_symbol->Other = GetOrCreateCOFFSymbol(&SymRef->getSymbol());
} else {
std::string WeakName = std::string(".weak.")
+ Symbol.getName().str()
@@ -448,23 +445,29 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbol const &Symbol,
coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
- }
- // If no storage class was specified in the streamer, define it here.
- if (coff_symbol->Data.StorageClass == 0) {
- bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL);
+ coff_symbol->MCData = &SymbolData;
+ } else {
+ const MCSymbolData &ResSymData =
+ Assembler.getSymbolData(Symbol.AliasedSymbol());
- coff_symbol->Data.StorageClass =
- external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
- }
+ coff_symbol->Data.Type = (ResSymData.getFlags() & 0x0000FFFF) >> 0;
+ coff_symbol->Data.StorageClass = (ResSymData.getFlags() & 0x00FF0000) >> 16;
- if (SymbolData.Fragment != NULL)
- coff_symbol->Section =
- SectionMap[&SymbolData.Fragment->getParent()->getSection()];
+ // If no storage class was specified in the streamer, define it here.
+ if (coff_symbol->Data.StorageClass == 0) {
+ bool external = ResSymData.isExternal() || (ResSymData.Fragment == NULL);
- // Bind internal COFF symbol to MC symbol.
- coff_symbol->MCData = &SymbolData;
- SymbolMap[&Symbol] = coff_symbol;
+ coff_symbol->Data.StorageClass =
+ external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
+ }
+
+ if (ResSymData.Fragment != NULL)
+ coff_symbol->Section =
+ SectionMap[&ResSymData.Fragment->getParent()->getSection()];
+
+ coff_symbol->MCData = &ResSymData;
+ }
}
/// making a section real involves assigned it a number and putting
@@ -620,9 +623,7 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(),
e = Asm.symbol_end(); i != e; i++) {
if (ExportSymbol(*i, Asm)) {
- const MCSymbol &Alias = i->getSymbol();
- const MCSymbol &Symbol = Alias.AliasedSymbol();
- DefineSymbol(Alias, Asm.getSymbolData(Symbol), Asm);
+ DefineSymbol(*i, Asm);
}
}
}
@@ -689,13 +690,8 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
++Reloc.Symb->Relocations;
Reloc.Data.VirtualAddress += Fixup.getOffset();
-
- unsigned FixupKind = Fixup.getKind();
-
- if (CrossSection)
- FixupKind = FK_PCRel_4;
-
- Reloc.Data.Type = TargetObjectWriter->getRelocType(FixupKind);
+ Reloc.Data.Type = TargetObjectWriter->getRelocType(Target, Fixup,
+ CrossSection);
// FIXME: Can anyone explain what this does other than adjust for the size
// of the offset?
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
index ca90e0e..70fec32 100644
--- a/contrib/llvm/lib/Object/COFFObjectFile.cpp
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -429,7 +429,7 @@ relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
}
COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
- : ObjectFile(Binary::ID_COFF, Object, ec)
+ : ObjectFile(Binary::ID_COFF, Object)
, Header(0)
, SectionTable(0)
, SymbolTable(0)
@@ -705,8 +705,7 @@ error_code COFFObjectFile::getRelocationNext(DataRefImpl Rel,
}
error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel,
uint64_t &Res) const {
- Res = toRel(Rel)->VirtualAddress;
- return object_error::success;
+ report_fatal_error("getRelocationAddress not implemented in COFFObjectFile");
}
error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel,
uint64_t &Res) const {
diff --git a/contrib/llvm/lib/Object/MachOObject.cpp b/contrib/llvm/lib/Object/MachOObject.cpp
deleted file mode 100644
index c9c341a..0000000
--- a/contrib/llvm/lib/Object/MachOObject.cpp
+++ /dev/null
@@ -1,422 +0,0 @@
-//===- MachOObject.cpp - Mach-O Object File Wrapper -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Object/MachOObject.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SwapByteOrder.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-using namespace llvm::object;
-
-/* Translation Utilities */
-
-template<typename T>
-static void SwapValue(T &Value) {
- Value = sys::SwapByteOrder(Value);
-}
-
-template<typename T>
-static void SwapStruct(T &Value);
-
-template<typename T>
-static void ReadInMemoryStruct(const MachOObject &MOO,
- StringRef Buffer, uint64_t Base,
- InMemoryStruct<T> &Res) {
- typedef T struct_type;
- uint64_t Size = sizeof(struct_type);
-
- // Check that the buffer contains the expected data.
- if (Base + Size > Buffer.size()) {
- Res = 0;
- return;
- }
-
- // Check whether we can return a direct pointer.
- struct_type *Ptr = reinterpret_cast<struct_type *>(
- const_cast<char *>(Buffer.data() + Base));
- if (!MOO.isSwappedEndian()) {
- Res = Ptr;
- return;
- }
-
- // Otherwise, copy the struct and translate the values.
- Res = *Ptr;
- SwapStruct(*Res);
-}
-
-/* *** */
-
-MachOObject::MachOObject(MemoryBuffer *Buffer_, bool IsLittleEndian_,
- bool Is64Bit_)
- : Buffer(Buffer_), IsLittleEndian(IsLittleEndian_), Is64Bit(Is64Bit_),
- IsSwappedEndian(IsLittleEndian != sys::isLittleEndianHost()),
- HasStringTable(false), LoadCommands(0), NumLoadedCommands(0) {
- // Load the common header.
- memcpy(&Header, Buffer->getBuffer().data(), sizeof(Header));
- if (IsSwappedEndian) {
- SwapValue(Header.Magic);
- SwapValue(Header.CPUType);
- SwapValue(Header.CPUSubtype);
- SwapValue(Header.FileType);
- SwapValue(Header.NumLoadCommands);
- SwapValue(Header.SizeOfLoadCommands);
- SwapValue(Header.Flags);
- }
-
- if (is64Bit()) {
- memcpy(&Header64Ext, Buffer->getBuffer().data() + sizeof(Header),
- sizeof(Header64Ext));
- if (IsSwappedEndian) {
- SwapValue(Header64Ext.Reserved);
- }
- }
-
- // Create the load command array if sane.
- if (getHeader().NumLoadCommands < (1 << 20))
- LoadCommands = new LoadCommandInfo[getHeader().NumLoadCommands];
-}
-
-MachOObject::~MachOObject() {
- delete [] LoadCommands;
-}
-
-MachOObject *MachOObject::LoadFromBuffer(MemoryBuffer *Buffer,
- std::string *ErrorStr) {
- // First, check the magic value and initialize the basic object info.
- bool IsLittleEndian = false, Is64Bit = false;
- StringRef Magic = Buffer->getBuffer().slice(0, 4);
- if (Magic == "\xFE\xED\xFA\xCE") {
- } else if (Magic == "\xCE\xFA\xED\xFE") {
- IsLittleEndian = true;
- } else if (Magic == "\xFE\xED\xFA\xCF") {
- Is64Bit = true;
- } else if (Magic == "\xCF\xFA\xED\xFE") {
- IsLittleEndian = true;
- Is64Bit = true;
- } else {
- if (ErrorStr) *ErrorStr = "not a Mach object file (invalid magic)";
- return 0;
- }
-
- // Ensure that the at least the full header is present.
- unsigned HeaderSize = Is64Bit ? macho::Header64Size : macho::Header32Size;
- if (Buffer->getBufferSize() < HeaderSize) {
- if (ErrorStr) *ErrorStr = "not a Mach object file (invalid header)";
- return 0;
- }
-
- OwningPtr<MachOObject> Object(new MachOObject(Buffer, IsLittleEndian,
- Is64Bit));
-
- // Check for bogus number of load commands.
- if (Object->getHeader().NumLoadCommands >= (1 << 20)) {
- if (ErrorStr) *ErrorStr = "not a Mach object file (unreasonable header)";
- return 0;
- }
-
- if (ErrorStr) *ErrorStr = "";
- return Object.take();
-}
-
-StringRef MachOObject::getData(size_t Offset, size_t Size) const {
- return Buffer->getBuffer().substr(Offset,Size);
-}
-
-void MachOObject::RegisterStringTable(macho::SymtabLoadCommand &SLC) {
- HasStringTable = true;
- StringTable = Buffer->getBuffer().substr(SLC.StringTableOffset,
- SLC.StringTableSize);
-}
-
-const MachOObject::LoadCommandInfo &
-MachOObject::getLoadCommandInfo(unsigned Index) const {
- assert(Index < getHeader().NumLoadCommands && "Invalid index!");
-
- // Load the command, if necessary.
- if (Index >= NumLoadedCommands) {
- uint64_t Offset;
- if (Index == 0) {
- Offset = getHeaderSize();
- } else {
- const LoadCommandInfo &Prev = getLoadCommandInfo(Index - 1);
- Offset = Prev.Offset + Prev.Command.Size;
- }
-
- LoadCommandInfo &Info = LoadCommands[Index];
- memcpy(&Info.Command, Buffer->getBuffer().data() + Offset,
- sizeof(macho::LoadCommand));
- if (IsSwappedEndian) {
- SwapValue(Info.Command.Type);
- SwapValue(Info.Command.Size);
- }
- Info.Offset = Offset;
- NumLoadedCommands = Index + 1;
- }
-
- return LoadCommands[Index];
-}
-
-template<>
-void SwapStruct(macho::SegmentLoadCommand &Value) {
- SwapValue(Value.Type);
- SwapValue(Value.Size);
- SwapValue(Value.VMAddress);
- SwapValue(Value.VMSize);
- SwapValue(Value.FileOffset);
- SwapValue(Value.FileSize);
- SwapValue(Value.MaxVMProtection);
- SwapValue(Value.InitialVMProtection);
- SwapValue(Value.NumSections);
- SwapValue(Value.Flags);
-}
-void MachOObject::ReadSegmentLoadCommand(const LoadCommandInfo &LCI,
- InMemoryStruct<macho::SegmentLoadCommand> &Res) const {
- ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::Segment64LoadCommand &Value) {
- SwapValue(Value.Type);
- SwapValue(Value.Size);
- SwapValue(Value.VMAddress);
- SwapValue(Value.VMSize);
- SwapValue(Value.FileOffset);
- SwapValue(Value.FileSize);
- SwapValue(Value.MaxVMProtection);
- SwapValue(Value.InitialVMProtection);
- SwapValue(Value.NumSections);
- SwapValue(Value.Flags);
-}
-void MachOObject::ReadSegment64LoadCommand(const LoadCommandInfo &LCI,
- InMemoryStruct<macho::Segment64LoadCommand> &Res) const {
- ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::SymtabLoadCommand &Value) {
- SwapValue(Value.Type);
- SwapValue(Value.Size);
- SwapValue(Value.SymbolTableOffset);
- SwapValue(Value.NumSymbolTableEntries);
- SwapValue(Value.StringTableOffset);
- SwapValue(Value.StringTableSize);
-}
-void MachOObject::ReadSymtabLoadCommand(const LoadCommandInfo &LCI,
- InMemoryStruct<macho::SymtabLoadCommand> &Res) const {
- ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::DysymtabLoadCommand &Value) {
- SwapValue(Value.Type);
- SwapValue(Value.Size);
- SwapValue(Value.LocalSymbolsIndex);
- SwapValue(Value.NumLocalSymbols);
- SwapValue(Value.ExternalSymbolsIndex);
- SwapValue(Value.NumExternalSymbols);
- SwapValue(Value.UndefinedSymbolsIndex);
- SwapValue(Value.NumUndefinedSymbols);
- SwapValue(Value.TOCOffset);
- SwapValue(Value.NumTOCEntries);
- SwapValue(Value.ModuleTableOffset);
- SwapValue(Value.NumModuleTableEntries);
- SwapValue(Value.ReferenceSymbolTableOffset);
- SwapValue(Value.NumReferencedSymbolTableEntries);
- SwapValue(Value.IndirectSymbolTableOffset);
- SwapValue(Value.NumIndirectSymbolTableEntries);
- SwapValue(Value.ExternalRelocationTableOffset);
- SwapValue(Value.NumExternalRelocationTableEntries);
- SwapValue(Value.LocalRelocationTableOffset);
- SwapValue(Value.NumLocalRelocationTableEntries);
-}
-void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI,
- InMemoryStruct<macho::DysymtabLoadCommand> &Res) const {
- ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::LinkeditDataLoadCommand &Value) {
- SwapValue(Value.Type);
- SwapValue(Value.Size);
- SwapValue(Value.DataOffset);
- SwapValue(Value.DataSize);
-}
-void MachOObject::ReadLinkeditDataLoadCommand(const LoadCommandInfo &LCI,
- InMemoryStruct<macho::LinkeditDataLoadCommand> &Res) const {
- ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::LinkerOptionsLoadCommand &Value) {
- SwapValue(Value.Type);
- SwapValue(Value.Size);
- SwapValue(Value.Count);
-}
-void MachOObject::ReadLinkerOptionsLoadCommand(const LoadCommandInfo &LCI,
- InMemoryStruct<macho::LinkerOptionsLoadCommand> &Res) const {
- ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::IndirectSymbolTableEntry &Value) {
- SwapValue(Value.Index);
-}
-void
-MachOObject::ReadIndirectSymbolTableEntry(const macho::DysymtabLoadCommand &DLC,
- unsigned Index,
- InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const {
- uint64_t Offset = (DLC.IndirectSymbolTableOffset +
- Index * sizeof(macho::IndirectSymbolTableEntry));
- ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-
-template<>
-void SwapStruct(macho::Section &Value) {
- SwapValue(Value.Address);
- SwapValue(Value.Size);
- SwapValue(Value.Offset);
- SwapValue(Value.Align);
- SwapValue(Value.RelocationTableOffset);
- SwapValue(Value.NumRelocationTableEntries);
- SwapValue(Value.Flags);
- SwapValue(Value.Reserved1);
- SwapValue(Value.Reserved2);
-}
-void MachOObject::ReadSection(const LoadCommandInfo &LCI,
- unsigned Index,
- InMemoryStruct<macho::Section> &Res) const {
- assert(LCI.Command.Type == macho::LCT_Segment &&
- "Unexpected load command info!");
- uint64_t Offset = (LCI.Offset + sizeof(macho::SegmentLoadCommand) +
- Index * sizeof(macho::Section));
- ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::Section64 &Value) {
- SwapValue(Value.Address);
- SwapValue(Value.Size);
- SwapValue(Value.Offset);
- SwapValue(Value.Align);
- SwapValue(Value.RelocationTableOffset);
- SwapValue(Value.NumRelocationTableEntries);
- SwapValue(Value.Flags);
- SwapValue(Value.Reserved1);
- SwapValue(Value.Reserved2);
- SwapValue(Value.Reserved3);
-}
-void MachOObject::ReadSection64(const LoadCommandInfo &LCI,
- unsigned Index,
- InMemoryStruct<macho::Section64> &Res) const {
- assert(LCI.Command.Type == macho::LCT_Segment64 &&
- "Unexpected load command info!");
- uint64_t Offset = (LCI.Offset + sizeof(macho::Segment64LoadCommand) +
- Index * sizeof(macho::Section64));
- ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::RelocationEntry &Value) {
- SwapValue(Value.Word0);
- SwapValue(Value.Word1);
-}
-void MachOObject::ReadRelocationEntry(uint64_t RelocationTableOffset,
- unsigned Index,
- InMemoryStruct<macho::RelocationEntry> &Res) const {
- uint64_t Offset = (RelocationTableOffset +
- Index * sizeof(macho::RelocationEntry));
- ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::SymbolTableEntry &Value) {
- SwapValue(Value.StringIndex);
- SwapValue(Value.Flags);
- SwapValue(Value.Value);
-}
-void MachOObject::ReadSymbolTableEntry(uint64_t SymbolTableOffset,
- unsigned Index,
- InMemoryStruct<macho::SymbolTableEntry> &Res) const {
- uint64_t Offset = (SymbolTableOffset +
- Index * sizeof(macho::SymbolTableEntry));
- ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::Symbol64TableEntry &Value) {
- SwapValue(Value.StringIndex);
- SwapValue(Value.Flags);
- SwapValue(Value.Value);
-}
-void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset,
- unsigned Index,
- InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
- uint64_t Offset = (SymbolTableOffset +
- Index * sizeof(macho::Symbol64TableEntry));
- ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::DataInCodeTableEntry &Value) {
- SwapValue(Value.Offset);
- SwapValue(Value.Length);
- SwapValue(Value.Kind);
-}
-void MachOObject::ReadDataInCodeTableEntry(uint64_t TableOffset,
- unsigned Index,
- InMemoryStruct<macho::DataInCodeTableEntry> &Res) const {
- uint64_t Offset = (TableOffset +
- Index * sizeof(macho::DataInCodeTableEntry));
- ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-void MachOObject::ReadULEB128s(uint64_t Index,
- SmallVectorImpl<uint64_t> &Out) const {
- DataExtractor extractor(Buffer->getBuffer(), true, 0);
-
- uint32_t offset = Index;
- uint64_t data = 0;
- while (uint64_t delta = extractor.getULEB128(&offset)) {
- data += delta;
- Out.push_back(data);
- }
-}
-
-/* ** */
-// Object Dumping Facilities
-void MachOObject::dump() const { print(dbgs()); dbgs() << '\n'; }
-void MachOObject::dumpHeader() const { printHeader(dbgs()); dbgs() << '\n'; }
-
-void MachOObject::printHeader(raw_ostream &O) const {
- O << "('cputype', " << Header.CPUType << ")\n";
- O << "('cpusubtype', " << Header.CPUSubtype << ")\n";
- O << "('filetype', " << Header.FileType << ")\n";
- O << "('num_load_commands', " << Header.NumLoadCommands << ")\n";
- O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n";
- O << "('flag', " << Header.Flags << ")\n";
-
- // Print extended header if 64-bit.
- if (is64Bit())
- O << "('reserved', " << Header64Ext.Reserved << ")\n";
-}
-
-void MachOObject::print(raw_ostream &O) const {
- O << "Header:\n";
- printHeader(O);
- O << "Load Commands:\n";
-
- O << "Buffer:\n";
-}
diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp
index 6501df9..dfd8d3d 100644
--- a/contrib/llvm/lib/Object/MachOObjectFile.cpp
+++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp
@@ -15,7 +15,9 @@
#include "llvm/Object/MachO.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cctype>
#include <cstring>
@@ -27,236 +29,560 @@ using namespace object;
namespace llvm {
namespace object {
-MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO,
- error_code &ec)
- : ObjectFile(Binary::ID_MachO, Object, ec),
- MachOObj(MOO),
- RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {
- DataRefImpl DRI;
- moveToNextSection(DRI);
- uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
- while (DRI.d.a < LoadCommandCount) {
- Sections.push_back(DRI);
- DRI.d.b++;
- moveToNextSection(DRI);
+struct SymbolTableEntryBase {
+ uint32_t StringIndex;
+ uint8_t Type;
+ uint8_t SectionIndex;
+ uint16_t Flags;
+};
+
+struct SectionBase {
+ char Name[16];
+ char SegmentName[16];
+};
+
+template<typename T>
+static void SwapValue(T &Value) {
+ Value = sys::SwapByteOrder(Value);
+}
+
+template<typename T>
+static void SwapStruct(T &Value);
+
+template<>
+void SwapStruct(macho::RelocationEntry &H) {
+ SwapValue(H.Word0);
+ SwapValue(H.Word1);
+}
+
+template<>
+void SwapStruct(macho::LoadCommand &L) {
+ SwapValue(L.Type);
+ SwapValue(L.Size);
+}
+
+template<>
+void SwapStruct(SymbolTableEntryBase &S) {
+ SwapValue(S.StringIndex);
+ SwapValue(S.Flags);
+}
+
+template<>
+void SwapStruct(macho::Section &S) {
+ SwapValue(S.Address);
+ SwapValue(S.Size);
+ SwapValue(S.Offset);
+ SwapValue(S.Align);
+ SwapValue(S.RelocationTableOffset);
+ SwapValue(S.NumRelocationTableEntries);
+ SwapValue(S.Flags);
+ SwapValue(S.Reserved1);
+ SwapValue(S.Reserved2);
+}
+
+template<>
+void SwapStruct(macho::Section64 &S) {
+ SwapValue(S.Address);
+ SwapValue(S.Size);
+ SwapValue(S.Offset);
+ SwapValue(S.Align);
+ SwapValue(S.RelocationTableOffset);
+ SwapValue(S.NumRelocationTableEntries);
+ SwapValue(S.Flags);
+ SwapValue(S.Reserved1);
+ SwapValue(S.Reserved2);
+ SwapValue(S.Reserved3);
+}
+
+template<>
+void SwapStruct(macho::SymbolTableEntry &S) {
+ SwapValue(S.StringIndex);
+ SwapValue(S.Flags);
+ SwapValue(S.Value);
+}
+
+template<>
+void SwapStruct(macho::Symbol64TableEntry &S) {
+ SwapValue(S.StringIndex);
+ SwapValue(S.Flags);
+ SwapValue(S.Value);
+}
+
+template<>
+void SwapStruct(macho::Header &H) {
+ SwapValue(H.Magic);
+ SwapValue(H.CPUType);
+ SwapValue(H.CPUSubtype);
+ SwapValue(H.FileType);
+ SwapValue(H.NumLoadCommands);
+ SwapValue(H.SizeOfLoadCommands);
+ SwapValue(H.Flags);
+}
+
+template<>
+void SwapStruct(macho::Header64Ext &E) {
+ SwapValue(E.Reserved);
+}
+
+template<>
+void SwapStruct(macho::SymtabLoadCommand &C) {
+ SwapValue(C.Type);
+ SwapValue(C.Size);
+ SwapValue(C.SymbolTableOffset);
+ SwapValue(C.NumSymbolTableEntries);
+ SwapValue(C.StringTableOffset);
+ SwapValue(C.StringTableSize);
+}
+
+template<>
+void SwapStruct(macho::DysymtabLoadCommand &C) {
+ SwapValue(C.Type);
+ SwapValue(C.Size);
+ SwapValue(C.LocalSymbolsIndex);
+ SwapValue(C.NumLocalSymbols);
+ SwapValue(C.ExternalSymbolsIndex);
+ SwapValue(C.NumExternalSymbols);
+ SwapValue(C.UndefinedSymbolsIndex);
+ SwapValue(C.NumUndefinedSymbols);
+ SwapValue(C.TOCOffset);
+ SwapValue(C.NumTOCEntries);
+ SwapValue(C.ModuleTableOffset);
+ SwapValue(C.NumModuleTableEntries);
+ SwapValue(C.ReferenceSymbolTableOffset);
+ SwapValue(C.NumReferencedSymbolTableEntries);
+ SwapValue(C.IndirectSymbolTableOffset);
+ SwapValue(C.NumIndirectSymbolTableEntries);
+ SwapValue(C.ExternalRelocationTableOffset);
+ SwapValue(C.NumExternalRelocationTableEntries);
+ SwapValue(C.LocalRelocationTableOffset);
+ SwapValue(C.NumLocalRelocationTableEntries);
+}
+
+template<>
+void SwapStruct(macho::LinkeditDataLoadCommand &C) {
+ SwapValue(C.Type);
+ SwapValue(C.Size);
+ SwapValue(C.DataOffset);
+ SwapValue(C.DataSize);
+}
+
+template<>
+void SwapStruct(macho::SegmentLoadCommand &C) {
+ SwapValue(C.Type);
+ SwapValue(C.Size);
+ SwapValue(C.VMAddress);
+ SwapValue(C.VMSize);
+ SwapValue(C.FileOffset);
+ SwapValue(C.FileSize);
+ SwapValue(C.MaxVMProtection);
+ SwapValue(C.InitialVMProtection);
+ SwapValue(C.NumSections);
+ SwapValue(C.Flags);
+}
+
+template<>
+void SwapStruct(macho::Segment64LoadCommand &C) {
+ SwapValue(C.Type);
+ SwapValue(C.Size);
+ SwapValue(C.VMAddress);
+ SwapValue(C.VMSize);
+ SwapValue(C.FileOffset);
+ SwapValue(C.FileSize);
+ SwapValue(C.MaxVMProtection);
+ SwapValue(C.InitialVMProtection);
+ SwapValue(C.NumSections);
+ SwapValue(C.Flags);
+}
+
+template<>
+void SwapStruct(macho::IndirectSymbolTableEntry &C) {
+ SwapValue(C.Index);
+}
+
+template<>
+void SwapStruct(macho::LinkerOptionsLoadCommand &C) {
+ SwapValue(C.Type);
+ SwapValue(C.Size);
+ SwapValue(C.Count);
+}
+
+template<>
+void SwapStruct(macho::DataInCodeTableEntry &C) {
+ SwapValue(C.Offset);
+ SwapValue(C.Length);
+ SwapValue(C.Kind);
+}
+
+template<typename T>
+T getStruct(const MachOObjectFile *O, const char *P) {
+ T Cmd;
+ memcpy(&Cmd, P, sizeof(T));
+ if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ SwapStruct(Cmd);
+ return Cmd;
+}
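
The getStruct<T> helper above is the core of the new reader's endianness handling: copy the raw on-disk bytes into a host-side struct with memcpy (avoiding unaligned loads), then swap each field only when the file's byte order differs from the host's. A standalone sketch of that pattern, with hypothetical names and only the standard library:

#include <cstdint>
#include <cstring>

struct ExampleHeader {
  uint32_t Magic;
  uint32_t NumLoadCommands;
};

static uint32_t swap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xff00) | ((V << 8) & 0xff0000) | (V << 24);
}

// Copy the on-disk bytes into an aligned struct, then swap field by field
// if the file and host byte orders differ.
static ExampleHeader readHeader(const char *P, bool FileIsLittleEndian,
                                bool HostIsLittleEndian) {
  ExampleHeader H;
  std::memcpy(&H, P, sizeof(H));
  if (FileIsLittleEndian != HostIsLittleEndian) {
    H.Magic = swap32(H.Magic);
    H.NumLoadCommands = swap32(H.NumLoadCommands);
  }
  return H;
}
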
+
+static uint32_t
+getSegmentLoadCommandNumSections(const MachOObjectFile *O,
+ const MachOObjectFile::LoadCommandInfo &L) {
+ if (O->is64Bit()) {
+ macho::Segment64LoadCommand S = O->getSegment64LoadCommand(L);
+ return S.NumSections;
}
+ macho::SegmentLoadCommand S = O->getSegmentLoadCommand(L);
+ return S.NumSections;
}
+static const char *
+getSectionPtr(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L,
+ unsigned Sec) {
+ uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(L.Ptr);
-ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
+ bool Is64 = O->is64Bit();
+ unsigned SegmentLoadSize = Is64 ? sizeof(macho::Segment64LoadCommand) :
+ sizeof(macho::SegmentLoadCommand);
+ unsigned SectionSize = Is64 ? sizeof(macho::Section64) :
+ sizeof(macho::Section);
+
+ uintptr_t SectionAddr = CommandAddr + SegmentLoadSize + Sec * SectionSize;
+ return reinterpret_cast<const char*>(SectionAddr);
+}
+
+static const char *getPtr(const MachOObjectFile *O, size_t Offset) {
+ return O->getData().substr(Offset, 1).data();
+}
+
+static SymbolTableEntryBase
+getSymbolTableEntryBase(const MachOObjectFile *O, DataRefImpl DRI) {
+ const char *P = reinterpret_cast<const char *>(DRI.p);
+ return getStruct<SymbolTableEntryBase>(O, P);
+}
+
+static StringRef parseSegmentOrSectionName(const char *P) {
+ if (P[15] == 0)
+ // Null terminated.
+ return P;
+ // Not null terminated, so this is a 16 char string.
+ return StringRef(P, 16);
+}
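
parseSegmentOrSectionName encodes the fixed-width naming rule for Mach-O segments and sections: the name occupies a 16-byte field that is NUL-terminated only when it is shorter than 16 characters. A minimal sketch of the same rule (hypothetical name, standard library only):

#include <string>

static std::string parseFixedWidthName(const char Name[16]) {
  if (Name[15] == '\0')
    return std::string(Name);       // shorter name, NUL-terminated
  return std::string(Name, 16);     // exactly 16 characters, no terminator
}
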
+
+// Helper to advance a section or symbol iterator multiple increments at a time.
+template<class T>
+static error_code advance(T &it, size_t Val) {
error_code ec;
- std::string Err;
- MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err);
- if (!MachOObj)
- return NULL;
- // MachOObject takes ownership of the Buffer we passed to it, and
- // MachOObjectFile does, too, so we need to make sure they don't get the
- // same object. A MemoryBuffer is cheap (it's just a reference to memory,
- // not a copy of the memory itself), so just make a new copy here for
- // the MachOObjectFile.
- MemoryBuffer *NewBuffer =
- MemoryBuffer::getMemBuffer(Buffer->getBuffer(),
- Buffer->getBufferIdentifier(), false);
- return new MachOObjectFile(NewBuffer, MachOObj, ec);
-}
-
-/*===-- Symbols -----------------------------------------------------------===*/
-
-void MachOObjectFile::moveToNextSymbol(DataRefImpl &DRI) const {
- uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
- while (DRI.d.a < LoadCommandCount) {
- LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- if (LCI.Command.Type == macho::LCT_Symtab) {
- InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
- MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
- if (DRI.d.b < SymtabLoadCmd->NumSymbolTableEntries)
- return;
+ while (Val--) {
+ it.increment(ec);
+ }
+ return ec;
+}
+
+template<class T>
+static void advanceTo(T &it, size_t Val) {
+ if (error_code ec = advance(it, Val))
+ report_fatal_error(ec.message());
+}
+
+static unsigned getCPUType(const MachOObjectFile *O) {
+ return O->getHeader().CPUType;
+}
+
+static void printRelocationTargetName(const MachOObjectFile *O,
+ const macho::RelocationEntry &RE,
+ raw_string_ostream &fmt) {
+ bool IsScattered = O->isRelocationScattered(RE);
+
+  // The target of a scattered relocation is an address. In the interest of
+  // generating pretty output, scan through the symbol table looking for a
+  // symbol that aligns with that address. If we find one, print it.
+  // Otherwise, just print the hex address of the target.
+ if (IsScattered) {
+ uint32_t Val = O->getPlainRelocationSymbolNum(RE);
+
+ error_code ec;
+ for (symbol_iterator SI = O->begin_symbols(), SE = O->end_symbols();
+ SI != SE; SI.increment(ec)) {
+ if (ec) report_fatal_error(ec.message());
+
+ uint64_t Addr;
+ StringRef Name;
+
+ if ((ec = SI->getAddress(Addr)))
+ report_fatal_error(ec.message());
+ if (Addr != Val) continue;
+ if ((ec = SI->getName(Name)))
+ report_fatal_error(ec.message());
+ fmt << Name;
+ return;
+ }
+
+ // If we couldn't find a symbol that this relocation refers to, try
+ // to find a section beginning instead.
+ for (section_iterator SI = O->begin_sections(), SE = O->end_sections();
+ SI != SE; SI.increment(ec)) {
+ if (ec) report_fatal_error(ec.message());
+
+ uint64_t Addr;
+ StringRef Name;
+
+ if ((ec = SI->getAddress(Addr)))
+ report_fatal_error(ec.message());
+ if (Addr != Val) continue;
+ if ((ec = SI->getName(Name)))
+ report_fatal_error(ec.message());
+ fmt << Name;
+ return;
}
- DRI.d.a++;
- DRI.d.b = 0;
+ fmt << format("0x%x", Val);
+ return;
}
-}
-void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI,
- InMemoryStruct<macho::SymbolTableEntry> &Res) const {
- InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
- LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+ StringRef S;
+ bool isExtern = O->getPlainRelocationExternal(RE);
+ uint64_t Val = O->getAnyRelocationAddress(RE);
- if (RegisteredStringTable != DRI.d.a) {
- MachOObj->RegisterStringTable(*SymtabLoadCmd);
- RegisteredStringTable = DRI.d.a;
+ if (isExtern) {
+ symbol_iterator SI = O->begin_symbols();
+ advanceTo(SI, Val);
+ SI->getName(S);
+ } else {
+ section_iterator SI = O->begin_sections();
+ advanceTo(SI, Val);
+ SI->getName(S);
}
- MachOObj->ReadSymbolTableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
- Res);
+ fmt << S;
+}
+
+static uint32_t getPlainRelocationAddress(const macho::RelocationEntry &RE) {
+ return RE.Word0;
+}
+
+static unsigned
+getScatteredRelocationAddress(const macho::RelocationEntry &RE) {
+ return RE.Word0 & 0xffffff;
+}
+
+static bool getPlainRelocationPCRel(const MachOObjectFile *O,
+ const macho::RelocationEntry &RE) {
+ if (O->isLittleEndian())
+ return (RE.Word1 >> 24) & 1;
+ return (RE.Word1 >> 7) & 1;
+}
+
+static bool
+getScatteredRelocationPCRel(const MachOObjectFile *O,
+ const macho::RelocationEntry &RE) {
+ return (RE.Word0 >> 30) & 1;
+}
+
+static unsigned getPlainRelocationLength(const MachOObjectFile *O,
+ const macho::RelocationEntry &RE) {
+ if (O->isLittleEndian())
+ return (RE.Word1 >> 25) & 3;
+ return (RE.Word1 >> 5) & 3;
+}
+
+static unsigned
+getScatteredRelocationLength(const macho::RelocationEntry &RE) {
+ return (RE.Word0 >> 28) & 3;
}
-void MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI,
- InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
- InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
- LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+static unsigned getPlainRelocationType(const MachOObjectFile *O,
+ const macho::RelocationEntry &RE) {
+ if (O->isLittleEndian())
+ return RE.Word1 >> 28;
+ return RE.Word1 & 0xf;
+}
- if (RegisteredStringTable != DRI.d.a) {
- MachOObj->RegisterStringTable(*SymtabLoadCmd);
- RegisteredStringTable = DRI.d.a;
+static unsigned getScatteredRelocationType(const macho::RelocationEntry &RE) {
+ return (RE.Word0 >> 24) & 0xf;
+}
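
The plain/scattered accessors above decode the second relocation word, which packs r_symbolnum (24 bits), r_pcrel (1), r_length (2), r_extern (1) and r_type (4); the bit offsets flip with the file's byte order, which is why each helper branches on isLittleEndian(). A sketch gathering the same bit layout in one place (hypothetical names, mirroring the helpers above):

#include <cstdint>

struct PlainRelocInfo {
  uint32_t SymbolNum;
  bool PCRel;
  unsigned Length;   // log2 of the access size: 0=1, 1=2, 2=4, 3=8 bytes
  bool Extern;
  unsigned Type;
};

static PlainRelocInfo decodeWord1(uint32_t Word1, bool FileIsLittleEndian) {
  PlainRelocInfo R;
  if (FileIsLittleEndian) {
    R.SymbolNum = Word1 & 0xffffff;
    R.PCRel     = (Word1 >> 24) & 1;
    R.Length    = (Word1 >> 25) & 3;
    R.Extern    = (Word1 >> 27) & 1;
    R.Type      =  Word1 >> 28;
  } else {
    R.SymbolNum =  Word1 >> 8;
    R.Extern    = (Word1 >> 4) & 1;
    R.Length    = (Word1 >> 5) & 3;
    R.PCRel     = (Word1 >> 7) & 1;
    R.Type      =  Word1 & 0xf;
  }
  return R;
}
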
+
+static uint32_t getSectionFlags(const MachOObjectFile *O,
+ DataRefImpl Sec) {
+ if (O->is64Bit()) {
+ macho::Section64 Sect = O->getSection64(Sec);
+ return Sect.Flags;
}
+ macho::Section Sect = O->getSection(Sec);
+ return Sect.Flags;
+}
+
+MachOObjectFile::MachOObjectFile(MemoryBuffer *Object,
+ bool IsLittleEndian, bool Is64bits,
+ error_code &ec)
+ : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
+ SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL) {
+ uint32_t LoadCommandCount = this->getHeader().NumLoadCommands;
+ macho::LoadCommandType SegmentLoadType = is64Bit() ?
+ macho::LCT_Segment64 : macho::LCT_Segment;
+
+ MachOObjectFile::LoadCommandInfo Load = getFirstLoadCommandInfo();
+ for (unsigned I = 0; ; ++I) {
+ if (Load.C.Type == macho::LCT_Symtab) {
+ assert(!SymtabLoadCmd && "Multiple symbol tables");
+ SymtabLoadCmd = Load.Ptr;
+ } else if (Load.C.Type == macho::LCT_Dysymtab) {
+ assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables");
+ DysymtabLoadCmd = Load.Ptr;
+ } else if (Load.C.Type == SegmentLoadType) {
+ uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
+ for (unsigned J = 0; J < NumSections; ++J) {
+ const char *Sec = getSectionPtr(this, Load, J);
+ Sections.push_back(Sec);
+ }
+ }
- MachOObj->ReadSymbol64TableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
- Res);
+ if (I == LoadCommandCount - 1)
+ break;
+ else
+ Load = getNextLoadCommandInfo(Load);
+ }
}
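
The constructor above walks the load commands in place: commands start immediately after the mach header, each records its own byte size, and the next command begins that many bytes further on. A sketch of that walk under those assumptions (hypothetical names; byte-swapping elided):

#include <cstdint>
#include <cstring>

struct RawLoadCommand { uint32_t Type; uint32_t Size; };

static const char *firstCommand(const char *FileBase, bool Is64Bit) {
  const unsigned HeaderSize = Is64Bit ? 32 : 28;  // mach_header_64 / mach_header
  return FileBase + HeaderSize;
}

static const char *nextCommand(const char *Cmd) {
  RawLoadCommand C;
  std::memcpy(&C, Cmd, sizeof(C));
  return Cmd + C.Size;   // Size covers the command header and its payload
}
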
+error_code MachOObjectFile::getSymbolNext(DataRefImpl Symb,
+ SymbolRef &Res) const {
+ unsigned SymbolTableEntrySize = is64Bit() ?
+ sizeof(macho::Symbol64TableEntry) :
+ sizeof(macho::SymbolTableEntry);
+ Symb.p += SymbolTableEntrySize;
+ Res = SymbolRef(Symb, this);
+ return object_error::success;
+}
-error_code MachOObjectFile::getSymbolNext(DataRefImpl DRI,
- SymbolRef &Result) const {
- DRI.d.b++;
- moveToNextSymbol(DRI);
- Result = SymbolRef(DRI, this);
+error_code MachOObjectFile::getSymbolName(DataRefImpl Symb,
+ StringRef &Res) const {
+ StringRef StringTable = getStringTableData();
+ SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+ const char *Start = &StringTable.data()[Entry.StringIndex];
+ Res = StringRef(Start);
return object_error::success;
}
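
getSymbolName above is a plain string-table lookup: each symbol record stores an offset (StringIndex) into one shared, NUL-terminated string table. A sketch of that lookup with hypothetical names:

#include <cstdint>
#include <string>

static std::string symbolName(const char *StringTable, uint32_t StringIndex) {
  // Names are NUL-terminated, so constructing from the offset reads up to
  // the terminator.
  return std::string(StringTable + StringIndex);
}
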
-error_code MachOObjectFile::getSymbolName(DataRefImpl DRI,
- StringRef &Result) const {
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Symbol64TableEntry> Entry;
- getSymbol64TableEntry(DRI, Entry);
- Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb,
+ uint64_t &Res) const {
+ if (is64Bit()) {
+ macho::Symbol64TableEntry Entry = getSymbol64TableEntry(Symb);
+ Res = Entry.Value;
} else {
- InMemoryStruct<macho::SymbolTableEntry> Entry;
- getSymbolTableEntry(DRI, Entry);
- Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+ macho::SymbolTableEntry Entry = getSymbolTableEntry(Symb);
+ Res = Entry.Value;
}
return object_error::success;
}
-error_code MachOObjectFile::getSymbolFileOffset(DataRefImpl DRI,
- uint64_t &Result) const {
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Symbol64TableEntry> Entry;
- getSymbol64TableEntry(DRI, Entry);
- Result = Entry->Value;
- if (Entry->SectionIndex) {
- InMemoryStruct<macho::Section64> Section;
- getSection64(Sections[Entry->SectionIndex-1], Section);
- Result += Section->Offset - Section->Address;
- }
- } else {
- InMemoryStruct<macho::SymbolTableEntry> Entry;
- getSymbolTableEntry(DRI, Entry);
- Result = Entry->Value;
- if (Entry->SectionIndex) {
- InMemoryStruct<macho::Section> Section;
- getSection(Sections[Entry->SectionIndex-1], Section);
- Result += Section->Offset - Section->Address;
+error_code
+MachOObjectFile::getSymbolFileOffset(DataRefImpl Symb,
+ uint64_t &Res) const {
+ SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+ getSymbolAddress(Symb, Res);
+ if (Entry.SectionIndex) {
+ uint64_t Delta;
+ DataRefImpl SecRel;
+ SecRel.d.a = Entry.SectionIndex-1;
+ if (is64Bit()) {
+ macho::Section64 Sec = getSection64(SecRel);
+ Delta = Sec.Offset - Sec.Address;
+ } else {
+ macho::Section Sec = getSection(SecRel);
+ Delta = Sec.Offset - Sec.Address;
}
+
+ Res += Delta;
}
return object_error::success;
}
-error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI,
- uint64_t &Result) const {
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Symbol64TableEntry> Entry;
- getSymbol64TableEntry(DRI, Entry);
- Result = Entry->Value;
+error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI,
+ uint32_t &Result) const {
+ uint32_t flags;
+ this->getSymbolFlags(DRI, flags);
+ if (flags & SymbolRef::SF_Common) {
+ SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI);
+ Result = 1 << MachO::GET_COMM_ALIGN(Entry.Flags);
} else {
- InMemoryStruct<macho::SymbolTableEntry> Entry;
- getSymbolTableEntry(DRI, Entry);
- Result = Entry->Value;
+ Result = 0;
}
return object_error::success;
}
error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
uint64_t &Result) const {
- uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
uint64_t BeginOffset;
uint64_t EndOffset = 0;
uint8_t SectionIndex;
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Symbol64TableEntry> Entry;
- getSymbol64TableEntry(DRI, Entry);
- BeginOffset = Entry->Value;
- SectionIndex = Entry->SectionIndex;
- if (!SectionIndex) {
- uint32_t flags = SymbolRef::SF_None;
- getSymbolFlags(DRI, flags);
- if (flags & SymbolRef::SF_Common)
- Result = Entry->Value;
- else
- Result = UnknownAddressOrSize;
- return object_error::success;
- }
- // Unfortunately symbols are unsorted so we need to touch all
- // symbols from load command
- DRI.d.b = 0;
- uint32_t Command = DRI.d.a;
- while (Command == DRI.d.a) {
- moveToNextSymbol(DRI);
- if (DRI.d.a < LoadCommandCount) {
- getSymbol64TableEntry(DRI, Entry);
- if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
- if (!EndOffset || Entry->Value < EndOffset)
- EndOffset = Entry->Value;
- }
- DRI.d.b++;
- }
- } else {
- InMemoryStruct<macho::SymbolTableEntry> Entry;
- getSymbolTableEntry(DRI, Entry);
- BeginOffset = Entry->Value;
- SectionIndex = Entry->SectionIndex;
- if (!SectionIndex) {
- uint32_t flags = SymbolRef::SF_None;
- getSymbolFlags(DRI, flags);
- if (flags & SymbolRef::SF_Common)
- Result = Entry->Value;
- else
- Result = UnknownAddressOrSize;
- return object_error::success;
- }
- // Unfortunately symbols are unsorted so we need to touch all
- // symbols from load command
- DRI.d.b = 0;
- uint32_t Command = DRI.d.a;
- while (Command == DRI.d.a) {
- moveToNextSymbol(DRI);
- if (DRI.d.a < LoadCommandCount) {
- getSymbolTableEntry(DRI, Entry);
- if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
- if (!EndOffset || Entry->Value < EndOffset)
- EndOffset = Entry->Value;
- }
- DRI.d.b++;
- }
+
+ SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI);
+ uint64_t Value;
+ getSymbolAddress(DRI, Value);
+
+ BeginOffset = Value;
+
+ SectionIndex = Entry.SectionIndex;
+ if (!SectionIndex) {
+ uint32_t flags = SymbolRef::SF_None;
+ this->getSymbolFlags(DRI, flags);
+ if (flags & SymbolRef::SF_Common)
+ Result = Value;
+ else
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+  // Unfortunately, symbols are unsorted, so we need to touch all
+  // symbols in the symbol table.

+ error_code ec;
+ for (symbol_iterator I = begin_symbols(), E = end_symbols(); I != E;
+ I.increment(ec)) {
+ DataRefImpl DRI = I->getRawDataRefImpl();
+ Entry = getSymbolTableEntryBase(this, DRI);
+ getSymbolAddress(DRI, Value);
+ if (Entry.SectionIndex == SectionIndex && Value > BeginOffset)
+ if (!EndOffset || Value < EndOffset)
+ EndOffset = Value;
}
if (!EndOffset) {
uint64_t Size;
- getSectionSize(Sections[SectionIndex-1], Size);
- getSectionAddress(Sections[SectionIndex-1], EndOffset);
+ DataRefImpl Sec;
+ Sec.d.a = SectionIndex-1;
+ getSectionSize(Sec, Size);
+ getSectionAddress(Sec, EndOffset);
EndOffset += Size;
}
Result = EndOffset - BeginOffset;
return object_error::success;
}
-error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
- char &Result) const {
- uint8_t Type, Flags;
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Symbol64TableEntry> Entry;
- getSymbol64TableEntry(DRI, Entry);
- Type = Entry->Type;
- Flags = Entry->Flags;
- } else {
- InMemoryStruct<macho::SymbolTableEntry> Entry;
- getSymbolTableEntry(DRI, Entry);
- Type = Entry->Type;
- Flags = Entry->Flags;
+error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
+ SymbolRef::Type &Res) const {
+ SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+ uint8_t n_type = Entry.Type;
+
+ Res = SymbolRef::ST_Other;
+
+ // If this is a STAB debugging symbol, we can do nothing more.
+ if (n_type & MachO::NlistMaskStab) {
+ Res = SymbolRef::ST_Debug;
+ return object_error::success;
+ }
+
+ switch (n_type & MachO::NlistMaskType) {
+ case MachO::NListTypeUndefined :
+ Res = SymbolRef::ST_Unknown;
+ break;
+ case MachO::NListTypeSection :
+ Res = SymbolRef::ST_Function;
+ break;
}
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
+ char &Res) const {
+ SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+ uint8_t Type = Entry.Type;
+ uint16_t Flags = Entry.Flags;
char Char;
switch (Type & macho::STF_TypeMask) {
@@ -274,25 +600,16 @@ error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
if (Flags & (macho::STF_External | macho::STF_PrivateExtern))
Char = toupper(static_cast<unsigned char>(Char));
- Result = Char;
+ Res = Char;
return object_error::success;
}
error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
uint32_t &Result) const {
- uint16_t MachOFlags;
- uint8_t MachOType;
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Symbol64TableEntry> Entry;
- getSymbol64TableEntry(DRI, Entry);
- MachOFlags = Entry->Flags;
- MachOType = Entry->Type;
- } else {
- InMemoryStruct<macho::SymbolTableEntry> Entry;
- getSymbolTableEntry(DRI, Entry);
- MachOFlags = Entry->Flags;
- MachOType = Entry->Type;
- }
+ SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI);
+
+ uint8_t MachOType = Entry.Type;
+ uint16_t MachOFlags = Entry.Flags;
// TODO: Correctly set SF_ThreadLocal
Result = SymbolRef::SF_None;
@@ -305,8 +622,12 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
if (MachOType & MachO::NlistMaskExternal) {
Result |= SymbolRef::SF_Global;
- if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined)
- Result |= SymbolRef::SF_Common;
+ if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) {
+ uint64_t Value;
+ getSymbolAddress(DRI, Value);
+ if (Value)
+ Result |= SymbolRef::SF_Common;
+ }
}
if (MachOFlags & (MachO::NListDescWeakRef | MachO::NListDescWeakDef))
@@ -318,55 +639,20 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
return object_error::success;
}
-error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
- section_iterator &Res) const {
- uint8_t index;
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Symbol64TableEntry> Entry;
- getSymbol64TableEntry(Symb, Entry);
- index = Entry->SectionIndex;
- } else {
- InMemoryStruct<macho::SymbolTableEntry> Entry;
- getSymbolTableEntry(Symb, Entry);
- index = Entry->SectionIndex;
- }
+error_code
+MachOObjectFile::getSymbolSection(DataRefImpl Symb,
+ section_iterator &Res) const {
+ SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+ uint8_t index = Entry.SectionIndex;
- if (index == 0)
+ if (index == 0) {
Res = end_sections();
- else
- Res = section_iterator(SectionRef(Sections[index-1], this));
-
- return object_error::success;
-}
-
-error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
- SymbolRef::Type &Res) const {
- uint8_t n_type;
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Symbol64TableEntry> Entry;
- getSymbol64TableEntry(Symb, Entry);
- n_type = Entry->Type;
} else {
- InMemoryStruct<macho::SymbolTableEntry> Entry;
- getSymbolTableEntry(Symb, Entry);
- n_type = Entry->Type;
- }
- Res = SymbolRef::ST_Other;
-
- // If this is a STAB debugging symbol, we can do nothing more.
- if (n_type & MachO::NlistMaskStab) {
- Res = SymbolRef::ST_Debug;
- return object_error::success;
+ DataRefImpl DRI;
+ DRI.d.a = index - 1;
+ Res = section_iterator(SectionRef(DRI, this));
}
- switch (n_type & MachO::NlistMaskType) {
- case MachO::NListTypeUndefined :
- Res = SymbolRef::ST_Unknown;
- break;
- case MachO::NListTypeSection :
- Res = SymbolRef::ST_Function;
- break;
- }
return object_error::success;
}
@@ -375,242 +661,101 @@ error_code MachOObjectFile::getSymbolValue(DataRefImpl Symb,
report_fatal_error("getSymbolValue unimplemented in MachOObjectFile");
}
-symbol_iterator MachOObjectFile::begin_symbols() const {
- // DRI.d.a = segment number; DRI.d.b = symbol index.
- DataRefImpl DRI;
- moveToNextSymbol(DRI);
- return symbol_iterator(SymbolRef(DRI, this));
-}
-
-symbol_iterator MachOObjectFile::end_symbols() const {
- DataRefImpl DRI;
- DRI.d.a = MachOObj->getHeader().NumLoadCommands;
- return symbol_iterator(SymbolRef(DRI, this));
-}
-
-symbol_iterator MachOObjectFile::begin_dynamic_symbols() const {
- // TODO: implement
- report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
-}
-
-symbol_iterator MachOObjectFile::end_dynamic_symbols() const {
- // TODO: implement
- report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
-}
-
-library_iterator MachOObjectFile::begin_libraries_needed() const {
- // TODO: implement
- report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
-}
-
-library_iterator MachOObjectFile::end_libraries_needed() const {
- // TODO: implement
- report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
-}
-
-StringRef MachOObjectFile::getLoadName() const {
- // TODO: Implement
- report_fatal_error("get_load_name() unimplemented in MachOObjectFile");
-}
-
-/*===-- Sections ----------------------------------------------------------===*/
-
-void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const {
- uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
- while (DRI.d.a < LoadCommandCount) {
- LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- if (LCI.Command.Type == macho::LCT_Segment) {
- InMemoryStruct<macho::SegmentLoadCommand> SegmentLoadCmd;
- MachOObj->ReadSegmentLoadCommand(LCI, SegmentLoadCmd);
- if (DRI.d.b < SegmentLoadCmd->NumSections)
- return;
- } else if (LCI.Command.Type == macho::LCT_Segment64) {
- InMemoryStruct<macho::Segment64LoadCommand> Segment64LoadCmd;
- MachOObj->ReadSegment64LoadCommand(LCI, Segment64LoadCmd);
- if (DRI.d.b < Segment64LoadCmd->NumSections)
- return;
- }
-
- DRI.d.a++;
- DRI.d.b = 0;
- }
-}
-
-error_code MachOObjectFile::getSectionNext(DataRefImpl DRI,
- SectionRef &Result) const {
- DRI.d.b++;
- moveToNextSection(DRI);
- Result = SectionRef(DRI, this);
+error_code MachOObjectFile::getSectionNext(DataRefImpl Sec,
+ SectionRef &Res) const {
+ Sec.d.a++;
+ Res = SectionRef(Sec, this);
return object_error::success;
}
-void
-MachOObjectFile::getSection(DataRefImpl DRI,
- InMemoryStruct<macho::Section> &Res) const {
- LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- MachOObj->ReadSection(LCI, DRI.d.b, Res);
-}
-
-std::size_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const {
- SectionList::const_iterator loc =
- std::find(Sections.begin(), Sections.end(), Sec);
- assert(loc != Sections.end() && "Sec is not a valid section!");
- return std::distance(Sections.begin(), loc);
-}
-
-void
-MachOObjectFile::getSection64(DataRefImpl DRI,
- InMemoryStruct<macho::Section64> &Res) const {
- LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- MachOObj->ReadSection64(LCI, DRI.d.b, Res);
-}
-
-static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) {
- LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- if (LCI.Command.Type == macho::LCT_Segment64)
- return true;
- assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected Type.");
- return false;
-}
-
-static StringRef parseSegmentOrSectionName(const char *P) {
- if (P[15] == 0)
- // Null terminated.
- return P;
- // Not null terminated, so this is a 16 char string.
- return StringRef(P, 16);
-}
-
-error_code MachOObjectFile::getSectionName(DataRefImpl DRI,
- StringRef &Result) const {
- if (is64BitLoadCommand(MachOObj.get(), DRI)) {
- LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) +
- DRI.d.b * sizeof(macho::Section64);
- StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64));
- const macho::Section64 *sec =
- reinterpret_cast<const macho::Section64*>(Data.data());
- Result = parseSegmentOrSectionName(sec->Name);
- } else {
- LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) +
- DRI.d.b * sizeof(macho::Section);
- StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section));
- const macho::Section *sec =
- reinterpret_cast<const macho::Section*>(Data.data());
- Result = parseSegmentOrSectionName(sec->Name);
- }
+error_code
+MachOObjectFile::getSectionName(DataRefImpl Sec, StringRef &Result) const {
+ ArrayRef<char> Raw = getSectionRawName(Sec);
+ Result = parseSegmentOrSectionName(Raw.data());
return object_error::success;
}
-error_code MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec,
- StringRef &Res) const {
- if (is64BitLoadCommand(MachOObj.get(), Sec)) {
- LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a);
- unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) +
- Sec.d.b * sizeof(macho::Section64);
- StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64));
- const macho::Section64 *sec =
- reinterpret_cast<const macho::Section64*>(Data.data());
- Res = parseSegmentOrSectionName(sec->SegmentName);
+error_code
+MachOObjectFile::getSectionAddress(DataRefImpl Sec, uint64_t &Res) const {
+ if (is64Bit()) {
+ macho::Section64 Sect = getSection64(Sec);
+ Res = Sect.Address;
} else {
- LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a);
- unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) +
- Sec.d.b * sizeof(macho::Section);
- StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section));
- const macho::Section *sec =
- reinterpret_cast<const macho::Section*>(Data.data());
- Res = parseSegmentOrSectionName(sec->SegmentName);
+ macho::Section Sect = getSection(Sec);
+ Res = Sect.Address;
}
return object_error::success;
}
-error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI,
- uint64_t &Result) const {
- if (is64BitLoadCommand(MachOObj.get(), DRI)) {
- InMemoryStruct<macho::Section64> Sect;
- getSection64(DRI, Sect);
- Result = Sect->Address;
+error_code
+MachOObjectFile::getSectionSize(DataRefImpl Sec, uint64_t &Res) const {
+ if (is64Bit()) {
+ macho::Section64 Sect = getSection64(Sec);
+ Res = Sect.Size;
} else {
- InMemoryStruct<macho::Section> Sect;
- getSection(DRI, Sect);
- Result = Sect->Address;
+ macho::Section Sect = getSection(Sec);
+ Res = Sect.Size;
}
- return object_error::success;
-}
-error_code MachOObjectFile::getSectionSize(DataRefImpl DRI,
- uint64_t &Result) const {
- if (is64BitLoadCommand(MachOObj.get(), DRI)) {
- InMemoryStruct<macho::Section64> Sect;
- getSection64(DRI, Sect);
- Result = Sect->Size;
- } else {
- InMemoryStruct<macho::Section> Sect;
- getSection(DRI, Sect);
- Result = Sect->Size;
- }
return object_error::success;
}
-error_code MachOObjectFile::getSectionContents(DataRefImpl DRI,
- StringRef &Result) const {
- if (is64BitLoadCommand(MachOObj.get(), DRI)) {
- InMemoryStruct<macho::Section64> Sect;
- getSection64(DRI, Sect);
- Result = MachOObj->getData(Sect->Offset, Sect->Size);
+error_code
+MachOObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Res) const {
+ uint32_t Offset;
+ uint64_t Size;
+
+ if (is64Bit()) {
+ macho::Section64 Sect = getSection64(Sec);
+ Offset = Sect.Offset;
+ Size = Sect.Size;
} else {
- InMemoryStruct<macho::Section> Sect;
- getSection(DRI, Sect);
- Result = MachOObj->getData(Sect->Offset, Sect->Size);
+    macho::Section Sect = getSection(Sec);
+ Offset = Sect.Offset;
+ Size = Sect.Size;
}
+
+ Res = this->getData().substr(Offset, Size);
return object_error::success;
}
-error_code MachOObjectFile::getSectionAlignment(DataRefImpl DRI,
- uint64_t &Result) const {
- if (is64BitLoadCommand(MachOObj.get(), DRI)) {
- InMemoryStruct<macho::Section64> Sect;
- getSection64(DRI, Sect);
- Result = uint64_t(1) << Sect->Align;
+error_code
+MachOObjectFile::getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const {
+ uint32_t Align;
+ if (is64Bit()) {
+ macho::Section64 Sect = getSection64(Sec);
+ Align = Sect.Align;
} else {
- InMemoryStruct<macho::Section> Sect;
- getSection(DRI, Sect);
- Result = uint64_t(1) << Sect->Align;
+ macho::Section Sect = getSection(Sec);
+ Align = Sect.Align;
}
+
+ Res = uint64_t(1) << Align;
return object_error::success;
}
-error_code MachOObjectFile::isSectionText(DataRefImpl DRI,
- bool &Result) const {
- if (is64BitLoadCommand(MachOObj.get(), DRI)) {
- InMemoryStruct<macho::Section64> Sect;
- getSection64(DRI, Sect);
- Result = Sect->Flags & macho::SF_PureInstructions;
- } else {
- InMemoryStruct<macho::Section> Sect;
- getSection(DRI, Sect);
- Result = Sect->Flags & macho::SF_PureInstructions;
- }
+error_code
+MachOObjectFile::isSectionText(DataRefImpl Sec, bool &Res) const {
+ uint32_t Flags = getSectionFlags(this, Sec);
+ Res = Flags & macho::SF_PureInstructions;
return object_error::success;
}
-error_code MachOObjectFile::isSectionData(DataRefImpl DRI,
- bool &Result) const {
+error_code MachOObjectFile::isSectionData(DataRefImpl DRI, bool &Result) const {
// FIXME: Unimplemented.
Result = false;
return object_error::success;
}
-error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI,
- bool &Result) const {
+error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI, bool &Result) const {
// FIXME: Unimplemented.
Result = false;
return object_error::success;
}
-error_code MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
- bool &Result) const {
+error_code
+MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
+ bool &Result) const {
// FIXME: Unimplemented.
Result = true;
return object_error::success;
@@ -623,22 +768,12 @@ error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec,
return object_error::success;
}
-error_code MachOObjectFile::isSectionZeroInit(DataRefImpl DRI,
- bool &Result) const {
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Section64> Sect;
- getSection64(DRI, Sect);
- unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType;
- Result = (SectionType == MachO::SectionTypeZeroFill ||
- SectionType == MachO::SectionTypeZeroFillLarge);
- } else {
- InMemoryStruct<macho::Section> Sect;
- getSection(DRI, Sect);
- unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType;
- Result = (SectionType == MachO::SectionTypeZeroFill ||
- SectionType == MachO::SectionTypeZeroFillLarge);
- }
-
+error_code
+MachOObjectFile::isSectionZeroInit(DataRefImpl Sec, bool &Res) const {
+ uint32_t Flags = getSectionFlags(this, Sec);
+ unsigned SectionType = Flags & MachO::SectionFlagMaskSectionType;
+ Res = SectionType == MachO::SectionTypeZeroFill ||
+ SectionType == MachO::SectionTypeZeroFillLarge;
return object_error::success;
}
@@ -653,11 +788,11 @@ error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec,
return object_error::success;
}
-error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
- DataRefImpl Symb,
- bool &Result) const {
+error_code
+MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
+ bool &Result) const {
SymbolRef::Type ST;
- getSymbolType(Symb, ST);
+ this->getSymbolType(Symb, ST);
if (ST == SymbolRef::ST_Unknown) {
Result = false;
return object_error::success;
@@ -668,164 +803,107 @@ error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
getSectionSize(Sec, SectEnd);
SectEnd += SectBegin;
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Symbol64TableEntry> Entry;
- getSymbol64TableEntry(Symb, Entry);
- uint64_t SymAddr= Entry->Value;
- Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
- } else {
- InMemoryStruct<macho::SymbolTableEntry> Entry;
- getSymbolTableEntry(Symb, Entry);
- uint64_t SymAddr= Entry->Value;
- Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
- }
+ uint64_t SymAddr;
+ getSymbolAddress(Symb, SymAddr);
+ Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
return object_error::success;
}
relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
- DataRefImpl ret;
- ret.d.b = getSectionIndex(Sec);
- return relocation_iterator(RelocationRef(ret, this));
-}
-relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
- uint32_t last_reloc;
- if (is64BitLoadCommand(MachOObj.get(), Sec)) {
- InMemoryStruct<macho::Section64> Sect;
- getSection64(Sec, Sect);
- last_reloc = Sect->NumRelocationTableEntries;
+ uint32_t Offset;
+ if (is64Bit()) {
+ macho::Section64 Sect = getSection64(Sec);
+ Offset = Sect.RelocationTableOffset;
} else {
- InMemoryStruct<macho::Section> Sect;
- getSection(Sec, Sect);
- last_reloc = Sect->NumRelocationTableEntries;
+ macho::Section Sect = getSection(Sec);
+ Offset = Sect.RelocationTableOffset;
}
- DataRefImpl ret;
- ret.d.a = last_reloc;
- ret.d.b = getSectionIndex(Sec);
- return relocation_iterator(RelocationRef(ret, this));
-}
-
-section_iterator MachOObjectFile::begin_sections() const {
- DataRefImpl DRI;
- moveToNextSection(DRI);
- return section_iterator(SectionRef(DRI, this));
-}
-section_iterator MachOObjectFile::end_sections() const {
- DataRefImpl DRI;
- DRI.d.a = MachOObj->getHeader().NumLoadCommands;
- return section_iterator(SectionRef(DRI, this));
+ DataRefImpl Ret;
+ Ret.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
+ return relocation_iterator(RelocationRef(Ret, this));
}
-/*===-- Relocations -------------------------------------------------------===*/
-
-void MachOObjectFile::
-getRelocation(DataRefImpl Rel,
- InMemoryStruct<macho::RelocationEntry> &Res) const {
- uint32_t relOffset;
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Section64> Sect;
- getSection64(Sections[Rel.d.b], Sect);
- relOffset = Sect->RelocationTableOffset;
+relocation_iterator
+MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
+ uint32_t Offset;
+ uint32_t Num;
+ if (is64Bit()) {
+ macho::Section64 Sect = getSection64(Sec);
+ Offset = Sect.RelocationTableOffset;
+ Num = Sect.NumRelocationTableEntries;
} else {
- InMemoryStruct<macho::Section> Sect;
- getSection(Sections[Rel.d.b], Sect);
- relOffset = Sect->RelocationTableOffset;
+ macho::Section Sect = getSection(Sec);
+ Offset = Sect.RelocationTableOffset;
+ Num = Sect.NumRelocationTableEntries;
}
- MachOObj->ReadRelocationEntry(relOffset, Rel.d.a, Res);
+
+ const macho::RelocationEntry *P =
+ reinterpret_cast<const macho::RelocationEntry*>(getPtr(this, Offset));
+
+ DataRefImpl Ret;
+ Ret.p = reinterpret_cast<uintptr_t>(P + Num);
+ return relocation_iterator(RelocationRef(Ret, this));
}
+
error_code MachOObjectFile::getRelocationNext(DataRefImpl Rel,
RelocationRef &Res) const {
- ++Rel.d.a;
+ const macho::RelocationEntry *P =
+ reinterpret_cast<const macho::RelocationEntry *>(Rel.p);
+ Rel.p = reinterpret_cast<uintptr_t>(P + 1);
Res = RelocationRef(Rel, this);
return object_error::success;
}
-error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
- uint64_t &Res) const {
- const uint8_t* sectAddress = 0;
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Section64> Sect;
- getSection64(Sections[Rel.d.b], Sect);
- sectAddress += Sect->Address;
- } else {
- InMemoryStruct<macho::Section> Sect;
- getSection(Sections[Rel.d.b], Sect);
- sectAddress += Sect->Address;
- }
- InMemoryStruct<macho::RelocationEntry> RE;
- getRelocation(Rel, RE);
-
- unsigned Arch = getArch();
- bool isScattered = (Arch != Triple::x86_64) &&
- (RE->Word0 & macho::RF_Scattered);
- uint64_t RelAddr = 0;
- if (isScattered)
- RelAddr = RE->Word0 & 0xFFFFFF;
- else
- RelAddr = RE->Word0;
- Res = reinterpret_cast<uintptr_t>(sectAddress + RelAddr);
- return object_error::success;
+error_code
+MachOObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const {
+ report_fatal_error("getRelocationAddress not implemented in MachOObjectFile");
}
+
error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
uint64_t &Res) const {
- InMemoryStruct<macho::RelocationEntry> RE;
- getRelocation(Rel, RE);
-
- unsigned Arch = getArch();
- bool isScattered = (Arch != Triple::x86_64) &&
- (RE->Word0 & macho::RF_Scattered);
- if (isScattered)
- Res = RE->Word0 & 0xFFFFFF;
- else
- Res = RE->Word0;
+ macho::RelocationEntry RE = getRelocation(Rel);
+ Res = getAnyRelocationAddress(RE);
return object_error::success;
}
-error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
- SymbolRef &Res) const {
- InMemoryStruct<macho::RelocationEntry> RE;
- getRelocation(Rel, RE);
- uint32_t SymbolIdx = RE->Word1 & 0xffffff;
- bool isExtern = (RE->Word1 >> 27) & 1;
- DataRefImpl Sym;
- moveToNextSymbol(Sym);
- if (isExtern) {
- for (unsigned i = 0; i < SymbolIdx; i++) {
- Sym.d.b++;
- moveToNextSymbol(Sym);
- assert(Sym.d.a < MachOObj->getHeader().NumLoadCommands &&
- "Relocation symbol index out of range!");
- }
+error_code
+MachOObjectFile::getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const {
+ macho::RelocationEntry RE = getRelocation(Rel);
+ uint32_t SymbolIdx = getPlainRelocationSymbolNum(RE);
+ bool isExtern = getPlainRelocationExternal(RE);
+ if (!isExtern) {
+ Res = *end_symbols();
+ return object_error::success;
}
+
+ macho::SymtabLoadCommand S = getSymtabLoadCommand();
+ unsigned SymbolTableEntrySize = is64Bit() ?
+ sizeof(macho::Symbol64TableEntry) :
+ sizeof(macho::SymbolTableEntry);
+ uint64_t Offset = S.SymbolTableOffset + SymbolIdx * SymbolTableEntrySize;
+ DataRefImpl Sym;
+ Sym.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
Res = SymbolRef(Sym, this);
return object_error::success;
}
+
error_code MachOObjectFile::getRelocationType(DataRefImpl Rel,
uint64_t &Res) const {
- InMemoryStruct<macho::RelocationEntry> RE;
- getRelocation(Rel, RE);
- Res = RE->Word0;
- Res <<= 32;
- Res |= RE->Word1;
+ macho::RelocationEntry RE = getRelocation(Rel);
+ Res = getAnyRelocationType(RE);
return object_error::success;
}
-error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const {
- // TODO: Support scattered relocations.
- StringRef res;
- InMemoryStruct<macho::RelocationEntry> RE;
- getRelocation(Rel, RE);
- unsigned Arch = getArch();
- bool isScattered = (Arch != Triple::x86_64) &&
- (RE->Word0 & macho::RF_Scattered);
+error_code
+MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
+ StringRef res;
+ uint64_t RType;
+ getRelocationType(Rel, RType);
- unsigned r_type;
- if (isScattered)
- r_type = (RE->Word0 >> 24) & 0xF;
- else
- r_type = (RE->Word1 >> 28) & 0xF;
+ unsigned Arch = this->getArch();
switch (Arch) {
case Triple::x86: {
@@ -837,10 +915,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
"GENERIC_RELOC_LOCAL_SECTDIFF",
"GENERIC_RELOC_TLV" };
- if (r_type > 6)
+ if (RType > 6)
res = "Unknown";
else
- res = Table[r_type];
+ res = Table[RType];
break;
}
case Triple::x86_64: {
@@ -856,10 +934,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
"X86_64_RELOC_SIGNED_4",
"X86_64_RELOC_TLV" };
- if (r_type > 9)
+ if (RType > 9)
res = "Unknown";
else
- res = Table[r_type];
+ res = Table[RType];
break;
}
case Triple::arm: {
@@ -875,10 +953,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
"ARM_RELOC_HALF",
"ARM_RELOC_HALF_SECTDIFF" };
- if (r_type > 9)
+ if (RType > 9)
res = "Unknown";
else
- res = Table[r_type];
+ res = Table[RType];
break;
}
case Triple::ppc: {
@@ -900,7 +978,7 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
"PPC_RELOC_LO14_SECTDIFF",
"PPC_RELOC_LOCAL_SECTDIFF" };
- res = Table[r_type];
+ res = Table[RType];
break;
}
case Triple::UnknownArch:
@@ -910,193 +988,79 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
Result.append(res.begin(), res.end());
return object_error::success;
}
+
error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel,
int64_t &Res) const {
- InMemoryStruct<macho::RelocationEntry> RE;
- getRelocation(Rel, RE);
- bool isExtern = (RE->Word1 >> 27) & 1;
Res = 0;
- if (!isExtern) {
- const uint8_t* sectAddress = base();
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Section64> Sect;
- getSection64(Sections[Rel.d.b], Sect);
- sectAddress += Sect->Offset;
- } else {
- InMemoryStruct<macho::Section> Sect;
- getSection(Sections[Rel.d.b], Sect);
- sectAddress += Sect->Offset;
- }
- Res = reinterpret_cast<uintptr_t>(sectAddress);
- }
return object_error::success;
}
-// Helper to advance a section or symbol iterator multiple increments at a time.
-template<class T>
-error_code advance(T &it, size_t Val) {
- error_code ec;
- while (Val--) {
- it.increment(ec);
- }
- return ec;
-}
-
-template<class T>
-void advanceTo(T &it, size_t Val) {
- if (error_code ec = advance(it, Val))
- report_fatal_error(ec.message());
-}
-
-void MachOObjectFile::printRelocationTargetName(
- InMemoryStruct<macho::RelocationEntry>& RE,
- raw_string_ostream &fmt) const {
- unsigned Arch = getArch();
- bool isScattered = (Arch != Triple::x86_64) &&
- (RE->Word0 & macho::RF_Scattered);
-
- // Target of a scattered relocation is an address. In the interest of
- // generating pretty output, scan through the symbol table looking for a
- // symbol that aligns with that address. If we find one, print it.
- // Otherwise, we just print the hex address of the target.
- if (isScattered) {
- uint32_t Val = RE->Word1;
-
- error_code ec;
- for (symbol_iterator SI = begin_symbols(), SE = end_symbols(); SI != SE;
- SI.increment(ec)) {
- if (ec) report_fatal_error(ec.message());
-
- uint64_t Addr;
- StringRef Name;
-
- if ((ec = SI->getAddress(Addr)))
- report_fatal_error(ec.message());
- if (Addr != Val) continue;
- if ((ec = SI->getName(Name)))
- report_fatal_error(ec.message());
- fmt << Name;
- return;
- }
-
- // If we couldn't find a symbol that this relocation refers to, try
- // to find a section beginning instead.
- for (section_iterator SI = begin_sections(), SE = end_sections(); SI != SE;
- SI.increment(ec)) {
- if (ec) report_fatal_error(ec.message());
-
- uint64_t Addr;
- StringRef Name;
-
- if ((ec = SI->getAddress(Addr)))
- report_fatal_error(ec.message());
- if (Addr != Val) continue;
- if ((ec = SI->getName(Name)))
- report_fatal_error(ec.message());
- fmt << Name;
- return;
- }
-
- fmt << format("0x%x", Val);
- return;
- }
-
- StringRef S;
- bool isExtern = (RE->Word1 >> 27) & 1;
- uint32_t Val = RE->Word1 & 0xFFFFFF;
-
- if (isExtern) {
- symbol_iterator SI = begin_symbols();
- advanceTo(SI, Val);
- SI->getName(S);
- } else {
- section_iterator SI = begin_sections();
- advanceTo(SI, Val);
- SI->getName(S);
- }
-
- fmt << S;
-}
-
-error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
+error_code
+MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const {
- InMemoryStruct<macho::RelocationEntry> RE;
- getRelocation(Rel, RE);
+ macho::RelocationEntry RE = getRelocation(Rel);
- unsigned Arch = getArch();
- bool isScattered = (Arch != Triple::x86_64) &&
- (RE->Word0 & macho::RF_Scattered);
+ unsigned Arch = this->getArch();
std::string fmtbuf;
raw_string_ostream fmt(fmtbuf);
-
- unsigned Type;
- if (isScattered)
- Type = (RE->Word0 >> 24) & 0xF;
- else
- Type = (RE->Word1 >> 28) & 0xF;
-
- bool isPCRel;
- if (isScattered)
- isPCRel = ((RE->Word0 >> 30) & 1);
- else
- isPCRel = ((RE->Word1 >> 24) & 1);
+ unsigned Type = this->getAnyRelocationType(RE);
+ bool IsPCRel = this->getAnyRelocationPCRel(RE);
// Determine any addends that should be displayed with the relocation.
// These require decoding the relocation type, which is triple-specific.
// X86_64 has entirely custom relocation types.
if (Arch == Triple::x86_64) {
- bool isPCRel = ((RE->Word1 >> 24) & 1);
+ bool isPCRel = getAnyRelocationPCRel(RE);
switch (Type) {
case macho::RIT_X86_64_GOTLoad: // X86_64_RELOC_GOT_LOAD
case macho::RIT_X86_64_GOT: { // X86_64_RELOC_GOT
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
fmt << "@GOT";
if (isPCRel) fmt << "PCREL";
break;
}
case macho::RIT_X86_64_Subtractor: { // X86_64_RELOC_SUBTRACTOR
- InMemoryStruct<macho::RelocationEntry> RENext;
DataRefImpl RelNext = Rel;
RelNext.d.a++;
- getRelocation(RelNext, RENext);
+ macho::RelocationEntry RENext = getRelocation(RelNext);
// X86_64_SUBTRACTOR must be followed by a relocation of type
// X86_64_RELOC_UNSIGNED.
// NOTE: Scattered relocations don't exist on x86_64.
- unsigned RType = (RENext->Word1 >> 28) & 0xF;
+ unsigned RType = getAnyRelocationType(RENext);
if (RType != 0)
report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
"X86_64_RELOC_SUBTRACTOR.");
// The X86_64_RELOC_UNSIGNED contains the minuend symbol,
       // X86_64_SUBTRACTOR contains the subtrahend.
- printRelocationTargetName(RENext, fmt);
+ printRelocationTargetName(this, RENext, fmt);
fmt << "-";
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
break;
}
case macho::RIT_X86_64_TLV:
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
fmt << "@TLV";
if (isPCRel) fmt << "P";
break;
case macho::RIT_X86_64_Signed1: // X86_64_RELOC_SIGNED1
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
fmt << "-1";
break;
case macho::RIT_X86_64_Signed2: // X86_64_RELOC_SIGNED2
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
fmt << "-2";
break;
case macho::RIT_X86_64_Signed4: // X86_64_RELOC_SIGNED4
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
fmt << "-4";
break;
default:
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
break;
}
// X86 and ARM share some relocation types in common.
@@ -1106,27 +1070,21 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
case macho::RIT_Pair: // GENERIC_RELOC_PAIR - prints no info
return object_error::success;
case macho::RIT_Difference: { // GENERIC_RELOC_SECTDIFF
- InMemoryStruct<macho::RelocationEntry> RENext;
DataRefImpl RelNext = Rel;
RelNext.d.a++;
- getRelocation(RelNext, RENext);
+ macho::RelocationEntry RENext = getRelocation(RelNext);
// X86 sect diff's must be followed by a relocation of type
// GENERIC_RELOC_PAIR.
- bool isNextScattered = (Arch != Triple::x86_64) &&
- (RENext->Word0 & macho::RF_Scattered);
- unsigned RType;
- if (isNextScattered)
- RType = (RENext->Word0 >> 24) & 0xF;
- else
- RType = (RENext->Word1 >> 28) & 0xF;
+ unsigned RType = getAnyRelocationType(RENext);
+
if (RType != 1)
report_fatal_error("Expected GENERIC_RELOC_PAIR after "
"GENERIC_RELOC_SECTDIFF.");
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
fmt << "-";
- printRelocationTargetName(RENext, fmt);
+ printRelocationTargetName(this, RENext, fmt);
break;
}
}
@@ -1136,37 +1094,30 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
// handled in the generic code.
switch (Type) {
case macho::RIT_Generic_LocalDifference:{// GENERIC_RELOC_LOCAL_SECTDIFF
- InMemoryStruct<macho::RelocationEntry> RENext;
DataRefImpl RelNext = Rel;
RelNext.d.a++;
- getRelocation(RelNext, RENext);
+ macho::RelocationEntry RENext = getRelocation(RelNext);
// X86 sect diff's must be followed by a relocation of type
// GENERIC_RELOC_PAIR.
- bool isNextScattered = (Arch != Triple::x86_64) &&
- (RENext->Word0 & macho::RF_Scattered);
- unsigned RType;
- if (isNextScattered)
- RType = (RENext->Word0 >> 24) & 0xF;
- else
- RType = (RENext->Word1 >> 28) & 0xF;
+ unsigned RType = getAnyRelocationType(RENext);
if (RType != 1)
report_fatal_error("Expected GENERIC_RELOC_PAIR after "
"GENERIC_RELOC_LOCAL_SECTDIFF.");
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
fmt << "-";
- printRelocationTargetName(RENext, fmt);
+ printRelocationTargetName(this, RENext, fmt);
break;
}
case macho::RIT_Generic_TLV: {
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
fmt << "@TLV";
- if (isPCRel) fmt << "P";
+ if (IsPCRel) fmt << "P";
break;
}
default:
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
}
} else { // ARM-specific relocations
switch (Type) {
@@ -1174,33 +1125,21 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
case macho::RIT_ARM_HalfDifference: { // ARM_RELOC_HALF_SECTDIFF
// Half relocations steal a bit from the length field to encode
// whether this is an upper16 or a lower16 relocation.
- bool isUpper;
- if (isScattered)
- isUpper = (RE->Word0 >> 28) & 1;
- else
- isUpper = (RE->Word1 >> 25) & 1;
+ bool isUpper = getAnyRelocationLength(RE) >> 1;
if (isUpper)
fmt << ":upper16:(";
else
fmt << ":lower16:(";
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
- InMemoryStruct<macho::RelocationEntry> RENext;
DataRefImpl RelNext = Rel;
RelNext.d.a++;
- getRelocation(RelNext, RENext);
+ macho::RelocationEntry RENext = getRelocation(RelNext);
// ARM half relocs must be followed by a relocation of type
// ARM_RELOC_PAIR.
- bool isNextScattered = (Arch != Triple::x86_64) &&
- (RENext->Word0 & macho::RF_Scattered);
- unsigned RType;
- if (isNextScattered)
- RType = (RENext->Word0 >> 24) & 0xF;
- else
- RType = (RENext->Word1 >> 28) & 0xF;
-
+ unsigned RType = getAnyRelocationType(RENext);
if (RType != 1)
report_fatal_error("Expected ARM_RELOC_PAIR after "
"GENERIC_RELOC_HALF");
@@ -1214,38 +1153,30 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
// symbol/section pointer of the follow-on relocation.
if (Type == macho::RIT_ARM_HalfDifference) {
fmt << "-";
- printRelocationTargetName(RENext, fmt);
+ printRelocationTargetName(this, RENext, fmt);
}
fmt << ")";
break;
}
default: {
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
}
}
}
} else
- printRelocationTargetName(RE, fmt);
+ printRelocationTargetName(this, RE, fmt);
fmt.flush();
Result.append(fmtbuf.begin(), fmtbuf.end());
return object_error::success;
}
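
The ARM "half" branch above relies on one detail: the 2-bit r_length field is repurposed so that its high bit marks an :upper16: half (versus :lower16:), which is why the code shifts the length right by one. A one-line sketch of that check (hypothetical name):

static bool isUpperHalfRelocation(unsigned TwoBitLength) {
  return (TwoBitLength >> 1) & 1;   // bit 1 set -> upper 16 bits of the value
}
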
-error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
- bool &Result) const {
- InMemoryStruct<macho::RelocationEntry> RE;
- getRelocation(Rel, RE);
-
+error_code
+MachOObjectFile::getRelocationHidden(DataRefImpl Rel, bool &Result) const {
unsigned Arch = getArch();
- bool isScattered = (Arch != Triple::x86_64) &&
- (RE->Word0 & macho::RF_Scattered);
- unsigned Type;
- if (isScattered)
- Type = (RE->Word0 >> 24) & 0xF;
- else
- Type = (RE->Word1 >> 28) & 0xF;
+ uint64_t Type;
+ getRelocationType(Rel, Type);
Result = false;
@@ -1259,12 +1190,10 @@ error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
if (Type == macho::RIT_X86_64_Unsigned && Rel.d.a > 0) {
DataRefImpl RelPrev = Rel;
RelPrev.d.a--;
- InMemoryStruct<macho::RelocationEntry> REPrev;
- getRelocation(RelPrev, REPrev);
-
- unsigned PrevType = (REPrev->Word1 >> 28) & 0xF;
-
- if (PrevType == macho::RIT_X86_64_Subtractor) Result = true;
+ uint64_t PrevType;
+ getRelocationType(RelPrev, PrevType);
+ if (PrevType == macho::RIT_X86_64_Subtractor)
+ Result = true;
}
}
@@ -1281,16 +1210,70 @@ error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData,
report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
}
+symbol_iterator MachOObjectFile::begin_symbols() const {
+ DataRefImpl DRI;
+ if (!SymtabLoadCmd)
+ return symbol_iterator(SymbolRef(DRI, this));
+
+ macho::SymtabLoadCommand Symtab = getSymtabLoadCommand();
+ DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.SymbolTableOffset));
+ return symbol_iterator(SymbolRef(DRI, this));
+}
+
+symbol_iterator MachOObjectFile::end_symbols() const {
+ DataRefImpl DRI;
+ if (!SymtabLoadCmd)
+ return symbol_iterator(SymbolRef(DRI, this));
+
+ macho::SymtabLoadCommand Symtab = getSymtabLoadCommand();
+ unsigned SymbolTableEntrySize = is64Bit() ?
+ sizeof(macho::Symbol64TableEntry) :
+ sizeof(macho::SymbolTableEntry);
+ unsigned Offset = Symtab.SymbolTableOffset +
+ Symtab.NumSymbolTableEntries * SymbolTableEntrySize;
+ DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
+ return symbol_iterator(SymbolRef(DRI, this));
+}
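
begin_symbols/end_symbols above derive the iterator range directly from the LC_SYMTAB load command: begin points at the table's file offset, and end lies NumSymbolTableEntries fixed-size records later. A sketch of that bound computation (assumed field names; 12 and 16 are the on-disk sizes of nlist and nlist_64):

#include <cstdint>

struct SymtabCmd {
  uint32_t SymbolTableOffset;
  uint32_t NumSymbolTableEntries;
};

static void symbolRange(const char *FileBase, const SymtabCmd &S, bool Is64Bit,
                        const char *&Begin, const char *&End) {
  const unsigned EntrySize = Is64Bit ? 16 : 12;
  Begin = FileBase + S.SymbolTableOffset;
  End = Begin + uint64_t(S.NumSymbolTableEntries) * EntrySize;
}
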
+
+symbol_iterator MachOObjectFile::begin_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
+}
+
+symbol_iterator MachOObjectFile::end_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
+}
+
+section_iterator MachOObjectFile::begin_sections() const {
+ DataRefImpl DRI;
+ return section_iterator(SectionRef(DRI, this));
+}
+
+section_iterator MachOObjectFile::end_sections() const {
+ DataRefImpl DRI;
+ DRI.d.a = Sections.size();
+ return section_iterator(SectionRef(DRI, this));
+}
-/*===-- Miscellaneous -----------------------------------------------------===*/
+library_iterator MachOObjectFile::begin_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+library_iterator MachOObjectFile::end_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
uint8_t MachOObjectFile::getBytesInAddress() const {
- return MachOObj->is64Bit() ? 8 : 4;
+ return is64Bit() ? 8 : 4;
}
StringRef MachOObjectFile::getFileFormatName() const {
- if (!MachOObj->is64Bit()) {
- switch (MachOObj->getHeader().CPUType) {
+ unsigned CPUType = getCPUType(this);
+ if (!is64Bit()) {
+ switch (CPUType) {
case llvm::MachO::CPUTypeI386:
return "Mach-O 32-bit i386";
case llvm::MachO::CPUTypeARM:
@@ -1298,18 +1281,18 @@ StringRef MachOObjectFile::getFileFormatName() const {
case llvm::MachO::CPUTypePowerPC:
return "Mach-O 32-bit ppc";
default:
- assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 0 &&
+ assert((CPUType & llvm::MachO::CPUArchABI64) == 0 &&
"64-bit object file when we're not 64-bit?");
return "Mach-O 32-bit unknown";
}
}
// Make sure the cpu type has the correct mask.
- assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64)
+ assert((CPUType & llvm::MachO::CPUArchABI64)
== llvm::MachO::CPUArchABI64 &&
"32-bit object file when we're 64-bit?");
- switch (MachOObj->getHeader().CPUType) {
+ switch (CPUType) {
case llvm::MachO::CPUTypeX86_64:
return "Mach-O 64-bit x86-64";
case llvm::MachO::CPUTypePowerPC64:
@@ -1320,7 +1303,7 @@ StringRef MachOObjectFile::getFileFormatName() const {
}
unsigned MachOObjectFile::getArch() const {
- switch (MachOObj->getHeader().CPUType) {
+ switch (getCPUType(this)) {
case llvm::MachO::CPUTypeI386:
return Triple::x86;
case llvm::MachO::CPUTypeX86_64:
@@ -1336,5 +1319,260 @@ unsigned MachOObjectFile::getArch() const {
}
}
+StringRef MachOObjectFile::getLoadName() const {
+ // TODO: Implement
+ report_fatal_error("get_load_name() unimplemented in MachOObjectFile");
+}
+
+relocation_iterator MachOObjectFile::getSectionRelBegin(unsigned Index) const {
+ DataRefImpl DRI;
+ DRI.d.a = Index;
+ return getSectionRelBegin(DRI);
+}
+
+relocation_iterator MachOObjectFile::getSectionRelEnd(unsigned Index) const {
+ DataRefImpl DRI;
+ DRI.d.a = Index;
+ return getSectionRelEnd(DRI);
+}
+
+StringRef
+MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
+ ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
+ return parseSegmentOrSectionName(Raw.data());
+}
+
+ArrayRef<char>
+MachOObjectFile::getSectionRawName(DataRefImpl Sec) const {
+ const SectionBase *Base =
+ reinterpret_cast<const SectionBase*>(Sections[Sec.d.a]);
+ return ArrayRef<char>(Base->Name);
+}
+
+ArrayRef<char>
+MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const {
+ const SectionBase *Base =
+ reinterpret_cast<const SectionBase*>(Sections[Sec.d.a]);
+ return ArrayRef<char>(Base->SegmentName);
+}
+
+bool
+MachOObjectFile::isRelocationScattered(const macho::RelocationEntry &RE)
+ const {
+ if (getCPUType(this) == llvm::MachO::CPUTypeX86_64)
+ return false;
+ return getPlainRelocationAddress(RE) & macho::RF_Scattered;
+}
+
+unsigned MachOObjectFile::getPlainRelocationSymbolNum(const macho::RelocationEntry &RE) const {
+ if (isLittleEndian())
+ return RE.Word1 & 0xffffff;
+ return RE.Word1 >> 8;
+}
+
+bool MachOObjectFile::getPlainRelocationExternal(const macho::RelocationEntry &RE) const {
+ if (isLittleEndian())
+ return (RE.Word1 >> 27) & 1;
+ return (RE.Word1 >> 4) & 1;
+}
+
+bool
+MachOObjectFile::getScatteredRelocationScattered(const macho::RelocationEntry &RE) const {
+ return RE.Word0 >> 31;
+}
+
+uint32_t
+MachOObjectFile::getScatteredRelocationValue(const macho::RelocationEntry &RE) const {
+ return RE.Word1;
+}
+
+unsigned
+MachOObjectFile::getAnyRelocationAddress(const macho::RelocationEntry &RE) const {
+ if (isRelocationScattered(RE))
+ return getScatteredRelocationAddress(RE);
+ return getPlainRelocationAddress(RE);
+}
+
+unsigned
+MachOObjectFile::getAnyRelocationPCRel(const macho::RelocationEntry &RE) const {
+ if (isRelocationScattered(RE))
+ return getScatteredRelocationPCRel(this, RE);
+ return getPlainRelocationPCRel(this, RE);
+}
+
+unsigned
+MachOObjectFile::getAnyRelocationLength(const macho::RelocationEntry &RE) const {
+ if (isRelocationScattered(RE))
+ return getScatteredRelocationLength(RE);
+ return getPlainRelocationLength(this, RE);
+}
+
+unsigned
+MachOObjectFile::getAnyRelocationType(const macho::RelocationEntry &RE) const {
+ if (isRelocationScattered(RE))
+ return getScatteredRelocationType(RE);
+ return getPlainRelocationType(this, RE);
+}
+
+SectionRef
+MachOObjectFile::getRelocationSection(const macho::RelocationEntry &RE) const {
+ if (isRelocationScattered(RE) || getPlainRelocationExternal(RE))
+ return *end_sections();
+ unsigned SecNum = getPlainRelocationSymbolNum(RE) - 1;
+ DataRefImpl DRI;
+ DRI.d.a = SecNum;
+ return SectionRef(DRI, this);
+}
+
+MachOObjectFile::LoadCommandInfo
+MachOObjectFile::getFirstLoadCommandInfo() const {
+ MachOObjectFile::LoadCommandInfo Load;
+
+ unsigned HeaderSize = is64Bit() ? macho::Header64Size : macho::Header32Size;
+ Load.Ptr = getPtr(this, HeaderSize);
+ Load.C = getStruct<macho::LoadCommand>(this, Load.Ptr);
+ return Load;
+}
+
+MachOObjectFile::LoadCommandInfo
+MachOObjectFile::getNextLoadCommandInfo(const LoadCommandInfo &L) const {
+ MachOObjectFile::LoadCommandInfo Next;
+ Next.Ptr = L.Ptr + L.C.Size;
+ Next.C = getStruct<macho::LoadCommand>(this, Next.Ptr);
+ return Next;
+}
+
+macho::Section MachOObjectFile::getSection(DataRefImpl DRI) const {
+ return getStruct<macho::Section>(this, Sections[DRI.d.a]);
+}
+
+macho::Section64 MachOObjectFile::getSection64(DataRefImpl DRI) const {
+ return getStruct<macho::Section64>(this, Sections[DRI.d.a]);
+}
+
+macho::Section MachOObjectFile::getSection(const LoadCommandInfo &L,
+ unsigned Index) const {
+ const char *Sec = getSectionPtr(this, L, Index);
+ return getStruct<macho::Section>(this, Sec);
+}
+
+macho::Section64 MachOObjectFile::getSection64(const LoadCommandInfo &L,
+ unsigned Index) const {
+ const char *Sec = getSectionPtr(this, L, Index);
+ return getStruct<macho::Section64>(this, Sec);
+}
+
+macho::SymbolTableEntry
+MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI) const {
+ const char *P = reinterpret_cast<const char *>(DRI.p);
+ return getStruct<macho::SymbolTableEntry>(this, P);
+}
+
+macho::Symbol64TableEntry
+MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI) const {
+ const char *P = reinterpret_cast<const char *>(DRI.p);
+ return getStruct<macho::Symbol64TableEntry>(this, P);
+}
+
+macho::LinkeditDataLoadCommand
+MachOObjectFile::getLinkeditDataLoadCommand(const MachOObjectFile::LoadCommandInfo &L) const {
+ return getStruct<macho::LinkeditDataLoadCommand>(this, L.Ptr);
+}
+
+macho::SegmentLoadCommand
+MachOObjectFile::getSegmentLoadCommand(const LoadCommandInfo &L) const {
+ return getStruct<macho::SegmentLoadCommand>(this, L.Ptr);
+}
+
+macho::Segment64LoadCommand
+MachOObjectFile::getSegment64LoadCommand(const LoadCommandInfo &L) const {
+ return getStruct<macho::Segment64LoadCommand>(this, L.Ptr);
+}
+
+macho::LinkerOptionsLoadCommand
+MachOObjectFile::getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const {
+ return getStruct<macho::LinkerOptionsLoadCommand>(this, L.Ptr);
+}
+
+macho::RelocationEntry
+MachOObjectFile::getRelocation(DataRefImpl Rel) const {
+ const char *P = reinterpret_cast<const char *>(Rel.p);
+ return getStruct<macho::RelocationEntry>(this, P);
+}
+
+macho::Header MachOObjectFile::getHeader() const {
+ return getStruct<macho::Header>(this, getPtr(this, 0));
+}
+
+macho::Header64Ext MachOObjectFile::getHeader64Ext() const {
+ return
+ getStruct<macho::Header64Ext>(this, getPtr(this, sizeof(macho::Header)));
+}
+
+macho::IndirectSymbolTableEntry MachOObjectFile::getIndirectSymbolTableEntry(
+ const macho::DysymtabLoadCommand &DLC,
+ unsigned Index) const {
+ uint64_t Offset = DLC.IndirectSymbolTableOffset +
+ Index * sizeof(macho::IndirectSymbolTableEntry);
+ return getStruct<macho::IndirectSymbolTableEntry>(this, getPtr(this, Offset));
+}
+
+macho::DataInCodeTableEntry
+MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset,
+ unsigned Index) const {
+ uint64_t Offset = DataOffset + Index * sizeof(macho::DataInCodeTableEntry);
+ return getStruct<macho::DataInCodeTableEntry>(this, getPtr(this, Offset));
+}
+
+macho::SymtabLoadCommand MachOObjectFile::getSymtabLoadCommand() const {
+ return getStruct<macho::SymtabLoadCommand>(this, SymtabLoadCmd);
+}
+
+macho::DysymtabLoadCommand MachOObjectFile::getDysymtabLoadCommand() const {
+ return getStruct<macho::DysymtabLoadCommand>(this, DysymtabLoadCmd);
+}
+
+StringRef MachOObjectFile::getStringTableData() const {
+ macho::SymtabLoadCommand S = getSymtabLoadCommand();
+ return getData().substr(S.StringTableOffset, S.StringTableSize);
+}
+
+bool MachOObjectFile::is64Bit() const {
+ return getType() == getMachOType(false, true) ||
+ getType() == getMachOType(true, true);
+}
+
+void MachOObjectFile::ReadULEB128s(uint64_t Index,
+ SmallVectorImpl<uint64_t> &Out) const {
+ DataExtractor extractor(ObjectFile::getData(), true, 0);
+
+ uint32_t offset = Index;
+ uint64_t data = 0;
+ while (uint64_t delta = extractor.getULEB128(&offset)) {
+ data += delta;
+ Out.push_back(data);
+ }
+}
+
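A small worked example of the delta decoding in ReadULEB128s above (illustrative): the loop stops at the first zero ULEB128 and pushes running sums.

    // Input bytes : 0x02 0x03 0x00
    // Deltas read : 2, 3, then 0 (which terminates the loop)
    // Out         : {2, 5}
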
+ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
+ StringRef Magic = Buffer->getBuffer().slice(0, 4);
+ error_code ec;
+ ObjectFile *Ret;
+ if (Magic == "\xFE\xED\xFA\xCE")
+ Ret = new MachOObjectFile(Buffer, false, false, ec);
+ else if (Magic == "\xCE\xFA\xED\xFE")
+ Ret = new MachOObjectFile(Buffer, true, false, ec);
+ else if (Magic == "\xFE\xED\xFA\xCF")
+ Ret = new MachOObjectFile(Buffer, false, true, ec);
+ else if (Magic == "\xCF\xFA\xED\xFE")
+ Ret = new MachOObjectFile(Buffer, true, true, ec);
+ else
+ return NULL;
+
+ if (ec)
+ return NULL;
+ return Ret;
+}
+
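The four magic strings checked above are the Mach-O header magics and their byte-swapped forms; assuming the second and third constructor arguments are IsLittleEndian and Is64Bits, they map as follows:

    // "\xFE\xED\xFA\xCE"  MH_MAGIC     -> big-endian,    32-bit
    // "\xCE\xFA\xED\xFE"  MH_CIGAM     -> little-endian, 32-bit
    // "\xFE\xED\xFA\xCF"  MH_MAGIC_64  -> big-endian,    64-bit
    // "\xCF\xFA\xED\xFE"  MH_CIGAM_64  -> little-endian, 64-bit
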
} // end namespace object
} // end namespace llvm
diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp
index f061ea7..3e2c78e 100644
--- a/contrib/llvm/lib/Object/Object.cpp
+++ b/contrib/llvm/lib/Object/Object.cpp
@@ -12,12 +12,51 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm-c/Object.h"
using namespace llvm;
using namespace object;
+inline ObjectFile *unwrap(LLVMObjectFileRef OF) {
+ return reinterpret_cast<ObjectFile*>(OF);
+}
+
+inline LLVMObjectFileRef wrap(const ObjectFile *OF) {
+ return reinterpret_cast<LLVMObjectFileRef>(const_cast<ObjectFile*>(OF));
+}
+
+inline section_iterator *unwrap(LLVMSectionIteratorRef SI) {
+ return reinterpret_cast<section_iterator*>(SI);
+}
+
+inline LLVMSectionIteratorRef
+wrap(const section_iterator *SI) {
+ return reinterpret_cast<LLVMSectionIteratorRef>
+ (const_cast<section_iterator*>(SI));
+}
+
+inline symbol_iterator *unwrap(LLVMSymbolIteratorRef SI) {
+ return reinterpret_cast<symbol_iterator*>(SI);
+}
+
+inline LLVMSymbolIteratorRef
+wrap(const symbol_iterator *SI) {
+ return reinterpret_cast<LLVMSymbolIteratorRef>
+ (const_cast<symbol_iterator*>(SI));
+}
+
+inline relocation_iterator *unwrap(LLVMRelocationIteratorRef SI) {
+ return reinterpret_cast<relocation_iterator*>(SI);
+}
+
+inline LLVMRelocationIteratorRef
+wrap(const relocation_iterator *SI) {
+ return reinterpret_cast<LLVMRelocationIteratorRef>
+ (const_cast<relocation_iterator*>(SI));
+}
+
// ObjectFile creation
LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
return wrap(ObjectFile::createObjectFile(unwrap(MemBuf)));
diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp
index 860c87b..77fd995 100644
--- a/contrib/llvm/lib/Object/ObjectFile.cpp
+++ b/contrib/llvm/lib/Object/ObjectFile.cpp
@@ -23,10 +23,16 @@ using namespace object;
void ObjectFile::anchor() { }
-ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec)
+ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source)
: Binary(Type, source) {
}
+error_code ObjectFile::getSymbolAlignment(DataRefImpl DRI,
+ uint32_t &Result) const {
+ Result = 0;
+ return object_error::success;
+}
+
ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
if (!Object || Object->getBufferSize() < 64)
return 0;
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
index 560d7eb..18d3db5 100644
--- a/contrib/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/system_error.h"
#include <cerrno>
#include <cstdlib>
+#include <map>
using namespace llvm;
using namespace cl;
@@ -106,6 +107,17 @@ void Option::addArgument() {
MarkOptionsChanged();
}
+// This collects the different option categories that have been registered.
+typedef SmallPtrSet<OptionCategory*,16> OptionCatSet;
+static ManagedStatic<OptionCatSet> RegisteredOptionCategories;
+
+// Initialise the general option category.
+OptionCategory llvm::cl::GeneralCategory("General options");
+
+void OptionCategory::registerCategory() {
+  RegisteredOptionCategories->insert(this);
+}
//===----------------------------------------------------------------------===//
// Basic, shared command line option processing machinery.
@@ -1222,11 +1234,20 @@ sortOpts(StringMap<Option*> &OptMap,
namespace {
class HelpPrinter {
+protected:
const bool ShowHidden;
+ typedef SmallVector<std::pair<const char *, Option*>,128> StrOptionPairVector;
+ // Print the options. Opts is assumed to be alphabetically sorted.
+ virtual void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) {
+ for (size_t i = 0, e = Opts.size(); i != e; ++i)
+ Opts[i].second->printOptionInfo(MaxArgLen);
+ }
public:
explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {}
+ virtual ~HelpPrinter() {}
+ // Invoke the printer.
void operator=(bool Value) {
if (Value == false) return;
@@ -1236,7 +1257,7 @@ public:
StringMap<Option*> OptMap;
GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
- SmallVector<std::pair<const char *, Option*>, 128> Opts;
+ StrOptionPairVector Opts;
sortOpts(OptMap, Opts, ShowHidden);
if (ProgramOverview)
@@ -1267,12 +1288,12 @@ public:
MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth());
outs() << "OPTIONS:\n";
- for (size_t i = 0, e = Opts.size(); i != e; ++i)
- Opts[i].second->printOptionInfo(MaxArgLen);
+ printOptions(Opts, MaxArgLen);
// Print any extra help the user has declared.
for (std::vector<const char *>::iterator I = MoreHelp->begin(),
- E = MoreHelp->end(); I != E; ++I)
+ E = MoreHelp->end();
+ I != E; ++I)
outs() << *I;
MoreHelp->clear();
@@ -1280,21 +1301,152 @@ public:
exit(1);
}
};
+
+class CategorizedHelpPrinter : public HelpPrinter {
+public:
+ explicit CategorizedHelpPrinter(bool showHidden) : HelpPrinter(showHidden) {}
+
+ // Helper function for printOptions().
+  // It shall return true if A's name should be lexicographically
+  // ordered before B's name. It returns false otherwise.
+ static bool OptionCategoryCompare(OptionCategory *A, OptionCategory *B) {
+ int Length = strcmp(A->getName(), B->getName());
+ assert(Length != 0 && "Duplicate option categories");
+ return Length < 0;
+ }
+
+  // Make sure we inherit our base class's operator=().
+  using HelpPrinter::operator=;
+
+protected:
+ virtual void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) {
+ std::vector<OptionCategory *> SortedCategories;
+ std::map<OptionCategory *, std::vector<Option *> > CategorizedOptions;
+
+    // Collect the registered option categories into a vector in preparation
+    // for sorting.
+ for (OptionCatSet::const_iterator I = RegisteredOptionCategories->begin(),
+ E = RegisteredOptionCategories->end();
+ I != E; ++I)
+ SortedCategories.push_back(*I);
+
+ // Sort the different option categories alphabetically.
+ assert(SortedCategories.size() > 0 && "No option categories registered!");
+ std::sort(SortedCategories.begin(), SortedCategories.end(),
+ OptionCategoryCompare);
+
+ // Create map to empty vectors.
+ for (std::vector<OptionCategory *>::const_iterator
+ I = SortedCategories.begin(),
+ E = SortedCategories.end();
+ I != E; ++I)
+ CategorizedOptions[*I] = std::vector<Option *>();
+
+ // Walk through pre-sorted options and assign into categories.
+ // Because the options are already alphabetically sorted the
+ // options within categories will also be alphabetically sorted.
+ for (size_t I = 0, E = Opts.size(); I != E; ++I) {
+ Option *Opt = Opts[I].second;
+ assert(CategorizedOptions.count(Opt->Category) > 0 &&
+ "Option has an unregistered category");
+ CategorizedOptions[Opt->Category].push_back(Opt);
+ }
+
+ // Now do printing.
+ for (std::vector<OptionCategory *>::const_iterator
+ Category = SortedCategories.begin(),
+ E = SortedCategories.end();
+ Category != E; ++Category) {
+ // Hide empty categories for -help, but show for -help-hidden.
+ bool IsEmptyCategory = CategorizedOptions[*Category].size() == 0;
+ if (!ShowHidden && IsEmptyCategory)
+ continue;
+
+ // Print category information.
+ outs() << "\n";
+ outs() << (*Category)->getName() << ":\n";
+
+ // Check if description is set.
+ if ((*Category)->getDescription() != 0)
+ outs() << (*Category)->getDescription() << "\n\n";
+ else
+ outs() << "\n";
+
+      // When using -help-hidden, explicitly state if the category has no
+      // options associated with it.
+ if (IsEmptyCategory) {
+ outs() << " This option category has no options.\n";
+ continue;
+ }
+ // Loop over the options in the category and print.
+ for (std::vector<Option *>::const_iterator
+ Opt = CategorizedOptions[*Category].begin(),
+ E = CategorizedOptions[*Category].end();
+ Opt != E; ++Opt)
+ (*Opt)->printOptionInfo(MaxArgLen);
+ }
+ }
+};
+
+// This wraps the uncategorized and categorized help printers and decides
+// at run time which one should be invoked.
+class HelpPrinterWrapper {
+private:
+ HelpPrinter &UncategorizedPrinter;
+ CategorizedHelpPrinter &CategorizedPrinter;
+
+public:
+ explicit HelpPrinterWrapper(HelpPrinter &UncategorizedPrinter,
+ CategorizedHelpPrinter &CategorizedPrinter) :
+ UncategorizedPrinter(UncategorizedPrinter),
+ CategorizedPrinter(CategorizedPrinter) { }
+
+ // Invoke the printer.
+ void operator=(bool Value);
+};
+
} // End anonymous namespace
-// Define the two HelpPrinter instances that are used to print out help, or
-// help-hidden...
-//
-static HelpPrinter NormalPrinter(false);
-static HelpPrinter HiddenPrinter(true);
+// Declare the four HelpPrinter instances that are used to print out help or
+// help-hidden, either as an uncategorized list or grouped by category.
+static HelpPrinter UncategorizedNormalPrinter(false);
+static HelpPrinter UncategorizedHiddenPrinter(true);
+static CategorizedHelpPrinter CategorizedNormalPrinter(false);
+static CategorizedHelpPrinter CategorizedHiddenPrinter(true);
+
+// Declare HelpPrinter wrappers that will decide at run time whether or not to
+// invoke a categorized help printer.
+static HelpPrinterWrapper WrappedNormalPrinter(UncategorizedNormalPrinter,
+ CategorizedNormalPrinter);
+static HelpPrinterWrapper WrappedHiddenPrinter(UncategorizedHiddenPrinter,
+ CategorizedHiddenPrinter);
+
+// Define uncategorized help printers.
+// -help-list is hidden by default because if Option categories are being used
+// then -help behaves the same as -help-list.
static cl::opt<HelpPrinter, true, parser<bool> >
-HOp("help", cl::desc("Display available options (-help-hidden for more)"),
- cl::location(NormalPrinter), cl::ValueDisallowed);
+HLOp("help-list",
+ cl::desc("Display list of available options (-help-list-hidden for more)"),
+ cl::location(UncategorizedNormalPrinter), cl::Hidden, cl::ValueDisallowed);
static cl::opt<HelpPrinter, true, parser<bool> >
+HLHOp("help-list-hidden",
+ cl::desc("Display list of all available options"),
+ cl::location(UncategorizedHiddenPrinter), cl::Hidden, cl::ValueDisallowed);
+
+// Define uncategorized/categorized help printers. These printers change their
+// behaviour at runtime depending on whether one or more Option categories have
+// been declared.
+static cl::opt<HelpPrinterWrapper, true, parser<bool> >
+HOp("help", cl::desc("Display available options (-help-hidden for more)"),
+ cl::location(WrappedNormalPrinter), cl::ValueDisallowed);
+
+static cl::opt<HelpPrinterWrapper, true, parser<bool> >
HHOp("help-hidden", cl::desc("Display all available options"),
- cl::location(HiddenPrinter), cl::Hidden, cl::ValueDisallowed);
+ cl::location(WrappedHiddenPrinter), cl::Hidden, cl::ValueDisallowed);
+
static cl::opt<bool>
PrintOptions("print-options",
@@ -1306,6 +1458,24 @@ PrintAllOptions("print-all-options",
cl::desc("Print all option values after command line parsing"),
cl::Hidden, cl::init(false));
+void HelpPrinterWrapper::operator=(bool Value) {
+ if (Value == false)
+ return;
+
+ // Decide which printer to invoke. If more than one option category is
+ // registered then it is useful to show the categorized help instead of
+ // uncategorized help.
+ if (RegisteredOptionCategories->size() > 1) {
+    // Unhide the -help-list option so the user can still get uncategorized
+    // output if they want it.
+ HLOp.setHiddenFlag(NotHidden);
+
+ CategorizedPrinter = true; // Invoke categorized printer
+ }
+ else
+ UncategorizedPrinter = true; // Invoke uncategorized printer
+}
+
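A minimal sketch of how a tool opts into the categorized output (hypothetical client code, assuming the cl::OptionCategory and cl::cat declarations added to CommandLine.h alongside this change):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    static cl::OptionCategory StageCat("Stage Selection Options");
    static cl::opt<bool> RunOpt("run-opt", cl::desc("Run the optimizer"),
                                cl::cat(StageCat));

    int main(int argc, char **argv) {
      // With a second category registered besides GeneralCategory, -help
      // prints the categorized listing and -help-list becomes visible.
      cl::ParseCommandLineOptions(argc, argv, "categorized help demo\n");
      return 0;
    }
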
// Print the value of each option.
void cl::PrintOptionValues() {
if (!PrintOptions && !PrintAllOptions) return;
@@ -1393,14 +1563,22 @@ VersOp("version", cl::desc("Display the version of this program"),
cl::location(VersionPrinterInstance), cl::ValueDisallowed);
// Utility function for printing the help message.
-void cl::PrintHelpMessage() {
- // This looks weird, but it actually prints the help message. The
- // NormalPrinter variable is a HelpPrinter and the help gets printed when
- // its operator= is invoked. That's because the "normal" usages of the
- // help printer is to be assigned true/false depending on whether the
- // -help option was given or not. Since we're circumventing that we have
- // to make it look like -help was given, so we assign true.
- NormalPrinter = true;
+void cl::PrintHelpMessage(bool Hidden, bool Categorized) {
+  // This looks weird, but it actually prints the help message. The Printers
+  // are HelpPrinter instances, and the help gets printed when their operator=
+  // is invoked. That's because the "normal" usage of a help printer is to be
+  // assigned true/false depending on whether -help or -help-hidden was given
+  // or not. Since we're circumventing that, we have to make it look like -help
+  // or -help-hidden was given, so we assign true.
+
+ if (!Hidden && !Categorized)
+ UncategorizedNormalPrinter = true;
+ else if (!Hidden && Categorized)
+ CategorizedNormalPrinter = true;
+ else if (Hidden && !Categorized)
+ UncategorizedHiddenPrinter = true;
+ else
+ CategorizedHiddenPrinter = true;
}
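For example, assuming the matching declaration in CommandLine.h, callers can force either form directly:

    // cl::PrintHelpMessage(false, false); // uncategorized, visible options only
    // cl::PrintHelpMessage(true, true);   // categorized, hidden options included
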
/// Utility function for printing version number.
@@ -1418,3 +1596,13 @@ void cl::AddExtraVersionPrinter(void (*func)()) {
ExtraVersionPrinters->push_back(func);
}
+
+void cl::getRegisteredOptions(StringMap<Option*> &Map) {
+  // Get all the options.
+  SmallVector<Option*, 4> PositionalOpts; // Not used.
+  SmallVector<Option*, 4> SinkOpts;       // Not used.
+  assert(Map.size() == 0 && "StringMap must be empty");
+  GetOptionInfo(PositionalOpts, SinkOpts, Map);
+}
diff --git a/contrib/llvm/lib/Support/Compression.cpp b/contrib/llvm/lib/Support/Compression.cpp
new file mode 100644
index 0000000..fd8a874
--- /dev/null
+++ b/contrib/llvm/lib/Support/Compression.cpp
@@ -0,0 +1,97 @@
+//===--- Compression.cpp - Compression implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements compression functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Compression.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#if LLVM_ENABLE_ZLIB == 1 && HAVE_ZLIB_H
+#include <zlib.h>
+#endif
+
+using namespace llvm;
+
+#if LLVM_ENABLE_ZLIB == 1 && HAVE_LIBZ
+static int encodeZlibCompressionLevel(zlib::CompressionLevel Level) {
+ switch (Level) {
+ case zlib::NoCompression: return 0;
+ case zlib::BestSpeedCompression: return 1;
+ case zlib::DefaultCompression: return Z_DEFAULT_COMPRESSION;
+ case zlib::BestSizeCompression: return 9;
+ }
+ llvm_unreachable("Invalid zlib::CompressionLevel!");
+}
+
+static zlib::Status encodeZlibReturnValue(int ReturnValue) {
+ switch (ReturnValue) {
+ case Z_OK: return zlib::StatusOK;
+ case Z_MEM_ERROR: return zlib::StatusOutOfMemory;
+ case Z_BUF_ERROR: return zlib::StatusBufferTooShort;
+ case Z_STREAM_ERROR: return zlib::StatusInvalidArg;
+ case Z_DATA_ERROR: return zlib::StatusInvalidData;
+ default: llvm_unreachable("unknown zlib return status!");
+ }
+}
+
+bool zlib::isAvailable() { return true; }
+zlib::Status zlib::compress(StringRef InputBuffer,
+ OwningPtr<MemoryBuffer> &CompressedBuffer,
+ CompressionLevel Level) {
+ unsigned long CompressedSize = ::compressBound(InputBuffer.size());
+ OwningArrayPtr<char> TmpBuffer(new char[CompressedSize]);
+ int CLevel = encodeZlibCompressionLevel(Level);
+ Status Res = encodeZlibReturnValue(::compress2(
+ (Bytef *)TmpBuffer.get(), &CompressedSize,
+ (const Bytef *)InputBuffer.data(), InputBuffer.size(), CLevel));
+ if (Res == StatusOK) {
+ CompressedBuffer.reset(MemoryBuffer::getMemBufferCopy(
+ StringRef(TmpBuffer.get(), CompressedSize)));
+ // Tell MSan that memory initialized by zlib is valid.
+ __msan_unpoison(CompressedBuffer->getBufferStart(), CompressedSize);
+ }
+ return Res;
+}
+
+zlib::Status zlib::uncompress(StringRef InputBuffer,
+ OwningPtr<MemoryBuffer> &UncompressedBuffer,
+ size_t UncompressedSize) {
+ OwningArrayPtr<char> TmpBuffer(new char[UncompressedSize]);
+ Status Res = encodeZlibReturnValue(
+ ::uncompress((Bytef *)TmpBuffer.get(), (uLongf *)&UncompressedSize,
+ (const Bytef *)InputBuffer.data(), InputBuffer.size()));
+ if (Res == StatusOK) {
+ UncompressedBuffer.reset(MemoryBuffer::getMemBufferCopy(
+ StringRef(TmpBuffer.get(), UncompressedSize)));
+ // Tell MSan that memory initialized by zlib is valid.
+ __msan_unpoison(UncompressedBuffer->getBufferStart(), UncompressedSize);
+ }
+ return Res;
+}
+
+#else
+bool zlib::isAvailable() { return false; }
+zlib::Status zlib::compress(StringRef InputBuffer,
+ OwningPtr<MemoryBuffer> &CompressedBuffer,
+ CompressionLevel Level) {
+ return zlib::StatusUnsupported;
+}
+zlib::Status zlib::uncompress(StringRef InputBuffer,
+ OwningPtr<MemoryBuffer> &UncompressedBuffer,
+ size_t UncompressedSize) {
+ return zlib::StatusUnsupported;
+}
+#endif
+
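A minimal usage sketch for the API above (illustrative only; error handling reduced to a boolean):

    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Compression.h"
    #include "llvm/Support/MemoryBuffer.h"
    using namespace llvm;

    static bool deflateBlob(StringRef Data, OwningPtr<MemoryBuffer> &Out) {
      if (!zlib::isAvailable())
        return false;                   // built without zlib support
      // On success, Out owns the deflated bytes.
      return zlib::compress(Data, Out, zlib::BestSizeCompression) ==
             zlib::StatusOK;
    }
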
diff --git a/contrib/llvm/lib/Support/DataExtractor.cpp b/contrib/llvm/lib/Support/DataExtractor.cpp
index 3d5cce0..a564d21 100644
--- a/contrib/llvm/lib/Support/DataExtractor.cpp
+++ b/contrib/llvm/lib/Support/DataExtractor.cpp
@@ -20,7 +20,7 @@ static T getU(uint32_t *offset_ptr, const DataExtractor *de,
uint32_t offset = *offset_ptr;
if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) {
std::memcpy(&val, &Data[offset], sizeof(val));
- if (sys::isLittleEndianHost() != isLittleEndian)
+ if (sys::IsLittleEndianHost != isLittleEndian)
val = sys::SwapByteOrder(val);
// Advance the offset
diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp
index 36e33b5..145f12d 100644
--- a/contrib/llvm/lib/Support/FoldingSet.cpp
+++ b/contrib/llvm/lib/Support/FoldingSet.cpp
@@ -101,7 +101,7 @@ void FoldingSetNodeID::AddString(StringRef String) {
// Otherwise do it the hard way.
// To be compatible with above bulk transfer, we need to take endianness
// into account.
- if (sys::isBigEndianHost()) {
+ if (sys::IsBigEndianHost) {
for (Pos += 4; Pos <= Size; Pos += 4) {
unsigned V = ((unsigned char)String[Pos - 4] << 24) |
((unsigned char)String[Pos - 3] << 16) |
@@ -110,7 +110,7 @@ void FoldingSetNodeID::AddString(StringRef String) {
Bits.push_back(V);
}
} else {
- assert(sys::isLittleEndianHost() && "Unexpected host endianness");
+ assert(sys::IsLittleEndianHost && "Unexpected host endianness");
for (Pos += 4; Pos <= Size; Pos += 4) {
unsigned V = ((unsigned char)String[Pos - 1] << 24) |
((unsigned char)String[Pos - 2] << 16) |
diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp
index 73d98d1..a7c7a95 100644
--- a/contrib/llvm/lib/Support/Host.cpp
+++ b/contrib/llvm/lib/Support/Host.cpp
@@ -112,19 +112,19 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
#endif
}
-static bool OSHasAVXSupport() {
-#if defined(__GNUC__)
- // Check xgetbv; this uses a .byte sequence instead of the instruction
- // directly because older assemblers do not include support for xgetbv and
- // there is no easy way to conditionally compile based on the assembler used.
- int rEAX, rEDX;
- __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
-#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219
- unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
-#else
- int rEAX = 0; // Ensures we return false
-#endif
- return (rEAX & 6) == 6;
+static bool OSHasAVXSupport() {
+#if defined(__GNUC__)
+ // Check xgetbv; this uses a .byte sequence instead of the instruction
+ // directly because older assemblers do not include support for xgetbv and
+ // there is no easy way to conditionally compile based on the assembler used.
+ int rEAX, rEDX;
+ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
+#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
+ unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+#else
+ int rEAX = 0; // Ensures we return false
+#endif
+ return (rEAX & 6) == 6;
}
static void DetectX86FamilyModel(unsigned EAX, unsigned &Family,
@@ -355,10 +355,15 @@ std::string sys::getHostCPUName() {
case 20:
return "btver1";
case 21:
- if (Model <= 15)
- return "bdver1";
- else if (Model <= 31)
+ if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback.
+ return "btver1";
+ if (Model > 15 && Model <= 31)
return "bdver2";
+ return "bdver1";
+ case 22:
+ if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback.
+ return "btver1";
+ return "btver2";
default:
return "generic";
}
@@ -608,7 +613,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features){
#endif
std::string sys::getProcessTriple() {
- Triple PT(LLVM_HOSTTRIPLE);
+ Triple PT(LLVM_HOST_TRIPLE);
if (sizeof(void *) == 8 && PT.isArch32Bit())
PT = PT.get64BitArchVariant();
diff --git a/contrib/llvm/lib/Support/LockFileManager.cpp b/contrib/llvm/lib/Support/LockFileManager.cpp
index 92d8b83..2917e27 100644
--- a/contrib/llvm/lib/Support/LockFileManager.cpp
+++ b/contrib/llvm/lib/Support/LockFileManager.cpp
@@ -174,8 +174,8 @@ void LockFileManager::waitForUnlock() {
Interval.tv_sec = 0;
Interval.tv_nsec = 1000000;
#endif
- // Don't wait more than an hour for the file to appear.
- const unsigned MaxSeconds = 3600;
+ // Don't wait more than five minutes for the file to appear.
+ unsigned MaxSeconds = 300;
bool LockFileGone = false;
do {
// Sleep for the designated interval, to allow the owning process time to
@@ -187,21 +187,48 @@ void LockFileManager::waitForUnlock() {
#else
nanosleep(&Interval, NULL);
#endif
- // If the lock file no longer exists, wait for the actual file.
bool Exists = false;
+ bool LockFileJustDisappeared = false;
+
+ // If the lock file is still expected to be there, check whether it still
+ // is.
if (!LockFileGone) {
if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists) {
LockFileGone = true;
+ LockFileJustDisappeared = true;
Exists = false;
}
}
+
+ // If the lock file is no longer there, check if the original file is
+ // available now.
if (LockFileGone) {
- if (!sys::fs::exists(FileName.str(), Exists) && Exists)
+ if (!sys::fs::exists(FileName.str(), Exists) && Exists) {
return;
+ }
+
+ // The lock file is gone, so now we're waiting for the original file to
+ // show up. If this just happened, reset our waiting intervals and keep
+ // waiting.
+ if (LockFileJustDisappeared) {
+ MaxSeconds = 5;
+
+#if LLVM_ON_WIN32
+ Interval = 1;
+#else
+ Interval.tv_sec = 0;
+ Interval.tv_nsec = 1000000;
+#endif
+ continue;
+ }
}
- if (!processStillExecuting((*Owner).first, (*Owner).second))
+ // If we're looking for the lock file to disappear, but the process
+ // owning the lock died without cleaning up, just bail out.
+ if (!LockFileGone &&
+ !processStillExecuting((*Owner).first, (*Owner).second)) {
return;
+ }
// Exponentially increase the time we wait for the lock to be removed.
#if LLVM_ON_WIN32
diff --git a/contrib/llvm/lib/Support/PathV2.cpp b/contrib/llvm/lib/Support/PathV2.cpp
index 58a6ea7..ac53a9e9 100644
--- a/contrib/llvm/lib/Support/PathV2.cpp
+++ b/contrib/llvm/lib/Support/PathV2.cpp
@@ -789,8 +789,11 @@ file_magic identify_magic(StringRef magic) {
case '\177':
if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
- if (magic.size() >= 18 && magic[17] == 0)
- switch (magic[16]) {
+ bool Data2MSB = magic[5] == 2;
+ unsigned high = Data2MSB ? 16 : 17;
+ unsigned low = Data2MSB ? 17 : 16;
+ if (magic.size() >= 18 && magic[high] == 0)
+ switch (magic[low]) {
default: break;
case 1: return file_magic::elf_relocatable;
case 2: return file_magic::elf_executable;
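A short worked example of the endianness handling above: ELF stores the 16-bit e_type field at offsets 16-17, so the byte to switch on moves with the data encoding (illustrative):

    // big-endian file (EI_DATA == 2):    e_type bytes {0x00, 0x02}, tag at offset 17
    // little-endian file (EI_DATA == 1): e_type bytes {0x02, 0x00}, tag at offset 16
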
diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp
index d2508ac..412e34c 100644
--- a/contrib/llvm/lib/Support/Triple.cpp
+++ b/contrib/llvm/lib/Support/Triple.cpp
@@ -32,6 +32,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case r600: return "r600";
case sparc: return "sparc";
case sparcv9: return "sparcv9";
+ case systemz: return "s390x";
case tce: return "tce";
case thumb: return "thumb";
case x86: return "i386";
@@ -76,6 +77,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case sparcv9:
case sparc: return "sparc";
+ case systemz: return "systemz";
+
case x86:
case x86_64: return "x86";
@@ -170,6 +173,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("hexagon", hexagon)
.Case("sparc", sparc)
.Case("sparcv9", sparcv9)
+ .Case("systemz", systemz)
.Case("tce", tce)
.Case("thumb", thumb)
.Case("x86", x86)
@@ -233,6 +237,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("mips64el", Triple::mips64el)
.Case("r600", Triple::r600)
.Case("hexagon", Triple::hexagon)
+ .Case("s390x", Triple::systemz)
.Case("sparc", Triple::sparc)
.Case("sparcv9", Triple::sparcv9)
.Case("tce", Triple::tce)
@@ -687,6 +692,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::nvptx64:
case llvm::Triple::ppc64:
case llvm::Triple::sparcv9:
+ case llvm::Triple::systemz:
case llvm::Triple::x86_64:
case llvm::Triple::spir64:
return 64;
@@ -712,6 +718,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::UnknownArch:
case Triple::aarch64:
case Triple::msp430:
+ case Triple::systemz:
T.setArch(UnknownArch);
break;
@@ -769,6 +776,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::nvptx64:
case Triple::ppc64:
case Triple::sparcv9:
+ case Triple::systemz:
case Triple::x86_64:
// Already 64-bit.
break;
diff --git a/contrib/llvm/lib/Support/Unix/Memory.inc b/contrib/llvm/lib/Support/Unix/Memory.inc
index f397408..2bb9bf1 100644
--- a/contrib/llvm/lib/Support/Unix/Memory.inc
+++ b/contrib/llvm/lib/Support/Unix/Memory.inc
@@ -325,7 +325,7 @@ void Memory::InvalidateInstructionCache(const void *Addr,
for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
asm volatile("icbi 0, %0" : : "r"(Line));
asm volatile("isync");
-# elif defined(__arm__) && defined(__GNUC__) && !defined(__FreeBSD__)
+# elif (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) && !defined(__FreeBSD__)
// FIXME: Can we safely always call this for __GNUC__ everywhere?
const char *Start = static_cast<const char *>(Addr);
const char *End = Start + Len;
diff --git a/contrib/llvm/lib/Support/Unix/PathV2.inc b/contrib/llvm/lib/Support/Unix/PathV2.inc
index a3dfd4b..7e0aead 100644
--- a/contrib/llvm/lib/Support/Unix/PathV2.inc
+++ b/contrib/llvm/lib/Support/Unix/PathV2.inc
@@ -430,9 +430,7 @@ rety_open_create:
if (SavedErrno == errc::file_exists)
goto retry_random_path;
// If path prefix doesn't exist, try to create it.
- if (SavedErrno == errc::no_such_file_or_directory &&
- !exists(path::parent_path(RandomPath)) &&
- !TriedToCreateParent) {
+ if (SavedErrno == errc::no_such_file_or_directory && !TriedToCreateParent) {
TriedToCreateParent = true;
StringRef p(RandomPath);
SmallString<64> dir_to_create;
diff --git a/contrib/llvm/lib/Support/Unix/Program.inc b/contrib/llvm/lib/Support/Unix/Program.inc
index 117151c..aa03d48 100644
--- a/contrib/llvm/lib/Support/Unix/Program.inc
+++ b/contrib/llvm/lib/Support/Unix/Program.inc
@@ -32,6 +32,9 @@
#if HAVE_FCNTL_H
#include <fcntl.h>
#endif
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
#ifdef HAVE_POSIX_SPAWN
#include <spawn.h>
#if !defined(__APPLE__)
@@ -409,4 +412,25 @@ error_code Program::ChangeStderrToBinary(){
return make_error_code(errc::success);
}
+bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
+ static long ArgMax = sysconf(_SC_ARG_MAX);
+
+ // System says no practical limit.
+ if (ArgMax == -1)
+ return true;
+
+ // Conservatively account for space required by environment variables.
+ ArgMax /= 2;
+
+ size_t ArgLength = 0;
+ for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
+ I != E; ++I) {
+ ArgLength += strlen(*I) + 1;
+ if (ArgLength > size_t(ArgMax)) {
+ return false;
+ }
+ }
+ return true;
+}
+
}
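For illustration, a caller deciding whether to fall back to a response file might use the new hook like this (a sketch, assuming the declaration lives in llvm/Support/Program.h):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Support/Program.h"

    static bool needsResponseFile(llvm::ArrayRef<const char *> Argv) {
      // If the flattened command line would exceed the system limit, the
      // caller should write the arguments to an @response-file instead.
      return !llvm::sys::argumentsFitWithinSystemLimits(Argv);
    }
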
diff --git a/contrib/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm/lib/Support/Unix/Signals.inc
index 66338f1..64d1fc1 100644
--- a/contrib/llvm/lib/Support/Unix/Signals.inc
+++ b/contrib/llvm/lib/Support/Unix/Signals.inc
@@ -27,10 +27,12 @@
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
-#if HAVE_DLFCN_H && __GNUG__
-#include <dlfcn.h>
+#if HAVE_CXXABI_H
#include <cxxabi.h>
#endif
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
#if HAVE_MACH_MACH_H
#include <mach/mach.h>
#endif
@@ -184,6 +186,15 @@ static RETSIGTYPE SignalHandler(int Sig) {
// Otherwise if it is a fault (like SEGV) run any handler.
for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i)
CallBacksToRun[i].first(CallBacksToRun[i].second);
+
+#ifdef __s390__
+ // On S/390, certain signals are delivered with PSW Address pointing to
+ // *after* the faulting instruction. Simply returning from the signal
+ // handler would continue execution after that point, instead of
+ // re-raising the signal. Raise the signal manually in those cases.
+ if (Sig == SIGILL || Sig == SIGFPE || Sig == SIGTRAP)
+ raise(Sig);
+#endif
}
void llvm::sys::RunInterruptHandlers() {
@@ -290,9 +301,13 @@ void llvm::sys::PrintStackTrace(FILE *FD) {
(int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
if (dlinfo.dli_sname != NULL) {
- int res;
fputc(' ', FD);
+# if HAVE_CXXABI_H
+ int res;
char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res);
+# else
+ char* d = NULL;
+# endif
if (d == NULL) fputs(dlinfo.dli_sname, FD);
else fputs(d, FD);
free(d);
diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc
index 691d6d4..619ae5d 100644
--- a/contrib/llvm/lib/Support/Windows/Program.inc
+++ b/contrib/llvm/lib/Support/Windows/Program.inc
@@ -126,20 +126,58 @@ static bool ArgNeedsQuotes(const char *Str) {
return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0;
}
+/// CountPrecedingBackslashes - Returns the number of backslashes preceding Cur
+/// in the C string Start.
+static unsigned int CountPrecedingBackslashes(const char *Start,
+ const char *Cur) {
+ unsigned int Count = 0;
+ --Cur;
+ while (Cur >= Start && *Cur == '\\') {
+ ++Count;
+ --Cur;
+ }
+ return Count;
+}
+
+/// EscapePrecedingEscapes - Append a backslash to Dst for every backslash
+/// preceding Cur in the Start string. Assumes Dst has enough space.
+static char *EscapePrecedingEscapes(char *Dst, const char *Start,
+ const char *Cur) {
+ unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Cur);
+ while (PrecedingEscapes > 0) {
+ *Dst++ = '\\';
+ --PrecedingEscapes;
+ }
+ return Dst;
+}
/// ArgLenWithQuotes - Checks whether the argument needs to be quoted when
/// calling CreateProcess and returns the length of the quoted arg with
/// escaped quotes.
static unsigned int ArgLenWithQuotes(const char *Str) {
- unsigned int len = ArgNeedsQuotes(Str) ? 2 : 0;
+ const char *Start = Str;
+ bool Quoted = ArgNeedsQuotes(Str);
+ unsigned int len = Quoted ? 2 : 0;
while (*Str != '\0') {
- if (*Str == '\"')
- ++len;
+ if (*Str == '\"') {
+ // We need to add a backslash, but ensure that it isn't escaped.
+ unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str);
+ len += PrecedingEscapes + 1;
+ }
+ // Note that we *don't* need to escape runs of backslashes that don't
+ // precede a double quote! See MSDN:
+ // http://msdn.microsoft.com/en-us/library/17w5ykft%28v=vs.85%29.aspx
++len;
++Str;
}
+ if (Quoted) {
+ // Make sure the closing quote doesn't get escaped by a trailing backslash.
+ unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str);
+ len += PrecedingEscapes + 1;
+ }
+
return len;
}
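A few worked examples of the quoting rules implemented above (illustrative):

    // argument: say "hi"  ->  on the command line: "say \"hi\""
    // argument: dir\      ->  on the command line: "dir\\"
    //   (the trailing backslash is doubled so it cannot escape the closing quote)
    // argument: a\"b      ->  on the command line: "a\\\"b"
    //   (the backslash run before a quote is doubled, then the quote itself is escaped)
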
@@ -180,20 +218,27 @@ Program::Execute(const Path& path,
for (unsigned i = 0; args[i]; i++) {
const char *arg = args[i];
+ const char *start = arg;
bool needsQuoting = ArgNeedsQuotes(arg);
if (needsQuoting)
*p++ = '"';
while (*arg != '\0') {
- if (*arg == '\"')
+ if (*arg == '\"') {
+ // Escape all preceding escapes (if any), and then escape the quote.
+ p = EscapePrecedingEscapes(p, start, arg);
*p++ = '\\';
+ }
*p++ = *arg++;
}
- if (needsQuoting)
+ if (needsQuoting) {
+ // Make sure our quote doesn't get escaped by a trailing backslash.
+ p = EscapePrecedingEscapes(p, start, arg);
*p++ = '"';
+ }
*p++ = ' ';
}
@@ -396,4 +441,20 @@ error_code Program::ChangeStderrToBinary(){
return make_error_code(errc::success);
}
+bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
+ // The documented max length of the command line passed to CreateProcess.
+ static const size_t MaxCommandStringLength = 32768;
+ size_t ArgLength = 0;
+ for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
+ I != E; ++I) {
+ // Account for the trailing space for every arg but the last one and the
+ // trailing NULL of the last argument.
+ ArgLength += ArgLenWithQuotes(*I) + 1;
+ if (ArgLength > MaxCommandStringLength) {
+ return false;
+ }
+ }
+ return true;
+}
+
}
diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc
index 3dd6660..b18b4d1 100644
--- a/contrib/llvm/lib/Support/Windows/Signals.inc
+++ b/contrib/llvm/lib/Support/Windows/Signals.inc
@@ -178,6 +178,19 @@ namespace llvm {
//===----------------------------------------------------------------------===//
#ifdef _MSC_VER
+/// AvoidMessageBoxHook - Emulates hitting "retry" from an "abort, retry,
+/// ignore" CRT debug report dialog. "retry" raises an exception which
+/// ultimately triggers our stack dumper.
+static int AvoidMessageBoxHook(int ReportType, char *Message, int *Return) {
+ // Set *Return to the retry code for the return value of _CrtDbgReport:
+ // http://msdn.microsoft.com/en-us/library/8hyw4sy7(v=vs.71).aspx
+ // This may also trigger just-in-time debugging via DebugBreak().
+ if (Return)
+ *Return = 1;
+ // Don't call _CrtDbgReport.
+ return TRUE;
+}
+
/// CRTReportHook - Function called on a CRT debugging event.
static int CRTReportHook(int ReportType, char *Message, int *Return) {
// Don't cause a DebugBreak() on return.
@@ -238,6 +251,15 @@ static void RegisterHandler() {
OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter);
SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
+#ifdef _MSC_VER
+ const char *EnableMsgbox = getenv("LLVM_ENABLE_CRT_REPORT");
+ if (!EnableMsgbox || strcmp("0", EnableMsgbox) == 0) {
+ // Setting a report hook overrides the default behavior of popping an "abort,
+ // retry, or ignore" dialog.
+ _CrtSetReportHook(AvoidMessageBoxHook);
+ }
+#endif
+
// Environment variable to disable any kind of crash dialog.
if (getenv("LLVM_DISABLE_CRASH_REPORT")) {
#ifdef _MSC_VER
diff --git a/contrib/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm/lib/Support/YAMLParser.cpp
index 2cead20..213f5e1 100644
--- a/contrib/llvm/lib/Support/YAMLParser.cpp
+++ b/contrib/llvm/lib/Support/YAMLParser.cpp
@@ -260,7 +260,7 @@ public:
Token getNext();
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ ArrayRef<SMRange> Ranges = None) {
SM.PrintMessage(Loc, Kind, Message, Ranges);
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index dc41f2f..daa7f1d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -367,9 +367,8 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// shoving a base register and an offset into the instruction then we may well
// need to scavenge registers. We should either specifically add a
// callee-save register for this purpose or allocate an extra spill slot.
-
bool BigStack =
- (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+ MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)
|| MFI->hasVarSizedObjects() // Access will be from X29: messes things up
|| (MFI->adjustsStack() && !hasReservedCallFrame(MF));
@@ -392,6 +391,8 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (ExtraReg != 0) {
MF.getRegInfo().setPhysRegUsed(ExtraReg);
} else {
+ assert(RS && "Expect register scavenger to be available");
+
// Create a stack slot for scavenging purposes. PrologEpilogInserter
       // helpfully places it near either SP or FP for us to avoid an
       // infinite regression during scavenging.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 46b8221..102c71b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -70,6 +70,15 @@ public:
return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
}
+ /// Used for pre-lowered address-reference nodes, so we already know
+ /// the fields match. This operand's job is simply to add an
+ /// appropriate shift operand (i.e. 0) to the MOVZ/MOVK instruction.
+ bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
+ Imm = N;
+ Shift = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
@@ -88,6 +97,13 @@ public:
bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
+ SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
+ unsigned Op64);
+
+  /// Put the given constant into a constant pool and return a DAG node which
+  /// will give its address.
+ SDValue getConstantPoolItemAddress(DebugLoc DL, const Constant *CV);
+
SDNode *TrySelectToMoveImm(SDNode *N);
SDNode *LowerToFPLitPool(SDNode *Node);
SDNode *SelectToLitPool(SDNode *N);
@@ -224,12 +240,51 @@ SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
return ResNode;
}
+SDValue
+AArch64DAGToDAGISel::getConstantPoolItemAddress(DebugLoc DL,
+ const Constant *CV) {
+ EVT PtrVT = TLI.getPointerTy();
+
+ switch (TLI.getTargetMachine().getCodeModel()) {
+ case CodeModel::Small: {
+ unsigned Alignment =
+ TLI.getDataLayout()->getABITypeAlignment(CV->getType());
+ return CurDAG->getNode(
+ AArch64ISD::WrapperSmall, DL, PtrVT,
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
+ CurDAG->getConstant(Alignment, MVT::i32));
+ }
+ case CodeModel::Large: {
+ SDNode *LitAddr;
+ LitAddr = CurDAG->getMachineNode(
+ AArch64::MOVZxii, DL, PtrVT,
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
+ CurDAG->getTargetConstant(0, MVT::i32));
+ LitAddr = CurDAG->getMachineNode(
+ AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
+ CurDAG->getTargetConstant(0, MVT::i32));
+ LitAddr = CurDAG->getMachineNode(
+ AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
+ CurDAG->getTargetConstant(0, MVT::i32));
+ LitAddr = CurDAG->getMachineNode(
+ AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
+ CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC),
+ CurDAG->getTargetConstant(0, MVT::i32));
+ return SDValue(LitAddr, 0);
+ }
+ default:
+ llvm_unreachable("Only small and large code models supported now");
+ }
+}
+
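In assembly terms, the Large code-model sequence built above materializes the 64-bit absolute address of the pool entry sixteen bits at a time, roughly as follows (sketch; the label name is illustrative):

    // movz x0, #:abs_g3:.LCPI0_0      -- bits [63:48]
    // movk x0, #:abs_g2_nc:.LCPI0_0   -- bits [47:32]
    // movk x0, #:abs_g1_nc:.LCPI0_0   -- bits [31:16]
    // movk x0, #:abs_g0_nc:.LCPI0_0   -- bits [15:0]
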
SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
DebugLoc DL = Node->getDebugLoc();
uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
EVT DestType = Node->getValueType(0);
- EVT PtrVT = TLI.getPointerTy();
// Since we may end up loading a 64-bit constant from a 32-bit entry the
// constant in the pool may have a different type to the eventual node.
@@ -256,14 +311,8 @@ SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
MemType.getSizeInBits()),
UnsignedVal);
- SDValue PoolAddr;
+ SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(CV->getType());
- PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
- CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0,
- AArch64II::MO_NO_FLAG),
- CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0,
- AArch64II::MO_LO12),
- CurDAG->getConstant(Alignment, MVT::i32));
return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
PoolAddr,
@@ -276,20 +325,10 @@ SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
DebugLoc DL = Node->getDebugLoc();
const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
- EVT PtrVT = TLI.getPointerTy();
EVT DestType = Node->getValueType(0);
unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(FV->getType());
- SDValue PoolAddr;
-
- assert(TM.getCodeModel() == CodeModel::Small &&
- "Only small code model supported");
- PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
- CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0,
- AArch64II::MO_NO_FLAG),
- CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0,
- AArch64II::MO_LO12),
- CurDAG->getConstant(Alignment, MVT::i32));
+ SDValue PoolAddr = getConstantPoolItemAddress(DL, FV);
return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
MachinePointerInfo::getConstantPool(),
@@ -318,6 +357,38 @@ AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
return true;
}
+SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
+ unsigned Op16,unsigned Op32,
+ unsigned Op64) {
+ // Mostly direct translation to the given operations, except that we preserve
+ // the AtomicOrdering for use later on.
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
+ EVT VT = AN->getMemoryVT();
+
+ unsigned Op;
+ if (VT == MVT::i8)
+ Op = Op8;
+ else if (VT == MVT::i16)
+ Op = Op16;
+ else if (VT == MVT::i32)
+ Op = Op32;
+ else if (VT == MVT::i64)
+ Op = Op64;
+ else
+ llvm_unreachable("Unexpected atomic operation");
+
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 1; i < AN->getNumOperands(); ++i)
+ Ops.push_back(AN->getOperand(i));
+
+ Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
+ Ops.push_back(AN->getOperand(0)); // Chain moves to the end
+
+ return CurDAG->SelectNodeTo(Node, Op,
+ AN->getValueType(0), MVT::Other,
+ &Ops[0], Ops.size());
+}
+
SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
// Dump information about the Node being selected
DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
@@ -328,6 +399,78 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
}
switch (Node->getOpcode()) {
+ case ISD::ATOMIC_LOAD_ADD:
+ return SelectAtomic(Node,
+ AArch64::ATOMIC_LOAD_ADD_I8,
+ AArch64::ATOMIC_LOAD_ADD_I16,
+ AArch64::ATOMIC_LOAD_ADD_I32,
+ AArch64::ATOMIC_LOAD_ADD_I64);
+ case ISD::ATOMIC_LOAD_SUB:
+ return SelectAtomic(Node,
+ AArch64::ATOMIC_LOAD_SUB_I8,
+ AArch64::ATOMIC_LOAD_SUB_I16,
+ AArch64::ATOMIC_LOAD_SUB_I32,
+ AArch64::ATOMIC_LOAD_SUB_I64);
+ case ISD::ATOMIC_LOAD_AND:
+ return SelectAtomic(Node,
+ AArch64::ATOMIC_LOAD_AND_I8,
+ AArch64::ATOMIC_LOAD_AND_I16,
+ AArch64::ATOMIC_LOAD_AND_I32,
+ AArch64::ATOMIC_LOAD_AND_I64);
+ case ISD::ATOMIC_LOAD_OR:
+ return SelectAtomic(Node,
+ AArch64::ATOMIC_LOAD_OR_I8,
+ AArch64::ATOMIC_LOAD_OR_I16,
+ AArch64::ATOMIC_LOAD_OR_I32,
+ AArch64::ATOMIC_LOAD_OR_I64);
+ case ISD::ATOMIC_LOAD_XOR:
+ return SelectAtomic(Node,
+ AArch64::ATOMIC_LOAD_XOR_I8,
+ AArch64::ATOMIC_LOAD_XOR_I16,
+ AArch64::ATOMIC_LOAD_XOR_I32,
+ AArch64::ATOMIC_LOAD_XOR_I64);
+ case ISD::ATOMIC_LOAD_NAND:
+ return SelectAtomic(Node,
+ AArch64::ATOMIC_LOAD_NAND_I8,
+ AArch64::ATOMIC_LOAD_NAND_I16,
+ AArch64::ATOMIC_LOAD_NAND_I32,
+ AArch64::ATOMIC_LOAD_NAND_I64);
+ case ISD::ATOMIC_LOAD_MIN:
+ return SelectAtomic(Node,
+ AArch64::ATOMIC_LOAD_MIN_I8,
+ AArch64::ATOMIC_LOAD_MIN_I16,
+ AArch64::ATOMIC_LOAD_MIN_I32,
+ AArch64::ATOMIC_LOAD_MIN_I64);
+ case ISD::ATOMIC_LOAD_MAX:
+ return SelectAtomic(Node,
+ AArch64::ATOMIC_LOAD_MAX_I8,
+ AArch64::ATOMIC_LOAD_MAX_I16,
+ AArch64::ATOMIC_LOAD_MAX_I32,
+ AArch64::ATOMIC_LOAD_MAX_I64);
+ case ISD::ATOMIC_LOAD_UMIN:
+ return SelectAtomic(Node,
+ AArch64::ATOMIC_LOAD_UMIN_I8,
+ AArch64::ATOMIC_LOAD_UMIN_I16,
+ AArch64::ATOMIC_LOAD_UMIN_I32,
+ AArch64::ATOMIC_LOAD_UMIN_I64);
+ case ISD::ATOMIC_LOAD_UMAX:
+ return SelectAtomic(Node,
+ AArch64::ATOMIC_LOAD_UMAX_I8,
+ AArch64::ATOMIC_LOAD_UMAX_I16,
+ AArch64::ATOMIC_LOAD_UMAX_I32,
+ AArch64::ATOMIC_LOAD_UMAX_I64);
+ case ISD::ATOMIC_SWAP:
+ return SelectAtomic(Node,
+ AArch64::ATOMIC_SWAP_I8,
+ AArch64::ATOMIC_SWAP_I16,
+ AArch64::ATOMIC_SWAP_I32,
+ AArch64::ATOMIC_SWAP_I64);
+ case ISD::ATOMIC_CMP_SWAP:
+ return SelectAtomic(Node,
+ AArch64::ATOMIC_CMP_SWAP_I8,
+ AArch64::ATOMIC_CMP_SWAP_I16,
+ AArch64::ATOMIC_CMP_SWAP_I32,
+ AArch64::ATOMIC_CMP_SWAP_I64);
case ISD::FrameIndex: {
int FI = cast<FrameIndexSDNode>(Node)->getIndex();
EVT PtrTy = TLI.getPointerTy();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e9f4497..56f6751 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -59,13 +59,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
computeRegisterProperties();
- // Some atomic operations can be folded into load-acquire or store-release
- // instructions on AArch64. It's marginally simpler to let LLVM expand
- // everything out to a barrier and then recombine the (few) barriers we can.
- setInsertFencesForAtomic(true);
- setTargetDAGCombine(ISD::ATOMIC_FENCE);
- setTargetDAGCombine(ISD::ATOMIC_STORE);
-
// We combine OR nodes for bitfield and NEON BSL operations.
setTargetDAGCombine(ISD::OR);
@@ -275,27 +268,34 @@ EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const {
return VT.changeVectorElementTypeToInteger();
}
-static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc,
- unsigned &strOpc) {
- switch (Size) {
- default: llvm_unreachable("unsupported size for atomic binary op!");
- case 1:
- ldrOpc = AArch64::LDXR_byte;
- strOpc = AArch64::STXR_byte;
- break;
- case 2:
- ldrOpc = AArch64::LDXR_hword;
- strOpc = AArch64::STXR_hword;
- break;
- case 4:
- ldrOpc = AArch64::LDXR_word;
- strOpc = AArch64::STXR_word;
- break;
- case 8:
- ldrOpc = AArch64::LDXR_dword;
- strOpc = AArch64::STXR_dword;
- break;
- }
+static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
+ unsigned &LdrOpc,
+ unsigned &StrOpc) {
+ static unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
+ AArch64::LDXR_word, AArch64::LDXR_dword};
+ static unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
+ AArch64::LDAXR_word, AArch64::LDAXR_dword};
+ static unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword,
+ AArch64::STXR_word, AArch64::STXR_dword};
+ static unsigned StoreRels[] = {AArch64::STLXR_byte, AArch64::STLXR_hword,
+ AArch64::STLXR_word, AArch64::STLXR_dword};
+
+ unsigned *LoadOps, *StoreOps;
+ if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ LoadOps = LoadAcqs;
+ else
+ LoadOps = LoadBares;
+
+ if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ StoreOps = StoreRels;
+ else
+ StoreOps = StoreBares;
+
+ assert(isPowerOf2_32(Size) && Size <= 8 &&
+ "unsupported size for atomic binary op!");
+
+ LdrOpc = LoadOps[Log2_32(Size)];
+ StrOpc = StoreOps[Log2_32(Size)];
}
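So, for example, the tables above resolve to (illustrative):

    // Size = 4, Ord = SequentiallyConsistent -> LDAXR_word  / STLXR_word
    // Size = 4, Ord = Monotonic              -> LDXR_word   / STXR_word
    // Size = 8, Ord = Acquire                -> LDAXR_dword / STXR_dword
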
MachineBasicBlock *
@@ -313,12 +313,13 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, ldrOpc, strOpc);
+ getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -397,6 +398,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
+
unsigned oldval = dest;
DebugLoc dl = MI->getDebugLoc();
@@ -411,7 +414,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
}
unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, ldrOpc, strOpc);
+ getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -479,6 +482,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
unsigned ptr = MI->getOperand(1).getReg();
unsigned oldval = MI->getOperand(2).getReg();
unsigned newval = MI->getOperand(3).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
@@ -487,7 +491,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, ldrOpc, strOpc);
+ getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -777,6 +781,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL";
+ case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
default: return NULL;
@@ -1662,17 +1667,26 @@ AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- assert(getTargetMachine().getCodeModel() == CodeModel::Small
- && "Only small code model supported at the moment");
-
- // The most efficient code is PC-relative anyway for the small memory model,
- // so we don't need to worry about relocation model.
- return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
- DAG.getTargetBlockAddress(BA, PtrVT, 0,
- AArch64II::MO_NO_FLAG),
- DAG.getTargetBlockAddress(BA, PtrVT, 0,
- AArch64II::MO_LO12),
- DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
+ switch(getTargetMachine().getCodeModel()) {
+ case CodeModel::Small:
+ // The most efficient code is PC-relative anyway for the small memory model,
+ // so we don't need to worry about relocation model.
+ return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+ DAG.getTargetBlockAddress(BA, PtrVT, 0,
+ AArch64II::MO_NO_FLAG),
+ DAG.getTargetBlockAddress(BA, PtrVT, 0,
+ AArch64II::MO_LO12),
+ DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
+ case CodeModel::Large:
+ return DAG.getNode(
+ AArch64ISD::WrapperLarge, DL, PtrVT,
+ DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3),
+ DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
+ DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
+ DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
+ default:
+ llvm_unreachable("Only small and large code models supported now");
+ }
}
@@ -1841,12 +1855,33 @@ AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
}
SDValue
-AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
- SelectionDAG &DAG) const {
- // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
- // we make that distinction here.
+AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(getTargetMachine().getCodeModel() == CodeModel::Large);
+ assert(getTargetMachine().getRelocationModel() == Reloc::Static);
+
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GN->getGlobal();
+
+ SDValue GlobalAddr = DAG.getNode(
+ AArch64ISD::WrapperLarge, dl, PtrVT,
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3),
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
- // We support the small memory model for now.
+ if (GN->getOffset() != 0)
+ return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
+ DAG.getConstant(GN->getOffset(), PtrVT));
+
+ return GlobalAddr;
+}
+
+SDValue
+AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op,
+ SelectionDAG &DAG) const {
assert(getTargetMachine().getCodeModel() == CodeModel::Small);
EVT PtrVT = getPointerTy();
@@ -1925,6 +1960,22 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
return GlobalRef;
}
+SDValue
+AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
+ SelectionDAG &DAG) const {
+ // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
+ // we make those distinctions here.
+
+ switch (getTargetMachine().getCodeModel()) {
+ case CodeModel::Small:
+ return LowerGlobalAddressELFSmall(Op, DAG);
+ case CodeModel::Large:
+ return LowerGlobalAddressELFLarge(Op, DAG);
+ default:
+ llvm_unreachable("Only small and large code models supported now");
+ }
+}
+
SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
SDValue DescAddr,
DebugLoc DL,
@@ -1974,6 +2025,8 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF targets");
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small
+ && "TLS only supported in small memory model");
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
@@ -2082,14 +2135,27 @@ SDValue
AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
DebugLoc dl = JT->getDebugLoc();
+ EVT PtrVT = getPointerTy();
// When compiling PIC, jump tables get put in the code section so a static
// relocation-style is acceptable for both cases.
- return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(),
- DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()),
- DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
- AArch64II::MO_LO12),
- DAG.getConstant(1, MVT::i32));
+ switch (getTargetMachine().getCodeModel()) {
+ case CodeModel::Small:
+ return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
+ DAG.getTargetJumpTable(JT->getIndex(), PtrVT),
+ DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+ AArch64II::MO_LO12),
+ DAG.getConstant(1, MVT::i32));
+ case CodeModel::Large:
+ return DAG.getNode(
+ AArch64ISD::WrapperLarge, dl, PtrVT,
+ DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3),
+ DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC),
+ DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC),
+ DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC));
+ default:
+ llvm_unreachable("Only small and large code models supported now");
+ }
}
// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
@@ -2377,78 +2443,6 @@ static SDValue PerformANDCombine(SDNode *N,
DAG.getConstant(LSB + Width - 1, MVT::i64));
}
-static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode,
- TargetLowering::DAGCombinerInfo &DCI) {
- // An atomic operation followed by an acquiring atomic fence can be reduced to
- // an acquiring load. The atomic operation provides a convenient pointer to
- // load from. If the original operation was a load anyway we can actually
- // combine the two operations into an acquiring load.
- SelectionDAG &DAG = DCI.DAG;
- SDValue AtomicOp = FenceNode->getOperand(0);
- AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp);
-
- // A fence on its own can't be optimised
- if (!AtomicNode)
- return SDValue();
-
- AtomicOrdering FenceOrder
- = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1));
- SynchronizationScope FenceScope
- = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2));
-
- if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope())
- return SDValue();
-
- // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so
- // the chain we use should be its input, otherwise we'll put our store after
- // it so we use its output chain.
- SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ?
- AtomicNode->getChain() : AtomicOp;
-
- // We have an acquire fence with a handy atomic operation nearby, we can
- // convert the fence into a load-acquire, discarding the result.
- DebugLoc DL = FenceNode->getDebugLoc();
- SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
- AtomicNode->getValueType(0),
- Chain, // Chain
- AtomicOp.getOperand(1), // Pointer
- AtomicNode->getMemOperand(), Acquire,
- FenceScope);
-
- if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
- DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
-
- return Op.getValue(1);
-}
-
-static SDValue PerformATOMIC_STORECombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- // A releasing atomic fence followed by an atomic store can be combined into a
- // single store operation.
- SelectionDAG &DAG = DCI.DAG;
- AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N);
- SDValue FenceOp = AtomicNode->getOperand(0);
-
- if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
- return SDValue();
-
- AtomicOrdering FenceOrder
- = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1));
- SynchronizationScope FenceScope
- = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2));
-
- if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
- return SDValue();
-
- DebugLoc DL = AtomicNode->getDebugLoc();
- return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
- FenceOp.getOperand(0), // Chain
- AtomicNode->getOperand(1), // Pointer
- AtomicNode->getOperand(2), // Value
- AtomicNode->getMemOperand(), Release,
- FenceScope);
-}
-
/// For a true bitfield insert, the bits getting into that contiguous mask
/// should come from the low part of an existing value: they must be formed from
/// a compatible SHL operation (unless they're already low). This function
@@ -2804,8 +2798,6 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default: break;
case ISD::AND: return PerformANDCombine(N, DCI);
- case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI);
- case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::SRA: return PerformSRACombine(N, DCI);
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 4960d28..d49b3ee 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -103,7 +103,12 @@ namespace AArch64ISD {
UBFX,
// Wraps an address which the ISelLowering phase has decided should be
- // created using the small absolute memory model: i.e. adrp/add or
+ // created using the large memory model style: i.e. a sequence of four
+ // movz/movk instructions.
+ WrapperLarge,
+
+ // Wraps an address which the ISelLowering phase has decided should be
+ // created using the small memory model style: i.e. adrp/add or
// adrp/mem-op. This exists to prevent bare TargetAddresses which may never
// get selected.
WrapperSmall
@@ -206,7 +211,11 @@ public:
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+
+ SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL,
SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index cb93471..9dd122f 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
// This file describes AArch64 instruction formats, down to the level of the
// instruction's overall class.
-// ===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 37be5e4..d2cfc7d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -70,12 +70,20 @@ def A64cmn : PatFrag<(ops node:$lhs, node:$rhs),
// made for a variable/address at ISelLowering.
// + The output of ISelLowering should be selectable (hence the Wrapper,
// rather than a bare target opcode)
-def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
- SDTCisSameAs<1, 2>,
- SDTCisVT<3, i32>,
- SDTCisPtrTy<0>]>;
+def SDTAArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisSameAs<0, 4>,
+ SDTCisPtrTy<0>]>;
-def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>;
+def A64WrapperLarge :SDNode<"AArch64ISD::WrapperLarge", SDTAArch64WrapperLarge>;
+
+def SDTAArch64WrapperSmall : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i32>,
+ SDTCisPtrTy<0>]>;
+
+def A64WrapperSmall :SDNode<"AArch64ISD::WrapperSmall", SDTAArch64WrapperSmall>;
def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
@@ -159,49 +167,55 @@ let Defs = [XSP], Uses = [XSP] in {
// Atomic operation pseudo-instructions
//===----------------------------------------------------------------------===//
-let usesCustomInserter = 1 in {
-multiclass AtomicSizes<string opname> {
- def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
- [(set i32:$dst, (!cast<SDNode>(opname # "_8") i64:$ptr, i32:$incr))]>;
- def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
- [(set i32:$dst, (!cast<SDNode>(opname # "_16") i64:$ptr, i32:$incr))]>;
- def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
- [(set i32:$dst, (!cast<SDNode>(opname # "_32") i64:$ptr, i32:$incr))]>;
- def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr),
- [(set i64:$dst, (!cast<SDNode>(opname # "_64") i64:$ptr, i64:$incr))]>;
-}
-}
-
-defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">;
-defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">;
-defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">;
-defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">;
-defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">;
-defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
-defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">;
+// These get selected from C++ code as an essentially direct translation of the
+// generic DAG nodes. The one exception is that the AtomicOrdering is added as
+// an operand so that the eventual lowering can make use of it and choose
+// acquire/release operations when required.
+
+let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
+multiclass AtomicSizes {
+ def _I8 : PseudoInst<(outs GPR32:$dst),
+ (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+ def _I16 : PseudoInst<(outs GPR32:$dst),
+ (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+ def _I32 : PseudoInst<(outs GPR32:$dst),
+ (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+ def _I64 : PseudoInst<(outs GPR64:$dst),
+ (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
+}
+}
+
+defm ATOMIC_LOAD_ADD : AtomicSizes;
+defm ATOMIC_LOAD_SUB : AtomicSizes;
+defm ATOMIC_LOAD_AND : AtomicSizes;
+defm ATOMIC_LOAD_OR : AtomicSizes;
+defm ATOMIC_LOAD_XOR : AtomicSizes;
+defm ATOMIC_LOAD_NAND : AtomicSizes;
+defm ATOMIC_SWAP : AtomicSizes;
let Defs = [NZCV] in {
// These operations need a CMP to calculate the correct value
- defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
- defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
- defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
- defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
-}
-
-let usesCustomInserter = 1, Defs = [NZCV] in {
-def ATOMIC_CMP_SWAP_I8
- : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
- [(set i32:$dst, (atomic_cmp_swap_8 i64:$ptr, i32:$old, i32:$new))]>;
-def ATOMIC_CMP_SWAP_I16
- : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
- [(set i32:$dst, (atomic_cmp_swap_16 i64:$ptr, i32:$old, i32:$new))]>;
-def ATOMIC_CMP_SWAP_I32
- : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
- [(set i32:$dst, (atomic_cmp_swap_32 i64:$ptr, i32:$old, i32:$new))]>;
-def ATOMIC_CMP_SWAP_I64
- : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new),
- [(set i64:$dst, (atomic_cmp_swap_64 i64:$ptr, i64:$old, i64:$new))]>;
+ defm ATOMIC_LOAD_MIN : AtomicSizes;
+ defm ATOMIC_LOAD_MAX : AtomicSizes;
+ defm ATOMIC_LOAD_UMIN : AtomicSizes;
+ defm ATOMIC_LOAD_UMAX : AtomicSizes;
+}
+
+class AtomicCmpSwap<RegisterClass GPRData>
+ : PseudoInst<(outs GPRData:$dst),
+ (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new,
+ i32imm:$ordering), []> {
+ let usesCustomInserter = 1;
+ let hasCtrlDep = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+ let Defs = [NZCV];
}
+def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
+
//===----------------------------------------------------------------------===//
// Add-subtract (extended register) instructions
//===----------------------------------------------------------------------===//
@@ -2579,7 +2593,8 @@ defm LDAR : A64I_LRex<"ldar", 0b101>;
class acquiring_load<PatFrag base>
: PatFrag<(ops node:$ptr), (base node:$ptr), [{
- return cast<AtomicSDNode>(N)->getOrdering() == Acquire;
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return Ordering == Acquire || Ordering == SequentiallyConsistent;
}]>;
def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
@@ -2610,7 +2625,8 @@ class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
class releasing_store<PatFrag base>
: PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
- return cast<AtomicSDNode>(N)->getOrdering() == Release;
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return Ordering == Release || Ordering == SequentiallyConsistent;
}]>;
def atomic_store_release_8 : releasing_store<atomic_store_8>;
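
The widened predicates above let sequentially consistent atomics reuse the same load-acquire and store-release instructions as plain acquire/release operations. A small user-level C++ example of the code these patterns now cover:

#include <atomic>

std::atomic<int> Flag{0};

// With acquiring_load/releasing_store also matching SequentiallyConsistent,
// a seq_cst load can be selected straight to LDAR and a seq_cst store to STLR,
// rather than being expanded around separate barriers.
int readFlag() { return Flag.load(std::memory_order_seq_cst); }
void writeFlag(int V) { Flag.store(V, std::memory_order_seq_cst); }
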
@@ -3863,7 +3879,7 @@ multiclass movw_operands<string prefix, string instname, int width> {
let DiagnosticType = "MOVWUImm16";
}
- def _imm : Operand<i32> {
+ def _imm : Operand<i64> {
let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand");
let PrintMethod = "printMoveWideImmOperand";
let EncoderMethod = "getMoveWideImmOpValue";
@@ -3934,7 +3950,7 @@ multiclass movalias_operand<string prefix, string basename,
# "A64Imms::" # immpredicate # ">";
}
- def _movimm : Operand<i32> {
+ def _movimm : Operand<i64> {
let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
@@ -3958,6 +3974,15 @@ def : movalias<MOVZxii, GPR64, movz64_movimm>;
def : movalias<MOVNwii, GPR32, movn32_movimm>;
def : movalias<MOVNxii, GPR64, movn64_movimm>;
+def movw_addressref : ComplexPattern<i64, 2, "SelectMOVWAddressRef">;
+
+def : Pat<(A64WrapperLarge movw_addressref:$G3, movw_addressref:$G2,
+ movw_addressref:$G1, movw_addressref:$G0),
+ (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref:$G3),
+ movw_addressref:$G2),
+ movw_addressref:$G1),
+ movw_addressref:$G0)>;
+
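
The A64WrapperLarge pattern above expands a large-code-model address into one movz and three movk instructions, each carrying a 16-bit granule of the absolute address. A hedged sketch of the value those four instructions compute (granule() is a made-up helper, not LLVM code):

#include <cassert>
#include <cstdint>

// One 16-bit slice of a 64-bit absolute address; G3 is bits [63:48].
static uint16_t granule(uint64_t Addr, unsigned G) {
  return static_cast<uint16_t>(Addr >> (16 * G));
}

int main() {
  uint64_t Addr = 0x0123456789abcdefULL;
  // movz x0, #:abs_g3:Addr, lsl #48     -- MO_ABS_G3 (clears the other bits)
  uint64_t X0 = uint64_t(granule(Addr, 3)) << 48;
  // movk x0, #:abs_g2_nc:Addr, lsl #32  -- MO_ABS_G2_NC (keeps the other bits)
  X0 = (X0 & ~(0xffffULL << 32)) | (uint64_t(granule(Addr, 2)) << 32);
  // movk x0, #:abs_g1_nc:Addr, lsl #16  -- MO_ABS_G1_NC
  X0 = (X0 & ~(0xffffULL << 16)) | (uint64_t(granule(Addr, 1)) << 16);
  // movk x0, #:abs_g0_nc:Addr           -- MO_ABS_G0_NC
  X0 = (X0 & ~0xffffULL) | granule(Addr, 0);
  assert(X0 == Addr); // the full 64-bit address is rebuilt in the register
  return 0;
}
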
//===----------------------------------------------------------------------===//
// PC-relative addressing instructions
//===----------------------------------------------------------------------===//
@@ -4454,8 +4479,6 @@ def : ADRP_ADD<A64WrapperSmall, tjumptable>;
// GOT access patterns
//===----------------------------------------------------------------------===//
-// FIXME: Wibble
-
class GOTLoadSmall<SDNode addrfrag>
: Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)),
(LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
index c96bf85..3d22330 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -68,6 +68,18 @@ AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO,
case AArch64II::MO_TPREL_G0_NC:
Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext);
break;
+ case AArch64II::MO_ABS_G3:
+ Expr = AArch64MCExpr::CreateABS_G3(Expr, OutContext);
+ break;
+ case AArch64II::MO_ABS_G2_NC:
+ Expr = AArch64MCExpr::CreateABS_G2_NC(Expr, OutContext);
+ break;
+ case AArch64II::MO_ABS_G1_NC:
+ Expr = AArch64MCExpr::CreateABS_G1_NC(Expr, OutContext);
+ break;
+ case AArch64II::MO_ABS_G0_NC:
+ Expr = AArch64MCExpr::CreateABS_G0_NC(Expr, OutContext);
+ break;
case AArch64II::MO_NO_FLAG:
// Expr is already correct
break;
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index b83577a..3b811df 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -63,14 +63,15 @@ public:
~AArch64ELFStreamer() {}
- virtual void ChangeSection(const MCSection *Section) {
+ virtual void ChangeSection(const MCSection *Section,
+ const MCExpr *Subsection) {
// We have to keep track of the mapping symbol state of any sections we
// use. Each one should start off as EMS_None, which is provided as the
// default constructor by DenseMap::lookup.
- LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastMappingSymbols[getPreviousSection().first] = LastEMS;
LastEMS = LastMappingSymbols.lookup(Section);
- MCELFStreamer::ChangeSection(Section);
+ MCELFStreamer::ChangeSection(Section, Subsection);
}
/// This function is the one used to emit instruction data into the ELF
@@ -129,7 +130,7 @@ private:
MCELF::SetType(SD, ELF::STT_NOTYPE);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
- Symbol->setSection(*getCurrentSection());
+ Symbol->setSection(*getCurrentSection().first);
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
Symbol->setVariableValue(Value);
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index c0e3b29..d9798ae 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -133,6 +133,26 @@ public:
return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx);
}
+ static const AArch64MCExpr *CreateABS_G3(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_ABS_G3, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateABS_G2_NC(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_ABS_G2_NC, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateABS_G1_NC(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_ABS_G1_NC, Expr, Ctx);
+ }
+
+ static const AArch64MCExpr *CreateABS_G0_NC(const MCExpr *Expr,
+ MCContext &Ctx) {
+ return Create(VK_AARCH64_ABS_G0_NC, Expr, Ctx);
+ }
+
/// @}
/// @name Accessors
/// @{
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 7960db0..819eead 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -81,6 +81,12 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
if (CM == CodeModel::Default)
CM = CodeModel::Small;
+ else if (CM == CodeModel::JITDefault) {
+ // The default MCJIT memory managers make no guarantees about where they can
+ // find an executable page; JITed code needs to be able to refer to globals
+ // no matter how far away they are.
+ CM = CodeModel::Large;
+ }
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
diff --git a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
index b8099cb..fc706a4 100644
--- a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -19,6 +19,6 @@ using namespace llvm;
Target llvm::TheAArch64Target;
extern "C" void LLVMInitializeAArch64TargetInfo() {
- RegisterTarget<Triple::aarch64>
+ RegisterTarget<Triple::aarch64, /*HasJIT=*/true>
X(TheAArch64Target, "aarch64", "AArch64");
}
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 1678559..bedccb5 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -981,8 +981,11 @@ bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) {
Rotation = RepeatWidth - Rotation;
}
- uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation)
- | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask);
+ uint64_t ReplicatedOnes = ReplicatedMask;
+ if (Rotation != 0 && Rotation != 64)
+ ReplicatedOnes = (ReplicatedMask >> Rotation)
+ | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask);
+
// Of course, they may not actually be ones, so we have to check that:
if (!isMask_64(ReplicatedOnes))
continue;
@@ -1051,13 +1054,14 @@ bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits,
int Rotation = (ImmR & (Width - 1));
uint64_t Mask = (1ULL << Num1s) - 1;
uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1;
- Mask = (Mask >> Rotation)
- | ((Mask << (Width - Rotation)) & WidthMask);
+ if (Rotation != 0 && Rotation != 64)
+ Mask = (Mask >> Rotation)
+ | ((Mask << (Width - Rotation)) & WidthMask);
- Imm = 0;
- for (unsigned i = 0; i < RegWidth / Width; ++i) {
- Imm |= Mask;
+ Imm = Mask;
+ for (unsigned i = 1; i < RegWidth / Width; ++i) {
Mask <<= Width;
+ Imm |= Mask;
}
return true;
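
Both hunks above guard the rotate against a zero (or full-width) rotation, where the original expression shifted by the element width and invoked undefined behaviour; the replication loop now also seeds the result with the unshifted mask so a full-width element never shifts by 64. A standalone re-derivation of the fixed decode step, with assumed parameter names:

#include <cassert>
#include <cstdint>

// Rotate a run of Num1s ones right by Rotation within a Width-bit element,
// then replicate that element across a RegWidth-bit register, mirroring the
// corrected A64Imms::isLogicalImmBits logic.
static uint64_t decodeLogicalImm(unsigned RegWidth, unsigned Width,
                                 unsigned Num1s, unsigned Rotation) {
  uint64_t Mask = (1ULL << Num1s) - 1;
  uint64_t WidthMask = Width == 64 ? ~0ULL : (1ULL << Width) - 1;
  if (Rotation != 0 && Rotation != 64)
    Mask = (Mask >> Rotation) | ((Mask << (Width - Rotation)) & WidthMask);

  uint64_t Imm = Mask;
  for (unsigned i = 1; i < RegWidth / Width; ++i) {
    Mask <<= Width;
    Imm |= Mask;
  }
  return Imm;
}

int main() {
  // Two ones per byte, rotated right by one, replicated across 64 bits.
  assert(decodeLogicalImm(64, 8, 2, 1) == 0x8181818181818181ULL);
  // A rotation of zero is now well-defined instead of shifting by Width.
  assert(decodeLogicalImm(64, 16, 4, 0) == 0x000f000f000f000fULL);
  return 0;
}
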
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 1b773d6..9a1ca61 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -1037,7 +1037,14 @@ namespace AArch64II {
// MO_LO12 - On a symbol operand, this represents a relocation containing
// lower 12 bits of the address. Used in add/sub/ldr/str.
- MO_LO12
+ MO_LO12,
+
+ // MO_ABS_G* - Represent the 16-bit granules of an absolute reference using
+ // movz/movk instructions.
+ MO_ABS_G3,
+ MO_ABS_G2_NC,
+ MO_ABS_G1_NC,
+ MO_ABS_G0_NC
};
}
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index 6838084..2d747091 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -59,6 +59,8 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
"FP compare + branch is slow">;
def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
"Floating point unit supports single precision only">;
+def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
+ "Enable support for TrustZone security extensions">;
// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
@@ -144,29 +146,33 @@ include "ARMSchedule.td"
def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
"Cortex-A5 ARM processors",
[FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding, FeatureT2XtPk]>;
+ FeatureVMLxForwarding, FeatureT2XtPk,
+ FeatureTrustZone]>;
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
"Cortex-A8 ARM processors",
[FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding, FeatureT2XtPk]>;
+ FeatureVMLxForwarding, FeatureT2XtPk,
+ FeatureTrustZone]>;
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
"Cortex-A9 ARM processors",
[FeatureVMLxForwarding,
FeatureT2XtPk, FeatureFP16,
- FeatureAvoidPartialCPSR]>;
+ FeatureAvoidPartialCPSR,
+ FeatureTrustZone]>;
def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
"Swift ARM processors",
[FeatureNEONForFP, FeatureT2XtPk,
FeatureVFP4, FeatureMP, FeatureHWDiv,
FeatureHWDivARM, FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
- FeatureHasSlowFPVMLx]>;
+ FeatureHasSlowFPVMLx, FeatureTrustZone]>;
// FIXME: It has not been determined if A15 has these features.
def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
"Cortex-A15 ARM processors",
[FeatureT2XtPk, FeatureFP16,
- FeatureAvoidPartialCPSR]>;
+ FeatureAvoidPartialCPSR,
+ FeatureTrustZone]>;
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
"Cortex-R5 ARM processors",
[FeatureSlowFPBrcc, FeatureHWDivARM,
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9e68ff4..6005054 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -283,14 +283,20 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
return false;
--I;
}
- if (!isUnpredicatedTerminator(I))
- return false;
// Get the last instruction in the block.
MachineInstr *LastInst = I;
+ unsigned LastOpc = LastInst->getOpcode();
+  // Check for an indirect branch first; this should return 'unanalyzable'
+  // even if it's predicated.
+ // even if it's predicated.
+ if (isIndirectBranchOpcode(LastOpc))
+ return true;
+
+ if (!isUnpredicatedTerminator(I))
+ return false;
// If there is only one terminator instruction, process it.
- unsigned LastOpc = LastInst->getOpcode();
if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
if (isUncondBranchOpcode(LastOpc)) {
TBB = LastInst->getOperand(0).getMBB();
@@ -747,10 +753,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Mov->addRegisterKilled(SrcReg, TRI);
}
-static const
-MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
- unsigned Reg, unsigned SubIdx, unsigned State,
- const TargetRegisterInfo *TRI) {
+const MachineInstrBuilder &
+ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
+ unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) const {
if (!SubIdx)
return MIB.addReg(Reg, State);
@@ -795,12 +801,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
- AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ if (Subtarget.hasV5TEOps()) {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
+
+ AddDefaultPred(MIB);
+ } else {
+      // Fall back to the STM instruction, which has existed since the dawn of
+      // time.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
+ .addFrameIndex(FI).addMemOperand(MMO));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ }
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -948,7 +964,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
@@ -975,12 +990,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
- unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA;
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MachineInstrBuilder MIB;
+
+ if (Subtarget.hasV5TEOps()) {
+ MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
+ AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
+
+ AddDefaultPred(MIB);
+ } else {
+      // Fall back to the LDM instruction, which has existed since the dawn of
+      // time.
+ MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
+ .addFrameIndex(FI).addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ }
+
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 7c107bb..2ef659c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -141,6 +141,10 @@ public:
MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
+ const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
+ unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) const;
+
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b6b27f8..b0d34a7 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -75,6 +75,12 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
}
const uint32_t*
+ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
+ return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask;
+}
+
+const uint32_t*
ARMBaseRegisterInfo::getNoPreservedMask() const {
return CSR_NoRegs_RegMask;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 725033b..0679919 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -96,6 +96,7 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+ const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
const uint32_t *getNoPreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
index e6e8c3d..4f94ad2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -74,9 +74,15 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
+ static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
+
+  // If only R3 was left unallocated, we still have to waste it.
+ Reg = State.AllocateReg(GPRArgRegs, 4);
+ assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
+
// For the 2nd half of a v2f64, do not just fail.
if (CanFail)
return false;
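
The extra AllocateReg call above handles the case where an f64 needs an even/odd core-register pair but only R3 is still free: R3 must be consumed anyway so that later arguments do not end up in a register that precedes this argument's stack slot. A toy model of that rule follows; the names and structure are illustrative, not the real CCState API:

#include <array>
#include <cassert>

struct ToyCCState {
  std::array<bool, 4> Taken{}; // r0..r3

  // Try to grab r0/r1 or r2/r3 for an AAPCS f64 split across core registers.
  // Returns the first register of the pair, or -1 if the value goes on the
  // stack; in the latter case any leftover single register is wasted.
  int allocatePair() {
    for (int Hi : {0, 2})
      if (!Taken[Hi] && !Taken[Hi + 1]) {
        Taken[Hi] = Taken[Hi + 1] = true;
        return Hi;
      }
    // No pair is free: waste whatever single argument register remains (with
    // in-order AAPCS allocation this can only be r3), then use the stack.
    for (int R = 0; R < 4; ++R)
      if (!Taken[R]) { Taken[R] = true; break; }
    return -1;
  }
};

int main() {
  ToyCCState State;
  State.Taken = {true, true, true, false}; // r0-r2 used by earlier arguments
  assert(State.allocatePair() == -1);      // the f64 is assigned to the stack
  assert(State.Taken[3]);                  // and r3 is wasted, not reused later
  return 0;
}
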
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index b378b96..8ff666e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -111,8 +111,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
// i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
// (and the same is true for f64 if VFP is not enabled)
CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
- CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
- "ArgFlags.getOrigAlign() != 8",
+ CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8",
CCAssignToReg<[R0, R1, R2, R3]>>>,
CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
@@ -195,10 +194,21 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
(sequence "D%u", 15, 8))>;
+// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
+// and the pointer return value are both passed in R0 in these cases, this can
+// be partially modelled by treating R0 as a callee-saved register
+// Only the resulting RegMask is used; the SaveList is ignored
+def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
+ R5, R4, (sequence "D%u", 15, 8),
+ R0)>;
+
// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
// Also save R7-R4 first to match the stack frame fixed spill areas.
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
+def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
+ (sub CSR_AAPCS_ThisReturn, R9))>;
+
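
These masks only change what the caller may assume survives the call, so the payoff shows up at call sites of 'this'-returning functions. A minimal C++ illustration of the pattern that benefits, assuming the ARM C++ ABI rule quoted in the comment above:

#include <new>

struct Widget {
  explicit Widget(int V) : Value(V) {}
  int Value;
};

// The ARM C++ ABI has the constructor return 'this' in r0, i.e. the same
// pointer the caller placed there. With the ThisReturn register mask the
// caller may keep using r0 after the call instead of stashing 'Mem' in a
// callee-saved register first.
Widget *construct(void *Mem, int V) {
  return new (Mem) Widget(V);
}

int main() {
  alignas(Widget) unsigned char Buf[sizeof(Widget)];
  return construct(Buf, 42)->Value == 42 ? 0 : 1;
}
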
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
// add is a workaround for not being able to compile empty list:
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 29fcd40..5d45f64 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -144,8 +144,8 @@ class ARMFastISel : public FastISel {
virtual bool TargetSelectInstruction(const Instruction *I);
virtual unsigned TargetMaterializeConstant(const Constant *C);
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
- virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
- const LoadInst *LI);
+ virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
virtual bool FastLowerArguments();
private:
#include "ARMGenFastISel.inc"
@@ -2605,7 +2605,7 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned Opc;
bool isBoolZext = false;
- const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC;
switch (SrcVT.SimpleTy) {
default: return 0;
case MVT::i16:
@@ -2797,12 +2797,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
return false;
}
-/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// \brief The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// successful.
-bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
- const LoadInst *LI) {
+bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(LI->getType(), VT))
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 7a02adf..483802b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -141,7 +141,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
assert(!AFI->isThumb1OnlyFunction() &&
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -159,8 +159,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
return;
// Allocate the vararg register save area. This is not counted in NumBytes.
- if (VARegSaveSize)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
+ if (ArgRegsSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
if (!AFI->hasStackFrame()) {
@@ -357,7 +357,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -471,8 +471,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MBBI = NewMI;
}
- if (VARegSaveSize)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
+ if (ArgRegsSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -1003,7 +1003,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ bool isVarArg = AFI->getArgRegsSaveSize() > 0;
unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
// The emitPopInst calls below do not insert reloads for the aligned DPRCS2
@@ -1174,7 +1174,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (AFI->isThumb1OnlyFunction()) {
// Spill LR if Thumb1 function uses variable length argument lists.
- if (AFI->getVarArgsRegSaveSize() > 0)
+ if (AFI->getArgRegsSaveSize() > 0)
MRI.setPhysRegUsed(ARM::LR);
// Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 2c51de2..9e1782e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1469,14 +1469,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
- MVT::i32, MVT::Other, Ops, 5);
+ MVT::i32, MVT::Other, Ops);
} else {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
- MVT::i32, MVT::Other, Ops, 6);
+ MVT::i32, MVT::Other, Ops);
}
}
@@ -1525,7 +1525,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
SDValue Ops[]= { Base, Offset, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
- MVT::Other, Ops, 5);
+ MVT::Other, Ops);
}
return NULL;
@@ -1539,7 +1539,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form a D register from a pair of S registers.
@@ -1550,7 +1550,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form a quad register from a pair of D registers.
@@ -1560,7 +1560,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive D registers from a pair of Q registers.
@@ -1570,7 +1570,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive S registers.
@@ -1585,7 +1585,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive D registers.
@@ -1599,7 +1599,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive Q registers.
@@ -1613,7 +1613,7 @@ SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
@@ -1761,7 +1761,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
- VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
} else {
// Otherwise, quad registers are loaded with two separate instructions,
@@ -1774,7 +1774,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
- ResTy, AddrTy, MVT::Other, OpsA, 7);
+ ResTy, AddrTy, MVT::Other, OpsA);
Chain = SDValue(VLdA, 2);
// Load the odd subregs.
@@ -1791,8 +1791,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
- VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
- Ops.data(), Ops.size());
+ VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
}
// Transfer memoperands.
@@ -1913,8 +1912,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
- SDNode *VSt =
- CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
@@ -1939,7 +1937,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
MemAddr.getValueType(),
- MVT::Other, OpsA, 7);
+ MVT::Other, OpsA);
cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
Chain = SDValue(VStA, 1);
@@ -1958,7 +1956,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Reg0);
Ops.push_back(Chain);
SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
- Ops.data(), Ops.size());
+ Ops);
cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
return VStB;
}
@@ -2063,8 +2061,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
QOpcodes[OpcodeIndex]);
- SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
- Ops.data(), Ops.size());
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
if (!IsLoad)
return VLdLn;
@@ -2150,8 +2147,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
if (isUpdating)
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
- SDNode *VLdDup =
- CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
SuperReg = SDValue(VLdDup, 0);
@@ -2197,7 +2193,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
Ops.push_back(getAL(CurDAG)); // predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
- return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
@@ -2542,7 +2538,7 @@ SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
MVT::i32, MVT::i32, MVT::Other,
- Ops.data() ,Ops.size());
+ Ops);
cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
return ResNode;
}
@@ -2599,7 +2595,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
- Ops, 4);
+ Ops);
} else {
SDValue Ops[] = {
CPIdx,
@@ -2609,7 +2605,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->getEntryNode()
};
ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
- Ops, 5);
+ Ops);
}
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
return NULL;
@@ -2719,7 +2715,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
MVT::i32);
SDValue Ops[] = { N0.getOperand(0), Imm16,
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
}
break;
@@ -2733,16 +2729,15 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::UMULL : ARM::UMULLv5,
- dl, MVT::i32, MVT::i32, Ops, 5);
+ dl, MVT::i32, MVT::i32, Ops);
}
}
case ISD::SMUL_LOHI: {
@@ -2751,14 +2746,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::SMULL : ARM::SMULLv5,
- dl, MVT::i32, MVT::i32, Ops, 5);
+ dl, MVT::i32, MVT::i32, Ops);
}
}
case ARMISD::UMLAL:{
@@ -2766,7 +2761,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32)};
- return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+ return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
}else{
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
@@ -2774,7 +2769,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::UMLAL : ARM::UMLALv5,
- dl, MVT::i32, MVT::i32, Ops, 7);
+ dl, MVT::i32, MVT::i32, Ops);
}
}
case ARMISD::SMLAL:{
@@ -2782,7 +2777,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32)};
- return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+ return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
}else{
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
@@ -2790,7 +2785,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::SMLAL : ARM::SMLALv5,
- dl, MVT::i32, MVT::i32, Ops, 7);
+ dl, MVT::i32, MVT::i32, Ops);
}
}
case ISD::LOAD: {
@@ -2833,7 +2828,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
MVT::i32);
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
- MVT::Glue, Ops, 5);
+ MVT::Glue, Ops);
Chain = SDValue(ResNode, 0);
if (N->getNumValues() == 2) {
InFlag = SDValue(ResNode, 1);
@@ -2863,7 +2858,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
- return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
}
case ARMISD::VUZP: {
unsigned Opc = 0;
@@ -2883,7 +2878,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
- return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
}
case ARMISD::VTRN: {
unsigned Opc = 0;
@@ -2902,7 +2897,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
- return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
}
case ARMISD::BUILD_VECTOR: {
EVT VecVT = N->getValueType(0);
@@ -3147,8 +3142,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(getAL(CurDAG));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(Chain);
- SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
- Ops.size());
+ SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -3211,8 +3205,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD;
- SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
- Ops.size());
+ SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -3398,7 +3391,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(N->getOperand(1));
Ops.push_back(getAL(CurDAG)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
- return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
+ return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
}
case ARMISD::VTBL2: {
DebugLoc dl = N->getDebugLoc();
@@ -3414,8 +3407,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(N->getOperand(2));
Ops.push_back(getAL(CurDAG)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
- return CurDAG->getMachineNode(ARM::VTBL2, dl, VT,
- Ops.data(), Ops.size());
+ return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
}
case ISD::CONCAT_VECTORS:
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index bb26090..e49cfc4 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -729,7 +729,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
(Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
// membarrier needs custom lowering; the rest are legal and handled
// normally.
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom lowering for 64-bit ops
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
@@ -747,7 +746,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setInsertFencesForAtomic(true);
} else {
// Set them all for expansion, which will force libcalls.
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
@@ -765,8 +763,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// Unordered/Monotonic case.
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
- // Since the libcalls include locking, fold in the fences
- setShouldFoldAtomicFences(true);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
@@ -1238,7 +1234,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ bool isThisReturn, SDValue ThisVal) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -1252,6 +1249,15 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
+ // Pass the 'this' value directly from the argument to the return value,
+ // to avoid register unit interference.
+ if (i == 0 && isThisReturn) {
+ assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
+ "unexpected return calling convention register assignment");
+ InVals.push_back(ThisVal);
+ continue;
+ }
+
SDValue Val;
if (VA.needsCustom()) {
// Handle f64 or half of a v2f64.
@@ -1363,21 +1369,22 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
- bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
- bool IsSibCall = false;
+ bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool isThisReturn = false;
+ bool isSibCall = false;
// Disable tail calls if they're not supported.
if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
isTailCall = false;
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
if (isTailCall) {
++NumTailCalls;
- IsSibCall = true;
+ isSibCall = true;
}
}
@@ -1393,12 +1400,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned NumBytes = CCInfo.getNextStackOffset();
// For tail calls, memory operands are available in our caller's stack.
- if (IsSibCall)
+ if (isSibCall)
NumBytes = 0;
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- if (!IsSibCall)
+ if (!isSibCall)
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
@@ -1460,6 +1467,13 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
StackPtr, MemOpChains, Flags);
}
} else if (VA.isRegLoc()) {
+ if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
+ assert(VA.getLocVT() == MVT::i32 &&
+ "unexpected calling convention register assignment");
+ assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
+ "unexpected use of 'returned'");
+ isThisReturn = true;
+ }
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else if (isByVal) {
assert(VA.isMemLoc());
@@ -1467,10 +1481,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// True if this byval aggregate will be split between registers
// and memory.
- if (CCInfo.isFirstByValRegValid()) {
+ unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
+ unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+
+ if (CurByValIdx < ByValArgsCount) {
+
+ unsigned RegBegin, RegEnd;
+ CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
+
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned int i, j;
- for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+ for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
SDValue Const = DAG.getConstant(4*i, MVT::i32);
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
@@ -1479,11 +1500,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
- offset = ARM::R4 - CCInfo.getFirstByValReg();
- CCInfo.clearFirstByValReg();
+
+ // If the parameter size exceeds the register area, the "offset" value
+ // helps us compute the stack slot for the remaining part properly.
+ offset = RegEnd - RegBegin;
+
+ CCInfo.nextInRegsParam();
}
- if (Flags.getByValSize() - 4*offset > 0) {
+ if (Flags.getByValSize() > 4*offset) {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
@@ -1499,7 +1524,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
Ops, array_lengthof(Ops)));
}
- } else if (!IsSibCall) {
+ } else if (!isSibCall) {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@@ -1539,7 +1564,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
- InFlag =SDValue();
+ InFlag = SDValue();
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -1680,8 +1705,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
+ const uint32_t *Mask;
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
+ if (isThisReturn)
+ // For 'this' returns, use the R0-preserving mask
+ Mask = ARI->getThisReturnPreservedMask(CallConv);
+ else
+ Mask = ARI->getCallPreservedMask(CallConv);
+
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1703,8 +1735,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
- dl, DAG, InVals);
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, isThisReturn,
+ isThisReturn ? OutVals[0] : SDValue());
}
/// HandleByVal - Every parameter *after* a byval parameter is passed
@@ -1718,8 +1751,24 @@ ARMTargetLowering::HandleByVal(
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
- if ((!State->isFirstByValRegValid()) &&
- (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+
+ // For in-prologue parameter handling, we also introduce a stack offset
+ // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
+ // This behaviour falls outside the AAPCS rules (5.5 Parameter Passing) for
+ // how the NSAA should be evaluated (NSAA means "next stacked argument address").
+ // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
+ // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
+ unsigned NSAAOffset = State->getNextStackOffset();
+ if (State->getCallOrPrologue() != Call) {
+ for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
+ unsigned RB, RE;
+ State->getInRegsParamInfo(i, RB, RE);
+ assert(NSAAOffset >= (RE-RB)*4 &&
+ "Stack offset for byval regs doesn't introduced anymore?");
+ NSAAOffset -= (RE-RB)*4;
+ }
+ }
+ if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
if (Subtarget->isAAPCS_ABI() && Align > 4) {
unsigned AlignInRegs = Align / 4;
unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
@@ -1727,22 +1776,45 @@ ARMTargetLowering::HandleByVal(
reg = State->AllocateReg(GPRArgRegs, 4);
}
if (reg != 0) {
- State->setFirstByValReg(reg);
+ unsigned excess = 4 * (ARM::R4 - reg);
+
+ // Special case when NSAA != SP and the parameter size is greater than the
+ // size of all remaining GPR regs. In that case we can't split the
+ // parameter; we must send it to the stack. We also must set the NCRN to R4,
+ // so we waste all remaining registers.
+ if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
+ while (State->AllocateReg(GPRArgRegs, 4))
+ ;
+ return;
+ }
+
+ // The first register for a byval parameter is the first register that
+ // wasn't allocated before this method call, so it is "reg".
+ // If the parameter is small enough to be saved in the range [reg, r4), then
+ // the end (one past the last) register is reg + param-size-in-regs;
+ // otherwise the parameter is split between registers and the stack, and
+ // the end register is r4 in that case.
+ unsigned ByValRegBegin = reg;
+ unsigned ByValRegEnd = (size < excess) ? reg + size/4 : ARM::R4;
+ State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+ // Note, the first register was already allocated at the beginning of the
+ // function; allocate the remaining registers we need.
+ for (unsigned i = reg+1; i != ByValRegEnd; ++i)
+ State->AllocateReg(GPRArgRegs, 4);
// At a call site, a byval parameter that is split between
// registers and memory needs its size truncated here. In a
// function prologue, such byval parameters are reassembled in
// memory, and are not truncated.
if (State->getCallOrPrologue() == Call) {
- unsigned excess = 4 * (ARM::R4 - reg);
- assert(size >= excess && "expected larger existing stack allocation");
- size -= excess;
+ // Make the remaining size equal to 0 when the whole structure
+ // can be stored in registers.
+ if (size < excess)
+ size = 0;
+ else
+ size -= excess;
}
}
}
- // Confiscate any remaining parameter registers to preclude their
- // assignment to subsequent parameters.
- while (State->AllocateReg(GPRArgRegs, 4))
- ;
}
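As a rough reading of the HandleByVal logic above: the register/stack split it records boils down to the small calculation sketched below (an illustrative sketch only, not part of the patch; the GPR argument registers are modeled as plain numbers with R4 == 4, and the helper name is invented for the example).

#include <utility>

// Sketch of the byval split decided in HandleByVal: "reg" is the first free
// GPR argument register (R0..R3 modeled as 0..3) and "size" is the byval
// size in bytes.  Registers [RegBegin, RegEnd) receive the in-register part;
// any bytes beyond "excess" are passed on the stack.
static std::pair<unsigned, unsigned> splitByValRegs(unsigned reg, unsigned size) {
  const unsigned R4 = 4;                    // first register past the arg regs
  unsigned excess = 4 * (R4 - reg);         // bytes that still fit in GPRs
  unsigned RegBegin = reg;
  unsigned RegEnd = (size < excess) ? reg + size / 4 : R4;
  return std::make_pair(RegBegin, RegEnd);
}

For example, a 12-byte byval starting at R2 (reg == 2, excess == 8) yields [R2, R4) in registers and the remaining 4 bytes on the stack, matching the "size -= excess" adjustment performed at call sites above.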
/// MatchingStackOffset - Return true if the given stack call argument is
@@ -1874,7 +1946,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// local frame.
const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
getInfo<ARMFunctionInfo>();
- if (AFI_Caller->getVarArgsRegSaveSize())
+ if (AFI_Caller->getArgRegsSaveSize())
return false;
// If the callee takes no arguments then go on to check the results of the
@@ -2461,35 +2533,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
}
}
-static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget) {
- DebugLoc dl = Op.getDebugLoc();
- if (!Subtarget->hasDataBarrier()) {
- // Some ARMv6 cpus can support data barriers with an mcr instruction.
- // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
- // here.
- assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
- "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
- return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
- }
-
- SDValue Op5 = Op.getOperand(5);
- bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
- unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
-
- ARM_MB::MemBOpt DMBOpt;
- if (isDeviceBarrier)
- DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
- else
- DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
- return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(DMBOpt, MVT::i32));
-}
-
-
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
// FIXME: handle "fence singlethread" more efficiently.
@@ -2586,12 +2629,16 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
void
ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned &VARegSize, unsigned &VARegSaveSize)
+ unsigned InRegsParamRecordIdx,
+ unsigned &ArgRegsSize,
+ unsigned &ArgRegsSaveSize)
const {
unsigned NumGPRs;
- if (CCInfo.isFirstByValRegValid())
- NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
- else {
+ if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+ unsigned RBegin, REnd;
+ CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+ NumGPRs = REnd - RBegin;
+ } else {
unsigned int firstUnalloced;
firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
sizeof(GPRArgRegs) /
@@ -2600,8 +2647,8 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
}
unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
- VARegSize = NumGPRs * 4;
- VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+ ArgRegsSize = NumGPRs * 4;
+ ArgRegsSaveSize = (ArgRegsSize + Align - 1) & ~(Align - 1);
}
// The remaining GPRs hold either the beginning of variable-argument
@@ -2611,40 +2658,60 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
// If this is a variadic function, the va_list pointer will begin with
// these values; otherwise, this reassembles a (byval) structure that
// was split between registers and memory.
-void
-ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain,
- const Value *OrigArg,
- unsigned OffsetFromOrigArg,
- unsigned ArgOffset,
- bool ForceMutable) const {
+// Return: The frame index the registers were stored into.
+int
+ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
+ unsigned OffsetFromOrigArg,
+ unsigned ArgOffset,
+ bool ForceMutable) const {
+
+ // Currently, two use-cases are possible:
+ // Case #1. A non-varargs function, and we meet the first byval parameter.
+ // Set up the first unallocated register as the first byval register;
+ // eat all remaining registers
+ // (these two actions are performed by the HandleByVal method).
+ // Then, here, we initialize the stack frame with
+ // "store-reg" instructions.
+ // Case #2. A varargs function that doesn't contain byval parameters.
+ // The same: eat all remaining unallocated registers and
+ // initialize the stack frame.
+
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned firstRegToSaveIndex;
- if (CCInfo.isFirstByValRegValid())
- firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
- else {
+ unsigned firstRegToSaveIndex, lastRegToSaveIndex;
+ unsigned RBegin, REnd;
+ if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+ CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+ firstRegToSaveIndex = RBegin - ARM::R0;
+ lastRegToSaveIndex = REnd - ARM::R0;
+ } else {
firstRegToSaveIndex = CCInfo.getFirstUnallocated
(GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ lastRegToSaveIndex = 4;
}
- unsigned VARegSize, VARegSaveSize;
- computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
- if (VARegSaveSize) {
- // If this function is vararg, store any remaining integer argument regs
- // to their spots on the stack so that they may be loaded by deferencing
- // the result of va_next.
- AFI->setVarArgsRegSaveSize(VARegSaveSize);
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
- ArgOffset + VARegSaveSize
- - VARegSize,
- false));
- SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
- getPointerTy());
+ unsigned ArgRegsSize, ArgRegsSaveSize;
+ computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgRegsSize, ArgRegsSaveSize);
+
+ // Store any by-val regs to their spots on the stack so that they may be
+ // loaded by dereferencing the result of the formal parameter pointer or va_next.
+ // Note: once the stack area for byval/varargs registers
+ // has been initialized, it can't be initialized again.
+ if (ArgRegsSaveSize) {
+
+ int FrameIndex = MFI->CreateFixedObject(
+ ArgRegsSaveSize,
+ ArgOffset + ArgRegsSaveSize - ArgRegsSize,
+ false);
+ SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
SmallVector<SDValue, 4> MemOps;
- for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
+ for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
+ ++firstRegToSaveIndex, ++i) {
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = &ARM::tGPRRegClass;
@@ -2661,13 +2728,37 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
}
+
+ AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
+
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
+ return FrameIndex;
} else
// This will point to the next argument passed via stack.
- AFI->setVarArgsFrameIndex(
- MFI->CreateFixedObject(4, ArgOffset, !ForceMutable));
+ return MFI->CreateFixedObject(4, ArgOffset, !ForceMutable);
+}
+
+// Set up the stack frame that the va_list pointer will start from.
+void
+ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ unsigned ArgOffset,
+ bool ForceMutable) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Try to store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by dereferencing
+ // the result of va_next.
+ // If there are no regs to be stored, just point to the address after the
+ // last argument passed via the stack.
+ int FrameIndex =
+ StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
+ 0, ArgOffset, ForceMutable);
+
+ AFI->setVarArgsFrameIndex(FrameIndex);
}
SDValue
@@ -2696,6 +2787,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue ArgValue;
Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
+
+ // Initially ArgRegsSaveSize is zero.
+ // Then we increase this value each time we meet a byval parameter.
+ // We also increase this value in the case of a varargs function.
+ AFI->setArgRegsSaveSize(0);
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
@@ -2793,20 +2890,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (!AFI->getVarArgsFrameIndex()) {
- VarArgStyleRegisters(CCInfo, DAG,
- dl, Chain, CurOrigArg,
- Ins[VA.getValNo()].PartOffset,
- VA.getLocMemOffset(),
- true /*force mutable frames*/);
- int VAFrameIndex = AFI->getVarArgsFrameIndex();
- InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy()));
- } else {
- int FI = MFI->CreateFixedObject(Flags.getByValSize(),
- VA.getLocMemOffset(), false);
- InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
- }
+ unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
+ int FrameIndex = StoreByValRegs(
+ CCInfo, DAG, dl, Chain, CurOrigArg,
+ CurByValIndex,
+ Ins[VA.getValNo()].PartOffset,
+ VA.getLocMemOffset(),
+ true /*force mutable frames*/);
+ InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
+ CCInfo.nextInRegsParam();
} else {
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
VA.getLocMemOffset(), true);
@@ -2824,7 +2916,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// varargs
if (isVarArg)
- VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
CCInfo.getNextStackOffset());
return Chain;
@@ -5165,6 +5257,23 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
return false;
}
+static EVT getExtensionTo64Bits(const EVT &OrigVT) {
+ if (OrigVT.getSizeInBits() >= 64)
+ return OrigVT;
+
+ assert(OrigVT.isSimple() && "Expecting a simple value type");
+
+ MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
+ switch (OrigSimpleTy) {
+ default: llvm_unreachable("Unexpected Vector Type");
+ case MVT::v2i8:
+ case MVT::v2i16:
+ return MVT::v2i32;
+ case MVT::v4i8:
+ return MVT::v4i16;
+ }
+}
+
/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
/// We insert the required extension here to get the vector to fill a D register.
@@ -5180,18 +5289,8 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
return N;
// Must extend size to at least 64 bits to be used as an operand for VMULL.
- MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy;
- EVT NewVT;
- switch (OrigSimpleTy) {
- default: llvm_unreachable("Unexpected Orig Vector Type");
- case MVT::v2i8:
- case MVT::v2i16:
- NewVT = MVT::v2i32;
- break;
- case MVT::v4i8:
- NewVT = MVT::v4i16;
- break;
- }
+ EVT NewVT = getExtensionTo64Bits(OrigTy);
+
return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N);
}
@@ -5201,22 +5300,22 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
/// reach a total size of 64 bits. We have to add the extension separately
/// because ARM does not have a sign/zero extending load for vectors.
static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
- SDValue NonExtendingLoad =
- DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
+ EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
+
+ // The load already has the right type.
+ if (ExtendedTy == LD->getMemoryVT())
+ return DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
LD->isNonTemporal(), LD->isInvariant(),
LD->getAlignment());
- unsigned ExtOp = 0;
- switch (LD->getExtensionType()) {
- default: llvm_unreachable("Unexpected LoadExtType");
- case ISD::EXTLOAD:
- case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break;
- }
- MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy;
- MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy;
- return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG,
- MemType, ExtType, ExtOp);
+
+ // We need to create a zextload/sextload. We cannot just create a load
+ // followed by a zext/sext node because LowerMUL is also run during normal
+ // operation legalization where we can't create illegal types.
+ return DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), ExtendedTy,
+ LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
}
/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
@@ -5614,7 +5713,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 9ee17f0..426010e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -464,7 +464,8 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ bool isThisReturn, SDValue ThisVal) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -473,16 +474,23 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
+ unsigned OffsetFromOrigArg,
+ unsigned ArgOffset,
+ bool ForceMutable) const;
+
void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
DebugLoc dl, SDValue &Chain,
- const Value *OrigArg,
- unsigned OffsetFromOrigArg,
unsigned ArgOffset,
- bool ForceMutable = false)
- const;
+ bool ForceMutable = false) const;
void computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned &VARegSize, unsigned &VARegSaveSize) const;
+ unsigned InRegsParamRecordIdx,
+ unsigned &ArgRegsSize,
+ unsigned &ArgRegsSaveSize) const;
virtual SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index 11550c5..1bd174e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -221,6 +221,9 @@ def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
def HasMP : Predicate<"Subtarget->hasMPExtension()">,
AssemblerPredicate<"FeatureMP",
"mp-extensions">;
+def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">,
+ AssemblerPredicate<"FeatureTrustZone",
+ "TrustZone">;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
def IsThumb : Predicate<"Subtarget->isThumb()">,
@@ -578,6 +581,17 @@ def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
+/// imm0_4 predicate - Immediate in the range [0,4].
+def Imm0_4AsmOperand : ImmAsmOperand
+{
+ let Name = "Imm0_4";
+ let DiagnosticType = "ImmRange0_4";
+}
+def imm0_4 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 5; }]> {
+ let ParserMatchClass = Imm0_4AsmOperand;
+ let DecoderMethod = "DecodeImm0_4";
+}
+
/// imm0_7 predicate - Immediate in the range [0,7].
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
@@ -741,18 +755,26 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
// addrmode_imm12 := reg +/- imm12
//
def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; }
-def addrmode_imm12 : Operand<i32>,
+class AddrMode_Imm12 : Operand<i32>,
ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
// 12-bit immediate operand. Note that instructions using this encode
// #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
// immediate values are as normal.
let EncoderMethod = "getAddrModeImm12OpValue";
- let PrintMethod = "printAddrModeImm12Operand";
let DecoderMethod = "DecodeAddrModeImm12Operand";
let ParserMatchClass = MemImm12OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
+
+def addrmode_imm12 : AddrMode_Imm12 {
+ let PrintMethod = "printAddrModeImm12Operand<false>";
+}
+
+def addrmode_imm12_pre : AddrMode_Imm12 {
+ let PrintMethod = "printAddrModeImm12Operand<true>";
+}
+
// ldst_so_reg := reg +/- reg shop imm
//
def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; }
@@ -852,14 +874,23 @@ def am2offset_imm : Operand<i32>,
//
// FIXME: split into imm vs. reg versions.
def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; }
-def addrmode3 : Operand<i32>,
- ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+class AddrMode3 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode3", []> {
let EncoderMethod = "getAddrMode3OpValue";
- let PrintMethod = "printAddrMode3Operand";
let ParserMatchClass = AddrMode3AsmOperand;
let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
}
+def addrmode3 : AddrMode3
+{
+ let PrintMethod = "printAddrMode3Operand<false>";
+}
+
+def addrmode3_pre : AddrMode3
+{
+ let PrintMethod = "printAddrMode3Operand<true>";
+}
+
// FIXME: split into imm vs. reg versions.
// FIXME: parser method to handle +/- register.
def AM3OffsetAsmOperand : AsmOperandClass {
@@ -885,15 +916,22 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
// addrmode5 := reg +/- imm8*4
//
def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; }
-def addrmode5 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectAddrMode5", []> {
- let PrintMethod = "printAddrMode5Operand";
+class AddrMode5 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode5", []> {
let EncoderMethod = "getAddrMode5OpValue";
let DecoderMethod = "DecodeAddrMode5Operand";
let ParserMatchClass = AddrMode5AsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm);
}
+def addrmode5 : AddrMode5 {
+ let PrintMethod = "printAddrMode5Operand<false>";
+}
+
+def addrmode5_pre : AddrMode5 {
+ let PrintMethod = "printAddrMode5Operand<true>";
+}
+
// addrmode6 := reg with optional alignment
//
def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
@@ -1668,11 +1706,11 @@ def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
NoItinerary, []>;
}
-def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary,
+def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary,
"hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
- bits<8> imm;
- let Inst{27-8} = 0b00110010000011110000;
- let Inst{7-0} = imm;
+ bits<3> imm;
+ let Inst{27-3} = 0b0011001000001111000000000;
+ let Inst{2-0} = imm;
}
def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
@@ -2077,7 +2115,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
// Secure Monitor Call is a system instruction.
def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
- []> {
+ []>, Requires<[IsARM, HasTrustZone]> {
bits<4> opt;
let Inst{23-4} = 0b01100000000000000111;
let Inst{3-0} = opt;
@@ -2238,7 +2276,7 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
multiclass AI2_ldridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> {
def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii,
+ (ins addrmode_imm12_pre:$addr), IndexModePre, LdFrm, iii,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 0;
@@ -2275,6 +2313,7 @@ multiclass AI2_ldridx<bit isByte, string opc,
let Inst{23} = offset{12};
let Inst{19-16} = addr;
let Inst{11-0} = offset{11-0};
+ let Inst{4} = 0;
let DecoderMethod = "DecodeAddrMode2IdxInstruction";
}
@@ -2307,7 +2346,7 @@ defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>;
multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
def _PRE : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins addrmode3:$addr), IndexModePre,
+ (ins addrmode3_pre:$addr), IndexModePre,
LdMiscFrm, itin,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<14> addr;
@@ -2341,7 +2380,7 @@ defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>;
defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>;
let hasExtraDefRegAllocReq = 1 in {
def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
- (ins addrmode3:$addr), IndexModePre,
+ (ins addrmode3_pre:$addr), IndexModePre,
LdMiscFrm, IIC_iLoad_d_ru,
"ldrd", "\t$Rt, $Rt2, $addr!",
"$addr.base = $Rn_wb", []> {
@@ -2497,7 +2536,7 @@ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
multiclass AI2_stridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> {
def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre,
+ (ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre,
StFrm, iii,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
@@ -2619,7 +2658,7 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, addrmode3:$addr), IndexModePre,
+ (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre,
StMiscFrm, IIC_iStore_bh_ru,
"strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<14> addr;
@@ -2651,7 +2690,7 @@ def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
+ (ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr),
IndexModePre, StMiscFrm, IIC_iStore_d_ru,
"strd", "\t$Rt, $Rt2, $addr!",
"$addr.base = $Rn_wb", []> {
@@ -4426,7 +4465,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> {
let Inst{7-0} = addr{7-0};
let DecoderMethod = "DecodeCopMemInstruction";
}
- def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
asm, "\t$cop, $CRd, $addr!", IndexModePre> {
bits<13> addr;
bits<4> cop;
@@ -4497,7 +4536,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
let Inst{7-0} = addr{7-0};
let DecoderMethod = "DecodeCopMemInstruction";
}
- def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
asm, "\t$cop, $CRd, $addr!", IndexModePre> {
bits<13> addr;
bits<4> cop;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index 0411ac4..896fd0f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -4316,6 +4316,24 @@ def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+ (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+ (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+ (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+ (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+ (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+ (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+ (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+ (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+
// Vector Bitwise Operations.
def vnotd : PatFrag<(ops node:$in),
@@ -4889,6 +4907,29 @@ def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
"vabs", "f32",
v4f32, v4f32, fabs>;
+def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
+ (v2i32 (bitconvert (v8i8 (add DPR:$src,
+ (NEONvshrs DPR:$src, (i32 7))))))),
+ (VABSv8i8 DPR:$src)>;
+def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
+ (v2i32 (bitconvert (v4i16 (add DPR:$src,
+ (NEONvshrs DPR:$src, (i32 15))))))),
+ (VABSv4i16 DPR:$src)>;
+def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
+ (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
+ (VABSv2i32 DPR:$src)>;
+def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
+ (v4i32 (bitconvert (v16i8 (add QPR:$src,
+ (NEONvshrs QPR:$src, (i32 7))))))),
+ (VABSv16i8 QPR:$src)>;
+def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
+ (v4i32 (bitconvert (v8i16 (add QPR:$src,
+ (NEONvshrs QPR:$src, (i32 15))))))),
+ (VABSv8i16 QPR:$src)>;
+def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
+ (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
+ (VABSv4i32 QPR:$src)>;
+
def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>;
def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index c9d709e..4dacb86 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -150,7 +150,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{
def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
def t2addrmode_imm12 : Operand<i32>,
ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
- let PrintMethod = "printAddrModeImm12Operand";
+ let PrintMethod = "printAddrModeImm12Operand<false>";
let EncoderMethod = "getAddrModeImm12OpValue";
let DecoderMethod = "DecodeT2AddrModeImm12";
let ParserMatchClass = t2addrmode_imm12_asmoperand;
@@ -3401,12 +3401,7 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
bits<5> mode;
bit M;
- let Inst{31-27} = 0b11110;
- let Inst{26} = 0;
- let Inst{25-20} = 0b111010;
- let Inst{19-16} = 0b1111;
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
+ let Inst{31-11} = 0b111100111010111110000;
let Inst{10-9} = imod;
let Inst{8} = M;
let Inst{7-5} = iflags;
@@ -3425,13 +3420,13 @@ let imod = 0, iflags = 0, M = 1 in
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
-def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{
- bits<8> imm;
- let Inst{31-8} = 0b111100111010111110000000;
- let Inst{7-0} = imm;
+def t2HINT : T2I<(outs), (ins imm0_4:$imm), NoItinerary, "hint", "\t$imm",[]> {
+ bits<3> imm;
+ let Inst{31-3} = 0b11110011101011111000000000000;
+ let Inst{2-0} = imm;
}
-def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>;
+def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_4:$imm, pred:$p)>;
def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>;
def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>;
def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>;
@@ -3449,7 +3444,8 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
// Secure Monitor Call is a system instruction.
// Option = Inst{19-16}
-def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> {
+def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
+ []>, Requires<[IsThumb2, HasTrustZone]> {
let Inst{31-27} = 0b11110;
let Inst{26-20} = 0b1111111;
let Inst{15-12} = 0b1000;
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index b7ac5d5..c8ed576 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -87,53 +87,6 @@ namespace {
MachineBasicBlock::iterator i)
: Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
};
- class UnitRegsMap {
- public:
- UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {}
- const SmallVector<unsigned, 4>& operator[](unsigned Reg) {
- DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found =
- Cache.find(Reg);
- if (found != Cache.end())
- return found->second;
- else
- return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg)))
- .first->second;
- }
- private:
- SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) {
- SmallVector<unsigned, 4> Res;
-
- const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg);
- if (TRC == &ARM::QPRRegClass) {
- if (Reg > ARM::Q7) {
- Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0));
- Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1));
- return Res;
- }
-
- Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
- Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
- Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2));
- Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3));
-
- return Res;
- }
-
- if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) {
- Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
- Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
-
- return Res;
- }
-
- Res.push_back(Reg);
-
- return Res;
-
- }
- const TargetRegisterInfo* TRI;
- DenseMap<unsigned, SmallVector<unsigned, 4> > Cache;
- };
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
@@ -175,11 +128,6 @@ namespace {
MachineBasicBlock::iterator MBBI,
bool &Advance,
MachineBasicBlock::iterator &I);
- unsigned AddMemOp(MemOpQueue& MemOps,
- const MemOpQueueEntry newEntry,
- UnitRegsMap& UnitRegsInfo,
- SmallSet<unsigned, 4>& UsedUnitRegs,
- unsigned At = -1U);
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
@@ -1265,103 +1213,12 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
return false;
}
-/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti.
-/// It adds store mem ops with simple push_back/insert method,
-/// without any additional logic.
-/// For load operation it does the next:
-/// 1. Adds new load operation into MemOp collection at "At" position.
-/// 2. Removes any "load" operations from MemOps, that changes "Reg" register
-/// contents, prior to "At".
-/// UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector >
-/// UsedUnitRegs - set of unit-registers currently in use.
-/// At - position at which it would added, and prior which the clean-up
-/// should be made (for load operation).
-/// FIXME: The clean-up also should be made for store operations,
-/// but the memory address should be analyzed instead of unit registers.
-unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps,
- const MemOpQueueEntry NewEntry,
- UnitRegsMap& UnitRegsInfo,
- SmallSet<unsigned, 4>& UsedUnitRegs,
- unsigned At) {
- unsigned Cleaned = 0;
-
- if (At == -1U) {
- At = MemOps.size();
- MemOps.push_back(NewEntry);
- } else
- MemOps.insert(&MemOps[At], NewEntry);
-
- // FIXME:
- // If operation is not load, leave it as is by now,
- // So 0 overridden ops would cleaned in this case.
- if (!NewEntry.MBBI->mayLoad())
- return 0;
-
- const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg];
-
- bool FoundOverriddenLoads = false;
-
- for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i)
- if (UsedUnitRegs.count(NewEntryUnitRegs[i])) {
- FoundOverriddenLoads = true;
- break;
- }
-
- // If we detect that this register is used by load operations that are
- // predecessors for the new one, remove them from MemOps then.
- if (FoundOverriddenLoads) {
- MemOpQueue UpdatedMemOps;
-
- // Scan through MemOps entries.
- for (unsigned i = 0; i != At; ++i) {
- MemOpQueueEntry& MemOpEntry = MemOps[i];
-
- // FIXME: Skip non-load operations by now.
- if (!MemOpEntry.MBBI->mayLoad())
- continue;
-
- const SmallVector<unsigned, 4>& MemOpUnitRegs =
- UnitRegsInfo[MemOpEntry.Reg];
-
- // Lookup entry that loads contents into register used by new entry.
- bool ReleaseThisEntry = false;
- for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) {
- if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(),
- MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) {
- ReleaseThisEntry = true;
- ++Cleaned;
- break;
- }
- }
-
- if (ReleaseThisEntry) {
- const SmallVector<unsigned, 4>& RelesedRegs = UnitRegsInfo[MemOpEntry.Reg];
- for (unsigned r = 0, er = RelesedRegs.size(); r != er; ++r)
- UsedUnitRegs.erase(RelesedRegs[r]);
- } else
- UpdatedMemOps.push_back(MemOpEntry);
- }
-
- // Keep anything without changes after At position.
- for (unsigned i = At, e = MemOps.size(); i != e; ++i)
- UpdatedMemOps.push_back(MemOps[i]);
-
- MemOps.swap(UpdatedMemOps);
- }
-
- UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end());
-
- return Cleaned;
-}
-
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
unsigned NumMerges = 0;
unsigned NumMemOps = 0;
MemOpQueue MemOps;
- UnitRegsMap UnitRegsInfo(TRI);
- SmallSet<unsigned, 4> UsedRegUnits;
unsigned CurrBase = 0;
int CurrOpc = -1;
unsigned CurrSize = 0;
@@ -1401,6 +1258,22 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// merge the ldr's so far, including this one. But don't try to
// combine the following ldr(s).
Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
+
+ // Watch out for:
+ // r4 := ldr [r0, #8]
+ // r4 := ldr [r0, #4]
+ //
+ // The optimization may reorder the second ldr in front of the first
+ // ldr, which violates the write-after-write (WAW) dependence. The same
+ // applies to str. Try to merge the inst(s) already in MemOps.
+ bool Overlap = false;
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
+ if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
+ Overlap = true;
+ break;
+ }
+ }
+
if (CurrBase == 0 && !Clobber) {
// Start of a new chain.
CurrBase = Base;
@@ -1408,13 +1281,10 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
CurrSize = Size;
CurrPred = Pred;
CurrPredReg = PredReg;
-
MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
++NumMemOps;
- const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg];
- UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end());
Advance = true;
- } else {
+ } else if (!Overlap) {
if (Clobber) {
TryMerge = true;
Advance = true;
@@ -1424,24 +1294,20 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// No need to match PredReg.
// Continue adding to the queue.
if (Offset > MemOps.back().Offset) {
- unsigned OverridesCleaned =
- AddMemOp(MemOps,
- MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI),
- UnitRegsInfo, UsedRegUnits) != 0;
- NumMemOps += 1 - OverridesCleaned;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
Advance = true;
} else {
- for (unsigned I = 0; I != NumMemOps; ++I) {
- if (Offset < MemOps[I].Offset) {
- MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI);
- unsigned OverridesCleaned =
- AddMemOp(MemOps, entry, UnitRegsInfo,
- UsedRegUnits, I) != 0;
- NumMemOps += 1 - OverridesCleaned;
-
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
+ I != E; ++I) {
+ if (Offset < I->Offset) {
+ MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
Advance = true;
break;
- } else if (Offset == MemOps[I].Offset) {
+ } else if (Offset == I->Offset) {
// Collision! This can't be merged!
break;
}
@@ -1512,7 +1378,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
CurrPredReg = 0;
if (NumMemOps) {
MemOps.clear();
- UsedRegUnits.clear();
NumMemOps = 0;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 88d96c0..f4248fc 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -38,7 +38,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// VarArgsRegSaveSize - Size of the register save area for vararg functions.
///
- unsigned VarArgsRegSaveSize;
+ unsigned ArgRegsSaveSize;
/// HasStackFrame - True if this function has a stack frame. Set by
/// processFunctionBeforeCalleeSavedScan().
@@ -117,7 +117,7 @@ public:
ARMFunctionInfo() :
isThumb(false),
hasThumb2(false),
- VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -129,7 +129,7 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
- VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -141,8 +141,8 @@ public:
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
bool isThumb2Function() const { return isThumb && hasThumb2; }
- unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
- void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+ unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
+ void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 739300e..8653c46 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -91,6 +91,7 @@ void ARMSubtarget::initializeEnvironment() {
HasRAS = false;
HasMPExtension = false;
FPOnlySP = false;
+ HasTrustZone = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
UseNaClTrap = false;
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index 5b5ee6a..038eb76 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -148,6 +148,9 @@ protected:
/// precision.
bool FPOnlySP;
+ /// HasTrustZone - if true, processor supports TrustZone security extensions
+ bool HasTrustZone;
+
/// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
/// accesses for some types. For details, see
/// ARMTargetLowering::allowsUnalignedMemoryAccesses().
@@ -251,6 +254,7 @@ public:
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
bool isFPOnlySP() const { return FPOnlySP; }
+ bool hasTrustZone() const { return HasTrustZone; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 1019b97..53ece66 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -125,6 +125,10 @@ public:
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
unsigned getAddressComputationCost(Type *Val) const;
+
+ unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Op1Info = OK_AnyValue,
+ OperandValueKind Op2Info = OK_AnyValue) const;
/// @}
};
@@ -223,9 +227,9 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
- // Operations that we legalize using load/stores to the stack.
- { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 },
- { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 },
+ // Operations that we legalize using splitting.
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
// Vector float <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
@@ -456,3 +460,67 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
return LT.first * NEONShuffleTbl[Idx].Cost;
}
+
+unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+ OperandValueKind Op2Info) const {
+
+ int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ const unsigned FunctionCallDivCost = 20;
+ const unsigned ReciprocalDivCost = 10;
+ static const CostTblEntry<MVT> CostTbl[] = {
+ // Division.
+ // These costs are somewhat arbitrary. Choose a cost of 20 to indicate that
+ // vectorizing division (added function call) is going to be very expensive.
+ // Double registers types.
+ { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
+ { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
+ { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
+ { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
+ { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
+ // Quad register types.
+ { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
+ // Multiplication.
+ };
+
+ int Idx = -1;
+
+ if (ST->hasNEON())
+ Idx = CostTableLookup<MVT>(CostTbl, array_lengthof(CostTbl), ISDOpcode,
+ LT.second);
+
+ if (Idx != -1)
+ return LT.first * CostTbl[Idx].Cost;
+
+
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+ Op2Info);
+}
+
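As a worked example of the division cost table above (a sketch assuming both vector types legalize directly, i.e. LT.first == 1): with NEON available, an SDIV on v4i32 matches the { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost } entry and returns 1 * 4 * 20 = 80, while a UDIV on v8i8 hits a ReciprocalDivCost entry and returns 1 * 10 = 10; any opcode/type pair not in the table falls through to the generic TargetTransformInfo::getArithmeticInstrCost.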
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index ed7b7ec..1dd2953 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -86,11 +86,11 @@ class ARMAsmParser : public MCTargetAsmParser {
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
bool Warning(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ ArrayRef<SMRange> Ranges = None) {
return Parser.Warning(L, Msg, Ranges);
}
bool Error(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ ArrayRef<SMRange> Ranges = None) {
return Parser.Error(L, Msg, Ranges);
}
@@ -610,6 +610,13 @@ public:
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
}
+ bool isImm0_4() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 5;
+ }
bool isImm0_1020s4() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -4745,6 +4752,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" ||
Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
+ Mnemonic == "vaclt" || Mnemonic == "vacle" ||
Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
@@ -5014,8 +5022,8 @@ static bool isDataTypeToken(StringRef Tok) {
static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
}
-
-static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
+static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features,
+ unsigned VariantID);
/// Parse an arm instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc,
@@ -5026,7 +5034,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// MatchInstructionImpl(), but that's too late for aliases that include
// any sort of suffix.
unsigned AvailableFeatures = getAvailableFeatures();
- applyMnemonicAliases(Name, AvailableFeatures);
+ unsigned AssemblerDialect = getParser().getAssemblerDialect();
+ applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect);
// First check for the ARM-specific .req directive.
if (Parser.getTok().is(AsmToken::Identifier) &&
@@ -7613,6 +7622,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires ARMv6 or later");
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
+ case Match_ImmRange0_4: {
+ SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ return Error(ErrorLoc, "immediate operand must be in the range [0,4]");
+ }
case Match_ImmRange0_15: {
SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 2e009e5..ac937f3 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -308,6 +308,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
@@ -1951,10 +1953,12 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
Inst.addOperand(MCOperand::CreateImm(mode));
if (iflags) S = MCDisassembler::SoftFail;
} else {
- // imod == '00' && M == '0' --> UNPREDICTABLE
- Inst.setOpcode(ARM::t2CPS1p);
- Inst.addOperand(MCOperand::CreateImm(mode));
- S = MCDisassembler::SoftFail;
+ // imod == '00' && M == '0' --> this is a HINT instruction
+ int imm = fieldFromInstruction(Insn, 0, 8);
+ // HINT is defined only for immediates in [0..4]
+ if (imm > 4) return MCDisassembler::Fail;
+ Inst.setOpcode(ARM::t2HINT);
+ Inst.addOperand(MCOperand::CreateImm(imm));
}
return S;
@@ -1996,9 +2000,10 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
if (Inst.getOpcode() == ARM::MOVTi16)
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
@@ -3570,7 +3575,7 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
unsigned pred = fieldFromInstruction(Insn, 28, 4);
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
@@ -4496,6 +4501,15 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder)
+{
+ unsigned Imm = fieldFromInstruction(Insn, 0, 3);
+ if (Imm > 4) return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 2afb20d..3bcd083 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -490,7 +490,8 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
}
void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+ raw_ostream &O,
+ bool AlwaysPrintImm0) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
@@ -509,7 +510,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
- if (ImmOffs || (op == ARM_AM::sub)) {
+ if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) {
O << ", "
<< markup("<imm:")
<< "#"
@@ -520,6 +521,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
O << ']' << markup(">");
}
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
@@ -535,7 +537,7 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
printAM3PostIndexOp(MI, Op, O);
return;
}
- printAM3PreOrOffsetIndexOp(MI, Op, O);
+ printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0);
}
void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
@@ -593,6 +595,7 @@ void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
O << ARM_AM::getAMSubModeStr(Mode);
}
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
@@ -608,7 +611,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
- if (ImmOffs || Op == ARM_AM::sub) {
+ if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
O << ", "
<< markup("<imm:")
<< "#"
@@ -1022,6 +1025,7 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
}
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
@@ -1042,13 +1046,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
OffImm = 0;
if (isSub) {
O << ", "
- << markup("<imm:")
+ << markup("<imm:")
<< "#-" << -OffImm
<< markup(">");
}
- else if (OffImm > 0) {
+ else if (AlwaysPrintImm0 || OffImm > 0) {
O << ", "
- << markup("<imm:")
+ << markup("<imm:")
<< "#" << OffImm
<< markup(">");
}
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index edff75d..344104e 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -47,12 +47,13 @@ public:
raw_ostream &O);
void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
-
+ template <bool AlwaysPrintImm0>
void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O);
- void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O);
+ void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O,
+ bool AlwaysPrintImm0);
void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -60,6 +61,7 @@ public:
raw_ostream &O);
void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -91,6 +93,7 @@ public:
raw_ostream &O);
void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ template<bool AlwaysPrintImm0>
void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 418971d..6c3d247 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -13,7 +13,9 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMRegisterInfo.h"
#include "ARMUnwindOp.h"
+#include "ARMUnwindOpAsm.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -26,6 +28,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
@@ -33,11 +36,15 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static std::string GetAEABIUnwindPersonalityName(unsigned Index) {
+ assert(Index < NUM_PERSONALITY_INDEX && "Invalid personality index");
+ return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str();
+}
+
namespace {
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
@@ -57,8 +64,9 @@ public:
ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
MCCodeEmitter *Emitter, bool IsThumb)
: MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter),
- IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0),
- FnStart(0), Personality(0), CantUnwind(false) {}
+ IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) {
+ Reset();
+ }
~ARMELFStreamer() {}
@@ -75,14 +83,15 @@ public:
virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool isVector);
- virtual void ChangeSection(const MCSection *Section) {
+ virtual void ChangeSection(const MCSection *Section,
+ const MCExpr *Subsection) {
// We have to keep track of the mapping symbol state of any sections we
// use. Each one should start off as EMS_None, which is provided as the
// default constructor by DenseMap::lookup.
- LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastMappingSymbols[getPreviousSection().first] = LastEMS;
LastEMS = LastMappingSymbols.lookup(Section);
- MCELFStreamer::ChangeSection(Section);
+ MCELFStreamer::ChangeSection(Section, Subsection);
}
/// This function is the one used to emit instruction data into the ELF
@@ -175,7 +184,7 @@ private:
MCELF::SetType(SD, ELF::STT_NOTYPE);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
- Symbol->setSection(*getCurrentSection());
+ Symbol->setSection(*getCurrentSection().first);
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
Symbol->setVariableValue(Value);
@@ -194,6 +203,7 @@ private:
void Reset();
void EmitPersonalityFixup(StringRef Name);
+ void CollectUnwindOpcodes();
void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
SectionKind Kind, const MCSymbol &Fn);
@@ -210,9 +220,16 @@ private:
MCSymbol *ExTab;
MCSymbol *FnStart;
const MCSymbol *Personality;
+ uint32_t VFPRegSave; // Register mask for {d31-d0}
+ uint32_t RegSave; // Register mask for {r15-r0}
+ int64_t SPOffset;
+ uint16_t FPReg;
+ int64_t FPOffset;
+ bool UsedFP;
bool CantUnwind;
+ UnwindOpcodeAssembler UnwindOpAsm;
};
-}
+} // end anonymous namespace
inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
unsigned Type,
@@ -238,7 +255,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
} else {
EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind);
}
- assert(EHSection);
+ assert(EHSection && "Failed to get the required EH section");
// Switch to .ARM.extab or .ARM.exidx section
SwitchSection(EHSection);
@@ -262,10 +279,20 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
}
void ARMELFStreamer::Reset() {
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
ExTab = NULL;
FnStart = NULL;
Personality = NULL;
+ VFPRegSave = 0;
+ RegSave = 0;
+ FPReg = MRI.getEncodingValue(ARM::SP);
+ FPOffset = 0;
+ SPOffset = 0;
+ UsedFP = false;
CantUnwind = false;
+
+ UnwindOpAsm.Reset();
}
// Add the R_ARM_NONE fixup at the same position
@@ -284,6 +311,18 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
MCFixup::getKindForSize(4, false)));
}
+void ARMELFStreamer::CollectUnwindOpcodes() {
+ if (UsedFP) {
+ UnwindOpAsm.EmitSetFP(FPReg);
+ UnwindOpAsm.EmitSPOffset(-FPOffset);
+ } else {
+ UnwindOpAsm.EmitSPOffset(SPOffset);
+ }
+ UnwindOpAsm.EmitVFPRegSave(VFPRegSave);
+ UnwindOpAsm.EmitRegSave(RegSave);
+ UnwindOpAsm.Finalize();
+}
+
void ARMELFStreamer::EmitFnStart() {
assert(FnStart == 0);
FnStart = getContext().CreateTempSymbol();
@@ -294,35 +333,29 @@ void ARMELFStreamer::EmitFnEnd() {
assert(FnStart && ".fnstart must precede .fnend");
// Emit unwind opcodes if there is no .handlerdata directive
- int PersonalityIndex = -1;
if (!ExTab && !CantUnwind) {
- // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab.
- SwitchToExTabSection(*FnStart);
-
- // Create .ARM.extab label for offset in .ARM.exidx
- ExTab = getContext().CreateTempSymbol();
- EmitLabel(ExTab);
-
- PersonalityIndex = 1;
-
- uint32_t Entry = 0;
- uint32_t NumExtraEntryWords = 0;
- Entry |= NumExtraEntryWords << 24;
- Entry |= (EHT_COMPACT | PersonalityIndex) << 16;
-
- // TODO: This should be generated according to .save, .vsave, .setfp
- // directives. Currently, we are simply generating FINISH opcode.
- Entry |= UNWIND_OPCODE_FINISH << 8;
- Entry |= UNWIND_OPCODE_FINISH;
-
- EmitIntValue(Entry, 4, 0);
+ CollectUnwindOpcodes();
+
+ unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
+ if (PersonalityIndex == AEABI_UNWIND_CPP_PR1 ||
+ PersonalityIndex == AEABI_UNWIND_CPP_PR2) {
+ // For the __aeabi_unwind_cpp_pr1 and __aeabi_unwind_cpp_pr2, we have to
+ // emit the unwind opcodes in the corresponding ".ARM.extab" section, and
+ // then emit a reference to these unwind opcodes in the second word of
+ // the exception index table entry.
+ SwitchToExTabSection(*FnStart);
+ ExTab = getContext().CreateTempSymbol();
+ EmitLabel(ExTab);
+ EmitBytes(UnwindOpAsm.data(), 0);
+ }
}
// Emit the exception index table entry
SwitchToExIdxSection(*FnStart);
- if (PersonalityIndex == 1)
- EmitPersonalityFixup("__aeabi_unwind_cpp_pr1");
+ unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
+ if (PersonalityIndex < NUM_PERSONALITY_INDEX)
+ EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex));
const MCSymbolRefExpr *FnStartRef =
MCSymbolRefExpr::Create(FnStart,
@@ -333,12 +366,22 @@ void ARMELFStreamer::EmitFnEnd() {
if (CantUnwind) {
EmitIntValue(EXIDX_CANTUNWIND, 4, 0);
- } else {
+ } else if (ExTab) {
+ // Emit a reference to the unwind opcodes in the ".ARM.extab" section.
const MCSymbolRefExpr *ExTabEntryRef =
MCSymbolRefExpr::Create(ExTab,
MCSymbolRefExpr::VK_ARM_PREL31,
getContext());
EmitValue(ExTabEntryRef, 4, 0);
+ } else {
+ // For __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in the
+ // second word of the exception index table entry. The size of the unwind
+ // opcodes should always be 4 bytes.
+ assert(PersonalityIndex == AEABI_UNWIND_CPP_PR0 &&
+ "Compact model must use __aeabi_cpp_unwind_pr0 as personality");
+ assert(UnwindOpAsm.size() == 4u &&
+ "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4");
+ EmitBytes(UnwindOpAsm.data(), 0);
}
// Clean exception handling frame information
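To make the EmitFnEnd() bookkeeping above easier to follow, here is a hedged sketch of the resulting .ARM.exidx entry layout (field meanings follow the ARM EHABI description; the struct is purely illustrative and is not a type used by the streamer):

#include <cstdint>

// Illustrative shape of one .ARM.exidx entry as EmitFnEnd() lays it out:
// word 0 is a PREL31 reference to the function start; word 1 is either
// EXIDX_CANTUNWIND, a PREL31 reference into .ARM.extab (pr1/pr2), or the
// inline compact unwind word itself (pr0).
struct ExIdxEntry {
  uint32_t FnStartPrel31;   // resolved by a relocation at link time
  uint32_t UnwindWordOrRef; // cantunwind marker / extab reference / pr0 word
};

static_assert(sizeof(ExIdxEntry) == 8,
              "each exception index table entry occupies two 32-bit words");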
@@ -368,36 +411,54 @@ void ARMELFStreamer::EmitHandlerData() {
EmitValue(PersonalityRef, 4, 0);
// Emit unwind opcodes
- uint32_t Entry = 0;
- uint32_t NumExtraEntryWords = 0;
-
- // TODO: This should be generated according to .save, .vsave, .setfp
- // directives. Currently, we are simply generating FINISH opcode.
- Entry |= NumExtraEntryWords << 24;
- Entry |= UNWIND_OPCODE_FINISH << 16;
- Entry |= UNWIND_OPCODE_FINISH << 8;
- Entry |= UNWIND_OPCODE_FINISH;
-
- EmitIntValue(Entry, 4, 0);
+ CollectUnwindOpcodes();
+ EmitBytes(UnwindOpAsm.data(), 0);
}
void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
Personality = Per;
+ UnwindOpAsm.setPersonality(Per);
}
-void ARMELFStreamer::EmitSetFP(unsigned NewFpReg,
- unsigned NewSpReg,
+void ARMELFStreamer::EmitSetFP(unsigned NewFPReg,
+ unsigned NewSPReg,
int64_t Offset) {
- // TODO: Not implemented
+ assert(SPOffset == 0 &&
+ "Current implementation assumes .setfp precedes .pad");
+
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
+ uint16_t NewFPRegEncVal = MRI.getEncodingValue(NewFPReg);
+#ifndef NDEBUG
+ uint16_t NewSPRegEncVal = MRI.getEncodingValue(NewSPReg);
+#endif
+
+ assert((NewSPReg == ARM::SP || NewSPRegEncVal == FPReg) &&
+ "the operand of .setfp directive should be either $sp or $fp");
+
+ UsedFP = true;
+ FPReg = NewFPRegEncVal;
+ FPOffset = Offset;
}
void ARMELFStreamer::EmitPad(int64_t Offset) {
- // TODO: Not implemented
+ SPOffset += Offset;
}
void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool IsVector) {
- // TODO: Not implemented
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
+#ifndef NDEBUG
+ unsigned Max = IsVector ? 32 : 16;
+#endif
+ uint32_t &RegMask = IsVector ? VFPRegSave : RegSave;
+
+ for (size_t i = 0; i < RegList.size(); ++i) {
+ unsigned Reg = MRI.getEncodingValue(RegList[i]);
+ assert(Reg < Max && "Register encoded value out of range");
+ RegMask |= 1u << Reg;
+ }
}
namespace llvm {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
index dad5576..fa4add6 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
@@ -107,6 +107,19 @@ namespace llvm {
UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0
};
+ /// ARM-defined Personality Routine Index
+ enum ARMPersonalityRoutineIndex {
+ // To make the exception handling table more compact, ARM defines several
+ // personality routines in EHABI. There are currently 3 different
+ // personality routines in ARM EHABI, and at most 16 pre-defined
+ // personality routines are possible.
+ AEABI_UNWIND_CPP_PR0 = 0,
+ AEABI_UNWIND_CPP_PR1 = 1,
+ AEABI_UNWIND_CPP_PR2 = 2,
+
+ NUM_PERSONALITY_INDEX
+ };
+
}
#endif // ARM_UNWIND_OP_H
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
new file mode 100644
index 0000000..191db69
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -0,0 +1,198 @@
+//===-- ARMUnwindOpAsm.cpp - ARM Unwind Opcodes Assembler -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the unwind opcode assembler for the ARM exception
+// handling table.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMUnwindOpAsm.h"
+
+#include "ARMUnwindOp.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+
+void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
+ if (RegSave == 0u)
+ return;
+
+ // One-byte opcode to save registers r14 and r11-r4
+ if (RegSave & (1u << 4)) {
+ // The one-byte opcode always saves r4, so we can't use it when r4 is not
+ // listed in the .save directive.
+
+ // Compute the consecutive registers from r4 to r11.
+ uint32_t Range = 0;
+ uint32_t Mask = (1u << 4);
+ for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) {
+ if ((RegSave & Bit) == 0u)
+ break;
+ ++Range;
+ Mask |= Bit;
+ }
+
+ // Emit this opcode only when the mask covers all of the saved registers.
+ uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask);
+ if (UnmaskedReg == 0u) {
+ // Pop r[4 : (4 + n)]
+ Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range);
+ RegSave &= 0x000fu;
+ } else if (UnmaskedReg == (1u << 14)) {
+ // Pop r[14] + r[4 : (4 + n)]
+ Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range);
+ RegSave &= 0x000fu;
+ }
+ }
+
+ // Two-byte opcode to save registers r15-r4
+ if ((RegSave & 0xfff0u) != 0) {
+ uint32_t Op = UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4);
+ Ops.push_back(static_cast<uint8_t>(Op >> 8));
+ Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ }
+
+ // Opcode to save registers r3-r0
+ if ((RegSave & 0x000fu) != 0) {
+ uint32_t Op = UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu);
+ Ops.push_back(static_cast<uint8_t>(Op >> 8));
+ Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ }
+}
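To make the mask-splitting above concrete, here is a hedged, standalone re-implementation with the opcode constants written out inline (the authoritative values live in ARMUnwindOp.h; treat the literals below as illustrative only):

#include <cstdint>
#include <cstdio>
#include <vector>

// Simplified mirror of EmitRegSave(): r4..r11 (optionally plus r14) prefer
// the one-byte "pop r4-r(4+n)" forms; anything left in r4..r15 falls back to
// the two-byte mask form; r0..r3 use the separate low-register mask form.
static void EncodeSaveMask(uint32_t RegSave, std::vector<uint8_t> &Out) {
  const uint8_t  PopR4Range    = 0xa0;   // pop r4-r(4+n)
  const uint8_t  PopR4RangeR14 = 0xa8;   // pop r4-r(4+n), r14
  const uint16_t PopMaskR4     = 0x8000; // pop registers under mask {r15-r4}
  const uint16_t PopMaskLow    = 0xb100; // pop registers under mask {r3-r0}

  if (RegSave & (1u << 4)) {
    uint32_t Range = 0, Mask = 1u << 4;
    for (uint32_t Bit = 1u << 5; Bit < (1u << 12); Bit <<= 1) {
      if (!(RegSave & Bit))
        break;
      ++Range;
      Mask |= Bit;
    }
    uint32_t Unmasked = RegSave & 0xfff0u & ~Mask;
    if (Unmasked == 0) {
      Out.push_back(PopR4Range | Range);
      RegSave &= 0x000fu;
    } else if (Unmasked == (1u << 14)) {
      Out.push_back(PopR4RangeR14 | Range);
      RegSave &= 0x000fu;
    }
  }
  if (RegSave & 0xfff0u) {
    uint16_t Op = PopMaskR4 | (RegSave >> 4);
    Out.push_back(static_cast<uint8_t>(Op >> 8));
    Out.push_back(static_cast<uint8_t>(Op & 0xff));
  }
  if (RegSave & 0x000fu) {
    uint16_t Op = PopMaskLow | (RegSave & 0x000fu);
    Out.push_back(static_cast<uint8_t>(Op >> 8));
    Out.push_back(static_cast<uint8_t>(Op & 0xff));
  }
}

int main() {
  std::vector<uint8_t> Ops;
  // .save {r4-r7, r14} --> register mask 0x40f0, encoded in a single byte.
  EncodeSaveMask(0x40f0u, Ops);
  for (uint8_t B : Ops)
    std::printf("%02x ", unsigned(B)); // prints "ab " with these constants
  std::printf("\n");
  return 0;
}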
+
+/// Emit unwind opcodes for .vsave directives
+void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
+ size_t i = 32;
+
+ while (i > 16) {
+ uint32_t Bit = 1u << (i - 1);
+ if ((VFPRegSave & Bit) == 0u) {
+ --i;
+ continue;
+ }
+
+ uint32_t Range = 0;
+
+ --i;
+ Bit >>= 1;
+
+ while (i > 16 && (VFPRegSave & Bit)) {
+ --i;
+ ++Range;
+ Bit >>= 1;
+ }
+
+ uint32_t Op =
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | ((i - 16) << 4) | Range;
+ Ops.push_back(static_cast<uint8_t>(Op >> 8));
+ Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ }
+
+ while (i > 0) {
+ uint32_t Bit = 1u << (i - 1);
+ if ((VFPRegSave & Bit) == 0u) {
+ --i;
+ continue;
+ }
+
+ uint32_t Range = 0;
+
+ --i;
+ Bit >>= 1;
+
+ while (i > 0 && (VFPRegSave & Bit)) {
+ --i;
+ ++Range;
+ Bit >>= 1;
+ }
+
+ uint32_t Op = UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range;
+ Ops.push_back(static_cast<uint8_t>(Op >> 8));
+ Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ }
+}
+
+/// Emit unwind opcodes for .setfp directives
+void UnwindOpcodeAssembler::EmitSetFP(uint16_t FPReg) {
+ Ops.push_back(UNWIND_OPCODE_SET_VSP | FPReg);
+}
+
+/// Emit unwind opcodes to update stack pointer
+void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) {
+ if (Offset > 0x200) {
+ uint8_t Buff[10];
+ size_t Size = encodeULEB128((Offset - 0x204) >> 2, Buff);
+ Ops.push_back(UNWIND_OPCODE_INC_VSP_ULEB128);
+ Ops.append(Buff, Buff + Size);
+ } else if (Offset > 0) {
+ if (Offset > 0x100) {
+ Ops.push_back(UNWIND_OPCODE_INC_VSP | 0x3fu);
+ Offset -= 0x100;
+ }
+ Ops.push_back(UNWIND_OPCODE_INC_VSP |
+ static_cast<uint8_t>((Offset - 4) >> 2));
+ } else if (Offset < 0) {
+ while (Offset < -0x100) {
+ Ops.push_back(UNWIND_OPCODE_DEC_VSP | 0x3fu);
+ Offset += 0x100;
+ }
+ Ops.push_back(UNWIND_OPCODE_DEC_VSP |
+ static_cast<uint8_t>(((-Offset) - 4) >> 2));
+ }
+}
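A small worked example of the single-byte vsp adjustment above, under the assumption that the encoding follows the EHABI rule "vsp = vsp + (imm << 2) + 4" for the 00xxxxxx opcode (a hedged illustration of that one branch, not the full EmitSPOffset logic):

#include <cassert>
#include <cstdint>

// Encode a small positive stack adjustment (4..0x100 bytes, multiple of 4)
// as a single 00xxxxxx byte, meaning "vsp = vsp + (xxxxxx << 2) + 4".
inline uint8_t EncodeSmallIncVSP(int64_t Offset) {
  assert(Offset >= 4 && Offset <= 0x100 && (Offset & 3) == 0);
  return static_cast<uint8_t>((Offset - 4) >> 2);
}

int main() {
  assert(EncodeSmallIncVSP(16) == 0x03);    // .pad #16
  assert(EncodeSmallIncVSP(0x100) == 0x3f); // largest single-byte increment
  return 0;
}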
+
+void UnwindOpcodeAssembler::AddOpcodeSizePrefix(size_t Pos) {
+ size_t SizeInWords = (size() + 3) / 4;
+ assert(SizeInWords <= 0x100u &&
+ "Only 256 additional words are allowed for unwind opcodes");
+ Ops[Pos] = static_cast<uint8_t>(SizeInWords - 1);
+}
+
+void UnwindOpcodeAssembler::AddPersonalityIndexPrefix(size_t Pos, unsigned PI) {
+ assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix");
+ Ops[Pos] = EHT_COMPACT | PI;
+}
+
+void UnwindOpcodeAssembler::EmitFinishOpcodes() {
+ for (size_t i = (0x4u - (size() & 0x3u)) & 0x3u; i > 0; --i)
+ Ops.push_back(UNWIND_OPCODE_FINISH);
+}
+
+void UnwindOpcodeAssembler::Finalize() {
+ if (HasPersonality) {
+ // Personality specified by .personality directive
+ Offset = 1;
+ AddOpcodeSizePrefix(1);
+ } else {
+ if (getOpcodeSize() <= 3) {
+ // __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ]
+ Offset = 1;
+ PersonalityIndex = AEABI_UNWIND_CPP_PR0;
+ AddPersonalityIndexPrefix(Offset, PersonalityIndex);
+ } else {
+ // __aeabi_unwind_cpp_pr1: [ 0x81 , SIZE , OP1 , OP2 , ... ]
+ Offset = 0;
+ PersonalityIndex = AEABI_UNWIND_CPP_PR1;
+ AddPersonalityIndexPrefix(Offset, PersonalityIndex);
+ AddOpcodeSizePrefix(1);
+ }
+ }
+
+ // Emit padding FINISH opcodes if size() is not a multiple of 4.
+ EmitFinishOpcodes();
+
+ // Swap the byte order
+ uint8_t *Ptr = Ops.begin() + Offset;
+ assert(size() % 4 == 0 && "Final unwind opcodes should align to 4");
+ for (size_t i = 0, n = size(); i < n; i += 4) {
+ std::swap(Ptr[i], Ptr[i + 3]);
+ std::swap(Ptr[i + 1], Ptr[i + 2]);
+ }
+}
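To illustrate what Finalize() produces in the simplest case, a hedged sketch of the compact __aeabi_unwind_cpp_pr0 word layout before the byte swap (assuming EHT_COMPACT == 0x80 and the FINISH opcode value 0xb0; both literals are written out here for illustration, the real constants come from ARMUnwindOp.h):

#include <cassert>
#include <cstdint>

// Pack three unwind opcode bytes into a compact pr0 word the way Finalize()
// arranges them: the personality prefix (0x80 | index) in the top byte, then
// OP1, OP2, OP3 from most to least significant byte.
inline uint32_t PackPR0Word(uint8_t Op1, uint8_t Op2, uint8_t Op3) {
  return (0x80u << 24) | (uint32_t(Op1) << 16) | (uint32_t(Op2) << 8) | Op3;
}

int main() {
  // A function with no saved registers and no stack adjustment ends up with
  // nothing but FINISH padding opcodes.
  assert(PackPR0Word(0xb0, 0xb0, 0xb0) == 0x80b0b0b0u);
  return 0;
}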
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
new file mode 100644
index 0000000..f6ecaeb
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -0,0 +1,114 @@
+//===-- ARMUnwindOpAsm.h - ARM Unwind Opcodes Assembler ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the unwind opcode assembler for the ARM exception
+// handling table.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_UNWIND_OP_ASM_H
+#define ARM_UNWIND_OP_ASM_H
+
+#include "ARMUnwindOp.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MCSymbol;
+
+class UnwindOpcodeAssembler {
+private:
+ llvm::SmallVector<uint8_t, 8> Ops;
+
+ unsigned Offset;
+ unsigned PersonalityIndex;
+ bool HasPersonality;
+
+ enum {
+ // The number of bytes reserved for the size and personality index prefix
+ // of the unwind opcodes.
+ NUM_PRESERVED_PREFIX_BUF = 2
+ };
+
+public:
+ UnwindOpcodeAssembler()
+ : Ops(NUM_PRESERVED_PREFIX_BUF), Offset(NUM_PRESERVED_PREFIX_BUF),
+ PersonalityIndex(NUM_PERSONALITY_INDEX), HasPersonality(false) {
+ }
+
+ /// Reset the unwind opcode assembler.
+ void Reset() {
+ Ops.resize(NUM_PRESERVED_PREFIX_BUF);
+ Offset = NUM_PRESERVED_PREFIX_BUF;
+ PersonalityIndex = NUM_PERSONALITY_INDEX;
+ HasPersonality = false;
+ }
+
+ /// Get the size of the payload (including the size byte)
+ size_t size() const {
+ return Ops.size() - Offset;
+ }
+
+ /// Get the beginning of the payload
+ const uint8_t *begin() const {
+ return Ops.begin() + Offset;
+ }
+
+ /// Get the payload
+ StringRef data() const {
+ return StringRef(reinterpret_cast<const char *>(begin()), size());
+ }
+
+ /// Set the personality index
+ void setPersonality(const MCSymbol *Per) {
+ HasPersonality = true;
+ }
+
+ /// Get the personality index
+ unsigned getPersonalityIndex() const {
+ return PersonalityIndex;
+ }
+
+ /// Emit unwind opcodes for .save directives
+ void EmitRegSave(uint32_t RegSave);
+
+ /// Emit unwind opcodes for .vsave directives
+ void EmitVFPRegSave(uint32_t VFPRegSave);
+
+ /// Emit unwind opcodes for .setfp directives
+ void EmitSetFP(uint16_t FPReg);
+
+ /// Emit unwind opcodes to update stack pointer
+ void EmitSPOffset(int64_t Offset);
+
+ /// Finalize the unwind opcode sequence for EmitBytes()
+ void Finalize();
+
+private:
+ /// Get the size of the opcodes in bytes.
+ size_t getOpcodeSize() const {
+ return Ops.size() - NUM_PRESERVED_PREFIX_BUF;
+ }
+
+ /// Add the length prefix to the payload
+ void AddOpcodeSizePrefix(size_t Pos);
+
+ /// Add personality index prefix in some compact format
+ void AddPersonalityIndexPrefix(size_t Pos, unsigned PersonalityIndex);
+
+ /// Pad with FINISH opcodes if the opcode size is not a multiple of 4
+ void EmitFinishOpcodes();
+};
+
+} // namespace llvm
+
+#endif // ARM_UNWIND_OP_ASM_H
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 2c3388c..1e2a8b0 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -88,7 +88,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -104,8 +104,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
int FramePtrSpillFI = 0;
- if (VARegSaveSize)
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize,
+ if (ArgRegsSaveSize)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
if (!AFI->hasStackFrame()) {
@@ -249,7 +249,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -300,7 +300,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
- if (VARegSaveSize) {
+ if (ArgRegsSaveSize) {
// Unlike T2 and ARM mode, the T1 pop instruction cannot restore
// to LR, and we can't pop the value directly to the PC since
// we need to update the SP after popping the value. Therefore, we
@@ -313,7 +313,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
.addReg(ARM::R3, RegState::Define);
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
@@ -376,7 +376,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ bool isVarArg = AFI->getArgRegsSaveSize() > 0;
DebugLoc DL = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
AddDefaultPred(MIB);
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 67e8ec7..a1b48c2 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
@@ -126,25 +127,41 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
return;
}
+ if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ // Thumb2 STRD expects its source registers to be in rGPR. Not a problem for
+ // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
+ // otherwise).
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ AddDefaultPred(MIB);
+ return;
+ }
+
ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
}
@@ -153,24 +170,42 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
return;
}
+ if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for
+ // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
+ // otherwise).
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
+ AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ AddDefaultPred(MIB);
+
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ return;
+ }
+
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
@@ -514,6 +549,15 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset = -Offset;
isSub = true;
}
+ } else if (AddrMode == ARMII::AddrModeT2_i8s4) {
+ Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
+ NumBits = 8;
+ // MCInst operand has already scaled value.
+ Scale = 1;
+ if (Offset < 0) {
+ isSub = true;
+ Offset = -Offset;
+ }
} else {
llvm_unreachable("Unsupported addressing mode!");
}
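As background for the AddrModeT2_i8s4 case added above, a hedged check of the addressing-mode range itself (an 8-bit immediate scaled by 4, so offsets up to ±1020 in steps of 4); this illustrates the encoding constraint, not the exact folding logic in rewriteT2FrameIndex:

#include <cassert>
#include <cstdlib>

// Returns true if Offset can be expressed as a Thumb2 i8s4 offset:
// a multiple of 4 whose magnitude fits in 8 bits after scaling.
inline bool FitsI8S4(int Offset) {
  return (Offset % 4) == 0 && std::abs(Offset) <= ((1 << 8) - 1) * 4;
}

int main() {
  assert(FitsI8S4(1020));   // 255 * 4, the largest encodable magnitude
  assert(!FitsI8S4(1024));  // one step too far
  assert(!FitsI8S4(6));     // not a multiple of 4
  return 0;
}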
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index d50f5d9..4795aae 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -926,13 +926,11 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
HighLatencyCPSR = false;
// Check predecessors for the latest CPSRDef.
- bool HasBackEdges = false;
for (MachineBasicBlock::pred_iterator
I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
if (!PInfo.Visited) {
// Since blocks are visited in RPO, this must be a back-edge.
- HasBackEdges = true;
continue;
}
if (PInfo.HighLatencyCPSR) {
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
index dfbefc8..a9b00a2 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
@@ -29,26 +29,25 @@ namespace llvm {
class HexagonTargetMachine;
class raw_ostream;
- FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ FunctionPass *createHexagonISelDag(const HexagonTargetMachine &TM,
CodeGenOpt::Level OptLevel);
- FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM);
- FunctionPass *createHexagonFPMoverPass(TargetMachine &TM);
- FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM);
- FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM);
-
- FunctionPass *createHexagonSplitTFRCondSets(HexagonTargetMachine &TM);
- FunctionPass *createHexagonExpandPredSpillCode(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM);
+ FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM);
+ FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
+ FunctionPass *createHexagonCFGOptimizer(const HexagonTargetMachine &TM);
+ FunctionPass *createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM);
+ FunctionPass *createHexagonExpandPredSpillCode(
+ const HexagonTargetMachine &TM);
FunctionPass *createHexagonHardwareLoops();
FunctionPass *createHexagonPeephole();
FunctionPass *createHexagonFixupHwLoops();
FunctionPass *createHexagonPacketizer();
FunctionPass *createHexagonNewValueJump();
-
/* TODO: object output.
MCCodeEmitter *createHexagonMCCodeEmitter(const Target &,
- TargetMachine &TM,
+ const TargetMachine &TM,
MCContext &Ctx);
*/
/* TODO: assembler input.
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
index 8a5ee40..9b3a643 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
@@ -84,12 +84,36 @@ def getPredOpcode : InstrMapping {
}
//===----------------------------------------------------------------------===//
+// Generate mapping table to relate predicate-true instructions with their
+// predicate-false forms
+//
+def getFalsePredOpcode : InstrMapping {
+ let FilterClass = "PredRel";
+ let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"];
+ let ColFields = ["PredSense"];
+ let KeyCol = ["true"];
+ let ValueCols = [["false"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate predicate-false instructions with their
+// predicate-true forms
+//
+def getTruePredOpcode : InstrMapping {
+ let FilterClass = "PredRel";
+ let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"];
+ let ColFields = ["PredSense"];
+ let KeyCol = ["false"];
+ let ValueCols = [["true"]];
+}
+
+//===----------------------------------------------------------------------===//
// Generate mapping table to relate predicated instructions with their .new
// format.
//
def getPredNewOpcode : InstrMapping {
let FilterClass = "PredNewRel";
- let RowFields = ["BaseOpcode", "PredSense", "isNVStore"];
+ let RowFields = ["BaseOpcode", "PredSense", "isNVStore", "isBrTaken"];
let ColFields = ["PNewValue"];
let KeyCol = [""];
let ValueCols = [["new"]];
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index d4078ad..8597f11 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -26,21 +26,27 @@
using namespace llvm;
+namespace llvm {
+ void initializeHexagonCFGOptimizerPass(PassRegistry&);
+}
+
+
namespace {
class HexagonCFGOptimizer : public MachineFunctionPass {
private:
- HexagonTargetMachine& QTM;
+ const HexagonTargetMachine& QTM;
const HexagonSubtarget &QST;
void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*);
public:
static char ID;
- HexagonCFGOptimizer(HexagonTargetMachine& TM) : MachineFunctionPass(ID),
- QTM(TM),
- QST(*TM.getSubtargetImpl()) {}
+ HexagonCFGOptimizer(const HexagonTargetMachine& TM)
+ : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
+ initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry());
+ }
const char *getPassName() const {
return "Hexagon CFG Optimizer";
@@ -52,8 +58,8 @@ private:
char HexagonCFGOptimizer::ID = 0;
static bool IsConditionalBranch(int Opc) {
- return (Opc == Hexagon::JMP_c) || (Opc == Hexagon::JMP_cNot)
- || (Opc == Hexagon::JMP_cdnPt) || (Opc == Hexagon::JMP_cdnNotPt);
+ return (Opc == Hexagon::JMP_t) || (Opc == Hexagon::JMP_f)
+ || (Opc == Hexagon::JMP_tnew_t) || (Opc == Hexagon::JMP_fnew_t);
}
@@ -68,20 +74,20 @@ HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
const HexagonInstrInfo *QII = QTM.getInstrInfo();
int NewOpcode = 0;
switch(MI->getOpcode()) {
- case Hexagon::JMP_c:
- NewOpcode = Hexagon::JMP_cNot;
+ case Hexagon::JMP_t:
+ NewOpcode = Hexagon::JMP_f;
break;
- case Hexagon::JMP_cNot:
- NewOpcode = Hexagon::JMP_c;
+ case Hexagon::JMP_f:
+ NewOpcode = Hexagon::JMP_t;
break;
- case Hexagon::JMP_cdnPt:
- NewOpcode = Hexagon::JMP_cdnNotPt;
+ case Hexagon::JMP_tnew_t:
+ NewOpcode = Hexagon::JMP_fnew_t;
break;
- case Hexagon::JMP_cdnNotPt:
- NewOpcode = Hexagon::JMP_cdnPt;
+ case Hexagon::JMP_fnew_t:
+ NewOpcode = Hexagon::JMP_tnew_t;
break;
default:
@@ -156,8 +162,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
// The target of the unconditional branch must be JumpAroundTarget.
// TODO: If not, we should not invert the unconditional branch.
MachineBasicBlock* CondBranchTarget = NULL;
- if ((MI->getOpcode() == Hexagon::JMP_c) ||
- (MI->getOpcode() == Hexagon::JMP_cNot)) {
+ if ((MI->getOpcode() == Hexagon::JMP_t) ||
+ (MI->getOpcode() == Hexagon::JMP_f)) {
CondBranchTarget = MI->getOperand(1).getMBB();
}
@@ -231,6 +237,16 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-FunctionPass *llvm::createHexagonCFGOptimizer(HexagonTargetMachine &TM) {
+static void initializePassOnce(PassRegistry &Registry) {
+ PassInfo *PI = new PassInfo("Hexagon CFG Optimizer", "hexagon-cfg",
+ &HexagonCFGOptimizer::ID, 0, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonCFGOptimizerPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+FunctionPass *llvm::createHexagonCFGOptimizer(const HexagonTargetMachine &TM) {
return new HexagonCFGOptimizer(TM);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index 0814421..8a5991f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -41,16 +41,24 @@
using namespace llvm;
+namespace llvm {
+ void initializeHexagonExpandPredSpillCodePass(PassRegistry&);
+}
+
+
namespace {
class HexagonExpandPredSpillCode : public MachineFunctionPass {
- HexagonTargetMachine& QTM;
+ const HexagonTargetMachine& QTM;
const HexagonSubtarget &QST;
public:
static char ID;
- HexagonExpandPredSpillCode(HexagonTargetMachine& TM) :
- MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+ HexagonExpandPredSpillCode(const HexagonTargetMachine& TM) :
+ MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeHexagonExpandPredSpillCodePass(Registry);
+ }
const char *getPassName() const {
return "Hexagon Expand Predicate Spill Code";
@@ -175,6 +183,19 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-FunctionPass *llvm::createHexagonExpandPredSpillCode(HexagonTargetMachine &TM) {
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon Expand Predicate Spill Code";
+ PassInfo *PI = new PassInfo(Name, "hexagon-spill-pred",
+ &HexagonExpandPredSpillCode::ID,
+ 0, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonExpandPredSpillCodePass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+FunctionPass*
+llvm::createHexagonExpandPredSpillCode(const HexagonTargetMachine &TM) {
return new HexagonExpandPredSpillCode(TM);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index d6a9329..de993ee 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -189,7 +189,7 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
// Replace 'jumpr r31' instruction with dealloc_return for V4 and higher
// versions.
- if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPR
+ if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPret
&& !DisableDeallocRet) {
// Remove jumpr node.
MBB.erase(MBBI);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 1786624..d002788 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -541,12 +541,6 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
case Hexagon::CMPEQrr:
Cmp = !Negated ? Comparison::EQ : Comparison::NE;
break;
- case Hexagon::CMPLTrr:
- Cmp = !Negated ? Comparison::LTs : Comparison::GEs;
- break;
- case Hexagon::CMPLTUrr:
- Cmp = !Negated ? Comparison::LTu : Comparison::GEu;
- break;
case Hexagon::CMPGTUri:
case Hexagon::CMPGTUrr:
Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
@@ -1125,8 +1119,8 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
// The loop ends with either:
// - a conditional branch followed by an unconditional branch, or
// - a conditional branch to the loop start.
- if (LastI->getOpcode() == Hexagon::JMP_c ||
- LastI->getOpcode() == Hexagon::JMP_cNot) {
+ if (LastI->getOpcode() == Hexagon::JMP_t ||
+ LastI->getOpcode() == Hexagon::JMP_f) {
// Delete one and change/add an uncond. branch to out of the loop.
MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
LastI = LastMBB->erase(LastI);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 8fc9ba1..54ca2c9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -49,11 +49,11 @@ class HexagonDAGToDAGISel : public SelectionDAGISel {
const HexagonSubtarget &Subtarget;
// Keep a reference to HexagonTargetMachine.
- HexagonTargetMachine& TM;
+ const HexagonTargetMachine& TM;
const HexagonInstrInfo *TII;
DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap;
public:
- explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine,
+ explicit HexagonDAGToDAGISel(const HexagonTargetMachine &targetmachine,
CodeGenOpt::Level OptLevel)
: SelectionDAGISel(targetmachine, OptLevel),
Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
@@ -160,6 +160,17 @@ inline SDValue XformU7ToU7M1Imm(signed Imm) {
return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
}
+// XformSToSM1Imm - Return a target constant decremented by 1.
+inline SDValue XformSToSM1Imm(signed Imm) {
+ return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
+}
+
+// XformUToUM1Imm - Return a target constant decremented by 1.
+inline SDValue XformUToUM1Imm(unsigned Imm) {
+ assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
+ return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
+}
+
// Include the pieces autogenerated from the target description.
#include "HexagonGenDAGISel.inc"
};
@@ -169,7 +180,7 @@ inline SDValue XformU7ToU7M1Imm(signed Imm) {
/// createHexagonISelDag - This pass converts a legalized DAG into a
/// Hexagon-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
+FunctionPass *llvm::createHexagonISelDag(const HexagonTargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new HexagonDAGToDAGISel(TM, OptLevel);
}
@@ -697,7 +708,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
// Build post increment store.
SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
- MVT::Other, Ops, 4);
+ MVT::Other, Ops);
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = ST->getMemOperand();
cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
@@ -723,8 +734,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
// Build regular store.
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
- SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops,
- 4);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
// Build the split increment instruction.
SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
Base,
@@ -780,7 +790,7 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
Value, Chain};
// build indexed store
SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops, 4);
+ MVT::Other, Ops);
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = ST->getMemOperand();
cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
@@ -1230,8 +1240,7 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
}
EVT ReturnValueVT = N->getValueType(0);
SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl,
- ReturnValueVT,
- Ops.data(), Ops.size());
+ ReturnValueVT, Ops);
ReplaceUses(N, Result);
return Result;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 15858a9..0e5b8dc 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1002,14 +1002,6 @@ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
-
-SDValue HexagonTargetLowering::LowerMEMBARRIER(SDValue Op,
- SelectionDAG& DAG) const {
- DebugLoc dl = Op.getDebugLoc();
- return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
-}
-
-
SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op,
SelectionDAG& DAG) const {
DebugLoc dl = Op.getDebugLoc();
@@ -1361,7 +1353,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
}
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
if (EmitJumpTables) {
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
} else {
@@ -1377,7 +1368,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::BR_CC, MVT::i64, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
setOperationAction(ISD::FSIN , MVT::f64, Expand);
@@ -1444,7 +1434,7 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
- setOperationAction(ISD::EH_RETURN, MVT::Other, Expand);
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
if (TM.getSubtargetImpl()->isSubtargetV2()) {
setExceptionPointerRegister(Hexagon::R20);
@@ -1499,6 +1489,7 @@ HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
+ case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
}
}
@@ -1520,16 +1511,43 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
}
SDValue
+HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Offset = Op.getOperand(1);
+ SDValue Handler = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Mark function as containing a call to EH_RETURN.
+ HexagonMachineFunctionInfo *FuncInfo =
+ DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
+ FuncInfo->setHasEHReturn();
+
+ unsigned OffsetReg = Hexagon::R28;
+
+ SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ DAG.getRegister(Hexagon::R30, getPointerTy()),
+ DAG.getIntPtrConstant(4));
+ Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+ false, false, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
+
+ // Not needed; we already use it as an explicit input to EH_RETURN.
+ // MF.getRegInfo().addLiveOut(OffsetReg);
+
+ return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
+}
+
+SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
// Frame & Return address. Currently unimplemented.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::GlobalTLSAddress:
llvm_unreachable("TLS not implemented for Hexagon.");
- case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 3279cc6..bb1acc1 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -62,7 +62,8 @@ namespace llvm {
WrapperShuffEH,
WrapperShuffOB,
WrapperShuffOH,
- TC_RETURN
+ TC_RETURN,
+ EH_RETURN
};
}
@@ -101,6 +102,7 @@ namespace llvm {
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -122,7 +124,6 @@ namespace llvm {
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 60b12ac..f114170 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -23,7 +23,9 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#define GET_INSTRINFO_CTOR
#define GET_INSTRMAP_INFO
#include "HexagonGenInstrInfo.inc"
@@ -118,16 +120,16 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
DebugLoc DL) const{
int BOpc = Hexagon::JMP;
- int BccOpc = Hexagon::JMP_c;
+ int BccOpc = Hexagon::JMP_t;
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
int regPos = 0;
// Check if ReverseBranchCondition has asked to reverse this branch
// If we want to reverse the branch an odd number of times, we want
- // JMP_cNot.
+ // JMP_f.
if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
- BccOpc = Hexagon::JMP_cNot;
+ BccOpc = Hexagon::JMP_f;
regPos = 1;
}
@@ -174,8 +176,8 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
FBB = NULL;
// If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
+ MachineBasicBlock::instr_iterator I = MBB.instr_end();
+ if (I == MBB.instr_begin())
return false;
// A basic block may look like this:
@@ -194,13 +196,24 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
--I;
if (I->isEHLabel())
return true;
- } while (I != MBB.begin());
+ } while (I != MBB.instr_begin());
- I = MBB.end();
+ I = MBB.instr_end();
--I;
while (I->isDebugValue()) {
- if (I == MBB.begin())
+ if (I == MBB.instr_begin())
+ return false;
+ --I;
+ }
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (AllowModify && I->getOpcode() == Hexagon::JMP &&
+ MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+ DEBUG(dbgs() << "\nErasing the jump to successor block\n");
+ I->eraseFromParent();
+ I = MBB.instr_end();
+ if (I == MBB.instr_begin())
return false;
--I;
}
@@ -209,23 +222,42 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Get the last instruction in the block.
MachineInstr *LastInst = I;
+ MachineInstr *SecondLastInst = NULL;
+ // Find one more terminator if present.
+ do {
+ if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(I)) {
+ if (!SecondLastInst)
+ SecondLastInst = I;
+ else
+ // This is a third branch.
+ return true;
+ }
+ if (I == MBB.instr_begin())
+ break;
+ --I;
+ } while(I);
+
+ int LastOpcode = LastInst->getOpcode();
+
+ bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode);
+ bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode);
// If there is only one terminator instruction, process it.
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- if (LastInst->getOpcode() == Hexagon::JMP) {
+ if (LastInst && !SecondLastInst) {
+ if (LastOpcode == Hexagon::JMP) {
TBB = LastInst->getOperand(0).getMBB();
return false;
}
- if (LastInst->getOpcode() == Hexagon::JMP_c) {
- // Block ends with fall-through true condbranch.
- TBB = LastInst->getOperand(1).getMBB();
+ if (LastOpcode == Hexagon::ENDLOOP0) {
+ TBB = LastInst->getOperand(0).getMBB();
Cond.push_back(LastInst->getOperand(0));
return false;
}
- if (LastInst->getOpcode() == Hexagon::JMP_cNot) {
- // Block ends with fall-through false condbranch.
+ if (LastOpcodeHasJMP_c) {
TBB = LastInst->getOperand(1).getMBB();
- Cond.push_back(MachineOperand::CreateImm(0));
+ if (LastOpcodeHasNot) {
+ Cond.push_back(MachineOperand::CreateImm(0));
+ }
Cond.push_back(LastInst->getOperand(0));
return false;
}
@@ -233,29 +265,14 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return true;
}
- // Get the instruction before it if it's a terminator.
- MachineInstr *SecondLastInst = I;
-
- // If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() &&
- isUnpredicatedTerminator(--I))
- return true;
+ int SecLastOpcode = SecondLastInst->getOpcode();
- // If the block ends with Hexagon::BRCOND and Hexagon:JMP, handle it.
- if (((SecondLastInst->getOpcode() == Hexagon::BRCOND) ||
- (SecondLastInst->getOpcode() == Hexagon::JMP_c)) &&
- LastInst->getOpcode() == Hexagon::JMP) {
+ bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode);
+ bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode);
+ if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::JMP)) {
TBB = SecondLastInst->getOperand(1).getMBB();
- Cond.push_back(SecondLastInst->getOperand(0));
- FBB = LastInst->getOperand(0).getMBB();
- return false;
- }
-
- // If the block ends with Hexagon::JMP_cNot and Hexagon:JMP, handle it.
- if ((SecondLastInst->getOpcode() == Hexagon::JMP_cNot) &&
- LastInst->getOpcode() == Hexagon::JMP) {
- TBB = SecondLastInst->getOperand(1).getMBB();
- Cond.push_back(MachineOperand::CreateImm(0));
+ if (SecLastOpcodeHasNot)
+ Cond.push_back(MachineOperand::CreateImm(0));
Cond.push_back(SecondLastInst->getOperand(0));
FBB = LastInst->getOperand(0).getMBB();
return false;
@@ -263,8 +280,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If the block ends with two Hexagon::JMPs, handle it. The second one is not
// executed, so remove it.
- if (SecondLastInst->getOpcode() == Hexagon::JMP &&
- LastInst->getOpcode() == Hexagon::JMP) {
+ if (SecLastOpcode == Hexagon::JMP && LastOpcode == Hexagon::JMP) {
TBB = SecondLastInst->getOperand(0).getMBB();
I = LastInst;
if (AllowModify)
@@ -272,6 +288,15 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
}
+ // If the block ends with an ENDLOOP and a JMP, handle it.
+ if (SecLastOpcode == Hexagon::ENDLOOP0 &&
+ LastOpcode == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
// Otherwise, can't handle this.
return true;
}
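The Cond vector filled in above uses the convention that InsertBranch (earlier in this file) consumes: an optional leading immediate 0 marks the inverted sense, otherwise the predicate operand alone selects the true-sense jump. A hedged sketch of a consumer; pickBranchOpcode is a hypothetical helper and the opcode enums come from the generated HexagonGenInstrInfo.inc:
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineOperand.h"
using namespace llvm;
// Cond == {}             -> unconditional JMP
// Cond == {pred}         -> JMP_t  ("if (p) jump")
// Cond == {imm 0, pred}  -> JMP_f  ("if (!p) jump")
static unsigned pickBranchOpcode(const SmallVectorImpl<MachineOperand> &Cond) {
  if (Cond.empty())
    return Hexagon::JMP;
  bool Inverted = Cond[0].isImm() && Cond[0].getImm() == 0;
  return Inverted ? Hexagon::JMP_f : Hexagon::JMP_t;
}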
@@ -279,8 +304,8 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
int BOpc = Hexagon::JMP;
- int BccOpc = Hexagon::JMP_c;
- int BccOpcNot = Hexagon::JMP_cNot;
+ int BccOpc = Hexagon::JMP_t;
+ int BccOpcNot = Hexagon::JMP_f;
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
@@ -325,8 +350,6 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
case Hexagon::CMPGTUrr:
case Hexagon::CMPGTri:
case Hexagon::CMPGTrr:
- case Hexagon::CMPLTUrr:
- case Hexagon::CMPLTrr:
SrcReg = MI->getOperand(1).getReg();
Mask = ~0;
break;
@@ -366,8 +389,6 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
case Hexagon::CMPhEQrr_xor_V4:
case Hexagon::CMPhGTUrr_V4:
case Hexagon::CMPhGTrr_shl_V4:
- case Hexagon::CMPLTUrr:
- case Hexagon::CMPLTrr:
SrcReg2 = MI->getOperand(2).getReg();
return true;
@@ -605,110 +626,8 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
return false;
}
-bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- default: return false;
- // JMP_EQri
- case Hexagon::JMP_EQriPt_nv_V4:
- case Hexagon::JMP_EQriPnt_nv_V4:
- case Hexagon::JMP_EQriNotPt_nv_V4:
- case Hexagon::JMP_EQriNotPnt_nv_V4:
- case Hexagon::JMP_EQriPt_ie_nv_V4:
- case Hexagon::JMP_EQriPnt_ie_nv_V4:
- case Hexagon::JMP_EQriNotPt_ie_nv_V4:
- case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
-
- // JMP_EQri - with -1
- case Hexagon::JMP_EQriPtneg_nv_V4:
- case Hexagon::JMP_EQriPntneg_nv_V4:
- case Hexagon::JMP_EQriNotPtneg_nv_V4:
- case Hexagon::JMP_EQriNotPntneg_nv_V4:
- case Hexagon::JMP_EQriPtneg_ie_nv_V4:
- case Hexagon::JMP_EQriPntneg_ie_nv_V4:
- case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
- case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
-
- // JMP_EQrr
- case Hexagon::JMP_EQrrPt_nv_V4:
- case Hexagon::JMP_EQrrPnt_nv_V4:
- case Hexagon::JMP_EQrrNotPt_nv_V4:
- case Hexagon::JMP_EQrrNotPnt_nv_V4:
- case Hexagon::JMP_EQrrPt_ie_nv_V4:
- case Hexagon::JMP_EQrrPnt_ie_nv_V4:
- case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
- case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
-
- // JMP_GTri
- case Hexagon::JMP_GTriPt_nv_V4:
- case Hexagon::JMP_GTriPnt_nv_V4:
- case Hexagon::JMP_GTriNotPt_nv_V4:
- case Hexagon::JMP_GTriNotPnt_nv_V4:
- case Hexagon::JMP_GTriPt_ie_nv_V4:
- case Hexagon::JMP_GTriPnt_ie_nv_V4:
- case Hexagon::JMP_GTriNotPt_ie_nv_V4:
- case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
-
- // JMP_GTri - with -1
- case Hexagon::JMP_GTriPtneg_nv_V4:
- case Hexagon::JMP_GTriPntneg_nv_V4:
- case Hexagon::JMP_GTriNotPtneg_nv_V4:
- case Hexagon::JMP_GTriNotPntneg_nv_V4:
- case Hexagon::JMP_GTriPtneg_ie_nv_V4:
- case Hexagon::JMP_GTriPntneg_ie_nv_V4:
- case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
- case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
-
- // JMP_GTrr
- case Hexagon::JMP_GTrrPt_nv_V4:
- case Hexagon::JMP_GTrrPnt_nv_V4:
- case Hexagon::JMP_GTrrNotPt_nv_V4:
- case Hexagon::JMP_GTrrNotPnt_nv_V4:
- case Hexagon::JMP_GTrrPt_ie_nv_V4:
- case Hexagon::JMP_GTrrPnt_ie_nv_V4:
- case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
- case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
-
- // JMP_GTrrdn
- case Hexagon::JMP_GTrrdnPt_nv_V4:
- case Hexagon::JMP_GTrrdnPnt_nv_V4:
- case Hexagon::JMP_GTrrdnNotPt_nv_V4:
- case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
- case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
- case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
- case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
- case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
-
- // JMP_GTUri
- case Hexagon::JMP_GTUriPt_nv_V4:
- case Hexagon::JMP_GTUriPnt_nv_V4:
- case Hexagon::JMP_GTUriNotPt_nv_V4:
- case Hexagon::JMP_GTUriNotPnt_nv_V4:
- case Hexagon::JMP_GTUriPt_ie_nv_V4:
- case Hexagon::JMP_GTUriPnt_ie_nv_V4:
- case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
- case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
-
- // JMP_GTUrr
- case Hexagon::JMP_GTUrrPt_nv_V4:
- case Hexagon::JMP_GTUrrPnt_nv_V4:
- case Hexagon::JMP_GTUrrNotPt_nv_V4:
- case Hexagon::JMP_GTUrrNotPnt_nv_V4:
- case Hexagon::JMP_GTUrrPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
- case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
-
- // JMP_GTUrrdn
- case Hexagon::JMP_GTUrrdnPt_nv_V4:
- case Hexagon::JMP_GTUrrdnPnt_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
- case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
- case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
- return true;
- }
+bool HexagonInstrInfo::isBranch (const MachineInstr *MI) const {
+ return MI->getDesc().isBranch();
}
bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
@@ -746,11 +665,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STrib_abs_cdnPt_nv_V4:
case Hexagon::STrib_abs_cNotPt_nv_V4:
case Hexagon::STrib_abs_cdnNotPt_nv_V4:
- case Hexagon::STrib_imm_abs_nv_V4:
- case Hexagon::STrib_imm_abs_cPt_nv_V4:
- case Hexagon::STrib_imm_abs_cdnPt_nv_V4:
- case Hexagon::STrib_imm_abs_cNotPt_nv_V4:
- case Hexagon::STrib_imm_abs_cdnNotPt_nv_V4:
// Store Halfword
case Hexagon::STrih_nv_V4:
@@ -784,11 +698,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STrih_abs_cdnPt_nv_V4:
case Hexagon::STrih_abs_cNotPt_nv_V4:
case Hexagon::STrih_abs_cdnNotPt_nv_V4:
- case Hexagon::STrih_imm_abs_nv_V4:
- case Hexagon::STrih_imm_abs_cPt_nv_V4:
- case Hexagon::STrih_imm_abs_cdnPt_nv_V4:
- case Hexagon::STrih_imm_abs_cNotPt_nv_V4:
- case Hexagon::STrih_imm_abs_cdnNotPt_nv_V4:
// Store Word
case Hexagon::STriw_nv_V4:
@@ -822,11 +731,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
case Hexagon::STriw_abs_cdnPt_nv_V4:
case Hexagon::STriw_abs_cNotPt_nv_V4:
case Hexagon::STriw_abs_cdnNotPt_nv_V4:
- case Hexagon::STriw_imm_abs_nv_V4:
- case Hexagon::STriw_imm_abs_cPt_nv_V4:
- case Hexagon::STriw_imm_abs_cdnPt_nv_V4:
- case Hexagon::STriw_imm_abs_cNotPt_nv_V4:
- case Hexagon::STriw_imm_abs_cdnNotPt_nv_V4:
return true;
}
}
@@ -1003,9 +907,6 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
case Hexagon::ZXTB:
case Hexagon::ZXTH:
return Subtarget.hasV4TOps();
-
- case Hexagon::JMPR:
- return false;
}
return true;
@@ -1030,6 +931,12 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
// cNotPt ---> cNotPt_nv
// cPt ---> cPt_nv
unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
+ int InvPredOpcode;
+ InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
+ : Hexagon::getTruePredOpcode(Opc);
+ if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate.
+ return InvPredOpcode;
+
switch(Opc) {
default: llvm_unreachable("Unexpected predicated instruction");
case Hexagon::TFR_cPt:
@@ -1042,10 +949,10 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
case Hexagon::TFRI_cNotPt:
return Hexagon::TFRI_cPt;
- case Hexagon::JMP_c:
- return Hexagon::JMP_cNot;
- case Hexagon::JMP_cNot:
- return Hexagon::JMP_c;
+ case Hexagon::JMP_t:
+ return Hexagon::JMP_f;
+ case Hexagon::JMP_f:
+ return Hexagon::JMP_t;
case Hexagon::ADD_ri_cPt:
return Hexagon::ADD_ri_cNotPt;
@@ -1113,10 +1020,10 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
return Hexagon::ZXTH_cPt_V4;
- case Hexagon::JMPR_cPt:
- return Hexagon::JMPR_cNotPt;
- case Hexagon::JMPR_cNotPt:
- return Hexagon::JMPR_cPt;
+ case Hexagon::JMPR_t:
+ return Hexagon::JMPR_f;
+ case Hexagon::JMPR_f:
+ return Hexagon::JMPR_t;
// V4 indexed+scaled load.
case Hexagon::LDrid_indexed_shl_cPt_V4:
@@ -1362,117 +1269,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
return Hexagon::DEALLOC_RET_cNotPt_V4;
case Hexagon::DEALLOC_RET_cNotPt_V4:
return Hexagon::DEALLOC_RET_cPt_V4;
-
- // New Value Jump.
- // JMPEQ_ri - with -1.
- case Hexagon::JMP_EQriPtneg_nv_V4:
- return Hexagon::JMP_EQriNotPtneg_nv_V4;
- case Hexagon::JMP_EQriNotPtneg_nv_V4:
- return Hexagon::JMP_EQriPtneg_nv_V4;
-
- case Hexagon::JMP_EQriPntneg_nv_V4:
- return Hexagon::JMP_EQriNotPntneg_nv_V4;
- case Hexagon::JMP_EQriNotPntneg_nv_V4:
- return Hexagon::JMP_EQriPntneg_nv_V4;
-
- // JMPEQ_ri.
- case Hexagon::JMP_EQriPt_nv_V4:
- return Hexagon::JMP_EQriNotPt_nv_V4;
- case Hexagon::JMP_EQriNotPt_nv_V4:
- return Hexagon::JMP_EQriPt_nv_V4;
-
- case Hexagon::JMP_EQriPnt_nv_V4:
- return Hexagon::JMP_EQriNotPnt_nv_V4;
- case Hexagon::JMP_EQriNotPnt_nv_V4:
- return Hexagon::JMP_EQriPnt_nv_V4;
-
- // JMPEQ_rr.
- case Hexagon::JMP_EQrrPt_nv_V4:
- return Hexagon::JMP_EQrrNotPt_nv_V4;
- case Hexagon::JMP_EQrrNotPt_nv_V4:
- return Hexagon::JMP_EQrrPt_nv_V4;
-
- case Hexagon::JMP_EQrrPnt_nv_V4:
- return Hexagon::JMP_EQrrNotPnt_nv_V4;
- case Hexagon::JMP_EQrrNotPnt_nv_V4:
- return Hexagon::JMP_EQrrPnt_nv_V4;
-
- // JMPGT_ri - with -1.
- case Hexagon::JMP_GTriPtneg_nv_V4:
- return Hexagon::JMP_GTriNotPtneg_nv_V4;
- case Hexagon::JMP_GTriNotPtneg_nv_V4:
- return Hexagon::JMP_GTriPtneg_nv_V4;
-
- case Hexagon::JMP_GTriPntneg_nv_V4:
- return Hexagon::JMP_GTriNotPntneg_nv_V4;
- case Hexagon::JMP_GTriNotPntneg_nv_V4:
- return Hexagon::JMP_GTriPntneg_nv_V4;
-
- // JMPGT_ri.
- case Hexagon::JMP_GTriPt_nv_V4:
- return Hexagon::JMP_GTriNotPt_nv_V4;
- case Hexagon::JMP_GTriNotPt_nv_V4:
- return Hexagon::JMP_GTriPt_nv_V4;
-
- case Hexagon::JMP_GTriPnt_nv_V4:
- return Hexagon::JMP_GTriNotPnt_nv_V4;
- case Hexagon::JMP_GTriNotPnt_nv_V4:
- return Hexagon::JMP_GTriPnt_nv_V4;
-
- // JMPGT_rr.
- case Hexagon::JMP_GTrrPt_nv_V4:
- return Hexagon::JMP_GTrrNotPt_nv_V4;
- case Hexagon::JMP_GTrrNotPt_nv_V4:
- return Hexagon::JMP_GTrrPt_nv_V4;
-
- case Hexagon::JMP_GTrrPnt_nv_V4:
- return Hexagon::JMP_GTrrNotPnt_nv_V4;
- case Hexagon::JMP_GTrrNotPnt_nv_V4:
- return Hexagon::JMP_GTrrPnt_nv_V4;
-
- // JMPGT_rrdn.
- case Hexagon::JMP_GTrrdnPt_nv_V4:
- return Hexagon::JMP_GTrrdnNotPt_nv_V4;
- case Hexagon::JMP_GTrrdnNotPt_nv_V4:
- return Hexagon::JMP_GTrrdnPt_nv_V4;
-
- case Hexagon::JMP_GTrrdnPnt_nv_V4:
- return Hexagon::JMP_GTrrdnNotPnt_nv_V4;
- case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
- return Hexagon::JMP_GTrrdnPnt_nv_V4;
-
- // JMPGTU_ri.
- case Hexagon::JMP_GTUriPt_nv_V4:
- return Hexagon::JMP_GTUriNotPt_nv_V4;
- case Hexagon::JMP_GTUriNotPt_nv_V4:
- return Hexagon::JMP_GTUriPt_nv_V4;
-
- case Hexagon::JMP_GTUriPnt_nv_V4:
- return Hexagon::JMP_GTUriNotPnt_nv_V4;
- case Hexagon::JMP_GTUriNotPnt_nv_V4:
- return Hexagon::JMP_GTUriPnt_nv_V4;
-
- // JMPGTU_rr.
- case Hexagon::JMP_GTUrrPt_nv_V4:
- return Hexagon::JMP_GTUrrNotPt_nv_V4;
- case Hexagon::JMP_GTUrrNotPt_nv_V4:
- return Hexagon::JMP_GTUrrPt_nv_V4;
-
- case Hexagon::JMP_GTUrrPnt_nv_V4:
- return Hexagon::JMP_GTUrrNotPnt_nv_V4;
- case Hexagon::JMP_GTUrrNotPnt_nv_V4:
- return Hexagon::JMP_GTUrrPnt_nv_V4;
-
- // JMPGTU_rrdn.
- case Hexagon::JMP_GTUrrdnPt_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPt_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
- return Hexagon::JMP_GTUrrdnPt_nv_V4;
-
- case Hexagon::JMP_GTUrrdnPnt_nv_V4:
- return Hexagon::JMP_GTUrrdnNotPnt_nv_V4;
- case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
- return Hexagon::JMP_GTUrrdnPnt_nv_V4;
}
}
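With the relation tables in place, inverting the sense of most predicated opcodes is a lookup (isPredicatedTrue decides which map to consult); the switch above only remains for opcodes without a table entry. A hedged, standalone check of a few pairs that are also spelled out in the switch:
#include <cassert>
// Assumes the Hexagon opcode enums and a constructed HexagonInstrInfo &TII.
void checkJumpInversion(const HexagonInstrInfo &TII) {
  assert(TII.getInvertedPredicatedOpcode(Hexagon::JMP_t) == Hexagon::JMP_f);
  assert(TII.getInvertedPredicatedOpcode(Hexagon::JMP_f) == Hexagon::JMP_t);
  assert(TII.getInvertedPredicatedOpcode(Hexagon::JMPR_t) == Hexagon::JMPR_f);
}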
@@ -1499,14 +1295,9 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
return !invertPredicate ? Hexagon::TFRI_cPt :
Hexagon::TFRI_cNotPt;
case Hexagon::JMP:
- return !invertPredicate ? Hexagon::JMP_c :
- Hexagon::JMP_cNot;
- case Hexagon::JMP_EQrrPt_nv_V4:
- return !invertPredicate ? Hexagon::JMP_EQrrPt_nv_V4 :
- Hexagon::JMP_EQrrNotPt_nv_V4;
- case Hexagon::JMP_EQriPt_nv_V4:
- return !invertPredicate ? Hexagon::JMP_EQriPt_nv_V4 :
- Hexagon::JMP_EQriNotPt_nv_V4;
+ return !invertPredicate ? Hexagon::JMP_t :
+ Hexagon::JMP_f;
+
case Hexagon::COMBINE_rr:
return !invertPredicate ? Hexagon::COMBINE_rr_cPt :
Hexagon::COMBINE_rr_cNotPt;
@@ -1530,8 +1321,8 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
Hexagon::ZXTH_cNotPt_V4;
case Hexagon::JMPR:
- return !invertPredicate ? Hexagon::JMPR_cPt :
- Hexagon::JMPR_cNotPt;
+ return !invertPredicate ? Hexagon::JMPR_t :
+ Hexagon::JMPR_f;
// V4 indexed+scaled load.
case Hexagon::LDrid_indexed_shl_V4:
@@ -1830,11 +1621,15 @@ PredicateInstruction(MachineInstr *MI,
// It is better to have an assert here to check this. But I don't know how
// to write this assert because findFirstPredOperandIdx() would return -1
if (oper < -1) oper = -1;
+
MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
- PredMO.isImplicit(), PredMO.isKill(),
+ PredMO.isImplicit(), false,
PredMO.isDead(), PredMO.isUndef(),
PredMO.isDebug());
+ MachineRegisterInfo &RegInfo = MI->getParent()->getParent()->getRegInfo();
+ RegInfo.clearKillFlags(PredMO.getReg());
+
if (hasGAOpnd)
{
unsigned int i;
@@ -1883,13 +1678,41 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
return true;
}
-
+// Returns true if an instruction is predicated irrespective of the predicate
+// sense. For example, all of the following will return true.
+// if (p0) R1 = add(R2, R3)
+// if (!p0) R1 = add(R2, R3)
+// if (p0.new) R1 = add(R2, R3)
+// if (!p0.new) R1 = add(R2, R3)
bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
}
+bool HexagonInstrInfo::isPredicated(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+
+ return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+}
+
+bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ assert(isPredicated(MI));
+ return (!((F >> HexagonII::PredicatedFalsePos) &
+ HexagonII::PredicatedFalseMask));
+}
+
+bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+
+ // Make sure that the instruction is predicated.
+ assert((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+ return (!((F >> HexagonII::PredicatedFalsePos) &
+ HexagonII::PredicatedFalseMask));
+}
+
bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
@@ -1897,6 +1720,13 @@ bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const {
return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
}
+bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+
+ assert(isPredicated(Opcode));
+ return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+}
+
bool
HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
@@ -2129,14 +1959,10 @@ bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const {
default: return false;
case Hexagon::CMPEQrr:
case Hexagon::CMPEQri:
- case Hexagon::CMPLTrr:
case Hexagon::CMPGTrr:
case Hexagon::CMPGTri:
- case Hexagon::CMPLTUrr:
case Hexagon::CMPGTUrr:
case Hexagon::CMPGTUri:
- case Hexagon::CMPGEri:
- case Hexagon::CMPGEUri:
return true;
}
}
@@ -2369,6 +2195,18 @@ isConditionalStore (const MachineInstr* MI) const {
}
}
+
+bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
+ if (isNewValue(MI) && isBranch(MI))
+ return true;
+ return false;
+}
+
+bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+}
+
// Returns true, if any one of the operands is a dot new
// insn, whether it is predicated dot new or register dot new.
bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const {
@@ -2470,6 +2308,34 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
return (ImmValue < MinValue || ImmValue > MaxValue);
}
+// Returns the opcode to use when converting MI, which is a conditional jump,
+// into a conditional instruction which uses the .new value of the predicate.
+// We also use branch probabilities to add a hint to the jump.
+int
+HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI,
+ const
+ MachineBranchProbabilityInfo *MBPI) const {
+
+ // We assume that the block can have at most two successors.
+ bool taken = false;
+ MachineBasicBlock *Src = MI->getParent();
+ MachineOperand *BrTarget = &MI->getOperand(1);
+ MachineBasicBlock *Dst = BrTarget->getMBB();
+
+ const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst);
+ if (Prediction >= BranchProbability(1,2))
+ taken = true;
+
+ switch (MI->getOpcode()) {
+ case Hexagon::JMP_t:
+ return taken ? Hexagon::JMP_tnew_t : Hexagon::JMP_tnew_nt;
+ case Hexagon::JMP_f:
+ return taken ? Hexagon::JMP_fnew_t : Hexagon::JMP_fnew_nt;
+
+ default:
+ llvm_unreachable("Unexpected jump instruction.");
+ }
+}
// Returns true if a particular operand is extendable for an instruction.
bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
unsigned short OperandNum) const {
@@ -2574,3 +2440,18 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const {
}
return -1;
}
+
+bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const {
+ return (Opcode == Hexagon::JMP_t) ||
+ (Opcode == Hexagon::JMP_f) ||
+ (Opcode == Hexagon::JMP_tnew_t) ||
+ (Opcode == Hexagon::JMP_fnew_t) ||
+ (Opcode == Hexagon::JMP_tnew_nt) ||
+ (Opcode == Hexagon::JMP_fnew_nt);
+}
+
+bool HexagonInstrInfo::PredOpcodeHasNot(Opcode_t Opcode) const {
+ return (Opcode == Hexagon::JMP_f) ||
+ (Opcode == Hexagon::JMP_fnew_t) ||
+ (Opcode == Hexagon::JMP_fnew_nt);
+}
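A hedged sketch of how getDotNewPredJumpOp is meant to be driven; the caller is hypothetical (the pass that owns the MachineBranchProbabilityInfo is not part of this hunk), but the opcode mapping is read straight off the switch above and the jump forms added in HexagonInstrInfo.td:
//   JMP_t, predicted taken      -> JMP_tnew_t    "if (p.new) jump:t"
//   JMP_t, predicted not taken  -> JMP_tnew_nt   "if (p.new) jump:nt"
//   JMP_f, predicted taken      -> JMP_fnew_t    "if (!p.new) jump:t"
//   JMP_f, predicted not taken  -> JMP_fnew_nt   "if (!p.new) jump:nt"
int selectDotNewJump(const HexagonInstrInfo &TII,
                     const MachineBranchProbabilityInfo *MBPI,
                     MachineInstr *CondJmp) {
  return TII.getDotNewPredJumpOp(CondJmp, MBPI);
}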
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 5df13a8..b721da4 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -16,9 +16,9 @@
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
-
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#define GET_INSTRINFO_HEADER
#include "HexagonGenInstrInfo.inc"
@@ -28,6 +28,8 @@ namespace llvm {
class HexagonInstrInfo : public HexagonGenInstrInfo {
const HexagonRegisterInfo RI;
const HexagonSubtarget& Subtarget;
+ typedef unsigned Opcode_t;
+
public:
explicit HexagonInstrInfo(HexagonSubtarget &ST);
@@ -111,6 +113,7 @@ public:
unsigned createVR(MachineFunction* MF, MVT VT) const;
+ virtual bool isBranch(const MachineInstr *MI) const;
virtual bool isPredicable(MachineInstr *MI) const;
virtual bool
PredicateInstruction(MachineInstr *MI,
@@ -127,7 +130,11 @@ public:
const BranchProbability &Probability) const;
virtual bool isPredicated(const MachineInstr *MI) const;
+ virtual bool isPredicated(unsigned Opcode) const;
+ virtual bool isPredicatedTrue(const MachineInstr *MI) const;
+ virtual bool isPredicatedTrue(unsigned Opcode) const;
virtual bool isPredicatedNew(const MachineInstr *MI) const;
+ virtual bool isPredicatedNew(unsigned Opcode) const;
virtual bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const;
virtual bool
@@ -176,6 +183,7 @@ public:
bool isConditionalLoad (const MachineInstr* MI) const;
bool isConditionalStore(const MachineInstr* MI) const;
bool isNewValueInst(const MachineInstr* MI) const;
+ bool isNewValue(const MachineInstr* MI) const;
bool isDotNewInst(const MachineInstr* MI) const;
bool isDeallocRet(const MachineInstr *MI) const;
unsigned getInvertedPredicatedOpcode(const int Opc) const;
@@ -189,6 +197,8 @@ public:
void immediateExtend(MachineInstr *MI) const;
bool isConstExtended(MachineInstr *MI) const;
+ int getDotNewPredJumpOp(MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI) const;
unsigned getAddrMode(const MachineInstr* MI) const;
bool isOperandExtended(const MachineInstr *MI,
unsigned short OperandNum) const;
@@ -197,6 +207,9 @@ public:
int getMaxValue(const MachineInstr *MI) const;
bool NonExtEquivalentExists (const MachineInstr *MI) const;
short getNonExtOpcode(const MachineInstr *MI) const;
+ bool PredOpcodeHasJMP_c(Opcode_t Opcode) const;
+ bool PredOpcodeHasNot(Opcode_t Opcode) const;
+
private:
int getMatchingCondBranchOpcode(int Opc, bool sense) const;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
index 74dc0ca..2a4b17b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -14,6 +14,8 @@
include "HexagonInstrFormats.td"
include "HexagonOperands.td"
+//===----------------------------------------------------------------------===//
+
// Multi-class for logical operators.
multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
@@ -34,12 +36,6 @@ multiclass CMP64_rr<string OpcStr, PatFrag OpNode> {
[(set (i1 PredRegs:$dst),
(OpNode (i64 DoubleRegs:$b), (i64 DoubleRegs:$c)))]>;
}
-multiclass CMP32_rr<string OpcStr, PatFrag OpNode> {
- def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
- [(set (i1 PredRegs:$dst),
- (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
-}
multiclass CMP32_rr_ri_s10<string OpcStr, string CextOp, PatFrag OpNode> {
let CextOpcode = CextOp in {
@@ -75,14 +71,6 @@ multiclass CMP32_rr_ri_u9<string OpcStr, string CextOp, PatFrag OpNode> {
}
}
-multiclass CMP32_ri_u8<string OpcStr, PatFrag OpNode> {
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in
- def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u8Ext:$c),
- !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
- [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b),
- u8ExtPred:$c))]>;
-}
-
multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Ext:$c),
@@ -95,22 +83,30 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
//===----------------------------------------------------------------------===//
// ALU32/ALU (Instructions with register-register form)
//===----------------------------------------------------------------------===//
-multiclass ALU32_Pbase<string mnemonic, bit isNot,
- bit isPredNew> {
+def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+
+def HexagonWrapperCombineII :
+ SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>;
- let PNewValue = !if(isPredNew, "new", "") in
- def NAME : ALU32_rr<(outs IntRegs:$dst),
+def HexagonWrapperCombineRR :
+ SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>;
+
+multiclass ALU32_Pbase<string mnemonic, RegisterClass RC, bit isNot,
+ bit isPredNew> {
+ let isPredicatedNew = isPredNew in
+ def NAME : ALU32_rr<(outs RC:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
") $dst = ")#mnemonic#"($src2, $src3)",
[]>;
}
-multiclass ALU32_Pred<string mnemonic, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
- defm _c#NAME : ALU32_Pbase<mnemonic, PredNot, 0>;
+multiclass ALU32_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+ let isPredicatedFalse = PredNot in {
+ defm _c#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 0>;
// Predicate new
- defm _cdn#NAME : ALU32_Pbase<mnemonic, PredNot, 1>;
+ defm _cdn#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 1>;
}
}
@@ -125,8 +121,8 @@ multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> {
(i32 IntRegs:$src2)))]>;
let neverHasSideEffects = 1, isPredicated = 1 in {
- defm Pt : ALU32_Pred<mnemonic, 0>;
- defm NotPt : ALU32_Pred<mnemonic, 1>;
+ defm Pt : ALU32_Pred<mnemonic, IntRegs, 0>;
+ defm NotPt : ALU32_Pred<mnemonic, IntRegs, 1>;
}
}
}
@@ -140,11 +136,42 @@ let isCommutable = 1 in {
defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel;
+// Combines the two integer registers SRC1 and SRC2 into a double register.
+let isPredicable = 1 in
+class T_Combine : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = combine($src1, $src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2))))]>;
+
+multiclass Combine_base {
+ let BaseOpcode = "combine" in {
+ def NAME : T_Combine;
+ let neverHasSideEffects = 1, isPredicated = 1 in {
+ defm Pt : ALU32_Pred<"combine", DoubleRegs, 0>;
+ defm NotPt : ALU32_Pred<"combine", DoubleRegs, 1>;
+ }
+ }
+}
+
+defm COMBINE_rr : Combine_base, PredNewRel;
+
+// Combines the two immediates SRC1 and SRC2 into a double register.
+class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> :
+ ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2),
+ "$dst = combine(#$src1, #$src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>;
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in
+def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>;
+
//===----------------------------------------------------------------------===//
// ALU32/ALU (ADD with register-immediate form)
//===----------------------------------------------------------------------===//
multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
@@ -153,7 +180,7 @@ multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> {
}
multiclass ALU32ri_Pred<string mnemonic, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : ALU32ri_Pbase<mnemonic, PredNot, 0>;
// Predicate new
defm _cdn#NAME : ALU32ri_Pbase<mnemonic, PredNot, 1>;
@@ -189,11 +216,6 @@ def OR_ri : ALU32_ri<(outs IntRegs:$dst),
[(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
s10ExtPred:$src2))]>, ImmRegRel;
-def NOT_rr : ALU32_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1),
- "$dst = not($src1)",
- [(set (i32 IntRegs:$dst), (not (i32 IntRegs:$src1)))]>;
-
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
InputType = "imm", CextOpcode = "AND" in
def AND_ri : ALU32_ri<(outs IntRegs:$dst),
@@ -201,10 +223,7 @@ def AND_ri : ALU32_ri<(outs IntRegs:$dst),
"$dst = and($src1, #$src2)",
[(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
s10ExtPred:$src2))]>, ImmRegRel;
-// Negate.
-def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = neg($src1)",
- [(set (i32 IntRegs:$dst), (ineg (i32 IntRegs:$src1)))]>;
+
// Nop.
let neverHasSideEffects = 1 in
def NOP : ALU32_rr<(outs), (ins),
@@ -220,15 +239,21 @@ def SUB_ri : ALU32_ri<(outs IntRegs:$dst),
[(set IntRegs:$dst, (sub s10ExtPred:$src1, IntRegs:$src2))]>,
ImmRegRel;
+// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
+def : Pat<(not (i32 IntRegs:$src1)),
+ (SUB_ri -1, (i32 IntRegs:$src1))>;
+
+// Rd = neg(Rs) gets mapped to Rd=sub(#0, Rs).
+// Pattern definition for 'neg' was not necessary.
multiclass TFR_Pred<bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
def _c#NAME : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
!if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2",
[]>;
// Predicate new
- let PNewValue = "new" in
+ let isPredicatedNew = 1 in
def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2),
!if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2",
@@ -274,10 +299,10 @@ class T_TFR64_Pred<bit PredNot, bit isPredNew>
}
multiclass TFR64_Pred<bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
def _c#NAME : T_TFR64_Pred<PredNot, 0>;
- let PNewValue = "new" in
+ let isPredicatedNew = 1 in
def _cdn#NAME : T_TFR64_Pred<PredNot, 1>; // Predicate new
}
}
@@ -309,14 +334,14 @@ multiclass TFR64_base<string BaseName> {
}
multiclass TFRI_Pred<bit PredNot> {
- let isMoveImm = 1, PredSense = !if(PredNot, "false", "true") in {
+ let isMoveImm = 1, isPredicatedFalse = PredNot in {
def _c#NAME : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, s12Ext:$src2),
!if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2",
[]>;
// Predicate new
- let PNewValue = "new" in
+ let isPredicatedNew = 1 in
def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, s12Ext:$src2),
!if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2",
@@ -359,52 +384,6 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
// ALU32/PERM +
//===----------------------------------------------------------------------===//
-// Combine.
-
-def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
- [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
-
-def HexagonWrapperCombineII :
- SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>;
-def HexagonWrapperCombineRR :
- SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>;
-
-// Combines the two integer registers SRC1 and SRC2 into a double register.
-let isPredicable = 1 in
-def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1,
- IntRegs:$src2),
- "$dst = combine($src1, $src2)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1),
- (i32 IntRegs:$src2))))]>;
-
-// Rd=combine(Rt.[HL], Rs.[HL])
-class COMBINE_halves<string A, string B>: ALU32_rr<(outs IntRegs:$dst),
- (ins IntRegs:$src1,
- IntRegs:$src2),
- "$dst = combine($src1."# A #", $src2."# B #")", []>;
-
-let isPredicable = 1 in {
- def COMBINE_hh : COMBINE_halves<"H", "H">;
- def COMBINE_hl : COMBINE_halves<"H", "L">;
- def COMBINE_lh : COMBINE_halves<"L", "H">;
- def COMBINE_ll : COMBINE_halves<"L", "L">;
-}
-
-def : Pat<(i32 (trunc (i64 (srl (i64 DoubleRegs:$a), (i32 16))))),
- (COMBINE_lh (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_hireg),
- (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_loreg))>;
-
-// Combines the two immediates SRC1 and SRC2 into a double register.
-class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> :
- ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2),
- "$dst = combine(#$src1, #$src2)",
- [(set (i64 DoubleRegs:$dst),
- (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>;
-
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in
-def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>;
-
// Mux.
def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
DoubleRegs:$src2,
@@ -507,40 +486,24 @@ def : Pat <(sext_inreg (i32 IntRegs:$src1), i16),
// ALU32/PRED +
//===----------------------------------------------------------------------===//
-// Conditional combine.
-let neverHasSideEffects = 1, isPredicated = 1 in {
-def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1) $dst = combine($src2, $src3)",
- []>;
-
-let isPredicatedFalse = 1 in
-def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1) $dst = combine($src2, $src3)",
- []>;
-
-let isPredicatedNew = 1 in
-def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if ($src1.new) $dst = combine($src2, $src3)",
- []>;
-
-let isPredicatedNew = 1, isPredicatedFalse = 1 in
-def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "if (!$src1.new) $dst = combine($src2, $src3)",
- []>;
-}
-
// Compare.
defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel;
defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", "CMPGT", setgt>, ImmRegRel;
-defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
-defm CMPLTU : CMP32_rr<"cmp.ltu", setult>;
defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", "CMPEQ", seteq>, ImmRegRel;
-defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
-defm CMPGEU : CMP32_ri_u8<"cmp.geu", setuge>;
+
+// SDNode for converting immediate C to C-1.
+def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate, decremented by one, as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformSToSM1Imm(imm);
+}]>;
+
+// SDNode for converting immediate C to C-1.
+def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate, decremented by one, as an SDNode.
+ uint32_t imm = N->getZExtValue();
+ return XformUToUM1Imm(imm);
+}]>;
def CTLZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
"$dst = cl0($src1)",
@@ -774,112 +737,153 @@ def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
// CR -
//===----------------------------------------------------------------------===//
+def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone,
+ [SDNPHasChain]>;
-//===----------------------------------------------------------------------===//
-// J +
-//===----------------------------------------------------------------------===//
-// Jump to address.
-let isBranch = 1, isTerminator=1, isBarrier = 1, isPredicable = 1 in {
- def JMP : JInst< (outs),
- (ins brtarget:$offset),
- "jump $offset",
- [(br bb:$offset)]>;
-}
+def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
-// if (p0) jump
-let isBranch = 1, isTerminator=1, Defs = [PC],
- isPredicated = 1 in {
- def JMP_c : JInst< (outs),
- (ins PredRegs:$src, brtarget:$offset),
- "if ($src) jump $offset",
- [(brcond (i1 PredRegs:$src), bb:$offset)]>;
-}
+let InputType = "imm", isBarrier = 1, isPredicable = 1,
+Defs = [PC], isExtendable = 1, opExtendable = 0, isExtentSigned = 1,
+opExtentBits = 24 in
+class T_JMP <dag InsDag, list<dag> JumpList = []>
+ : JInst<(outs), InsDag,
+ "jump $dst" , JumpList> {
+ bits<24> dst;
+
+ let IClass = 0b0101;
+
+ let Inst{27-25} = 0b100;
+ let Inst{24-16} = dst{23-15};
+ let Inst{13-1} = dst{14-2};
+}
+
+let InputType = "imm", isExtendable = 1, opExtendable = 1, isExtentSigned = 1,
+Defs = [PC], isPredicated = 1, opExtentBits = 17 in
+class T_JMP_c <bit PredNot, bit isPredNew, bit isTaken>:
+ JInst<(outs ), (ins PredRegs:$src, brtarget:$dst),
+ !if(PredNot, "if (!$src", "if ($src")#
+ !if(isPredNew, ".new) ", ") ")#"jump"#
+ !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> {
+
+ let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), "");
+ let isPredicatedFalse = PredNot;
+ let isPredicatedNew = isPredNew;
+ bits<2> src;
+ bits<17> dst;
+
+ let IClass = 0b0101;
+
+ let Inst{27-24} = 0b1100;
+ let Inst{21} = PredNot;
+ let Inst{12} = !if(isPredNew, isTaken, zero);
+ let Inst{11} = isPredNew;
+ let Inst{9-8} = src;
+ let Inst{23-22} = dst{16-15};
+ let Inst{20-16} = dst{14-10};
+ let Inst{13} = dst{9};
+ let Inst{7-1} = dst{8-2};
+ }
-// if (!p0) jump
-let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
- isPredicated = 1 in {
- def JMP_cNot : JInst< (outs),
- (ins PredRegs:$src, brtarget:$offset),
- "if (!$src) jump $offset",
- []>;
+let isBarrier = 1, Defs = [PC], isPredicable = 1, InputType = "reg" in
+class T_JMPr<dag InsDag = (ins IntRegs:$dst)>
+ : JRInst<(outs ), InsDag,
+ "jumpr $dst" ,
+ []> {
+ bits<5> dst;
+
+ let IClass = 0b0101;
+ let Inst{27-21} = 0b0010100;
+ let Inst{20-16} = dst;
}
-let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC],
- isPredicated = 1 in {
- def BRCOND : JInst < (outs), (ins PredRegs:$pred, brtarget:$dst),
- "if ($pred) jump $dst",
- []>;
+let Defs = [PC], isPredicated = 1, InputType = "reg" in
+class T_JMPr_c <bit PredNot, bit isPredNew, bit isTaken>:
+ JRInst <(outs ), (ins PredRegs:$src, IntRegs:$dst),
+ !if(PredNot, "if (!$src", "if ($src")#
+ !if(isPredNew, ".new) ", ") ")#"jumpr"#
+ !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> {
+
+ let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), "");
+ let isPredicatedFalse = PredNot;
+ let isPredicatedNew = isPredNew;
+ bits<2> src;
+ bits<5> dst;
+
+ let IClass = 0b0101;
+
+ let Inst{27-22} = 0b001101;
+ let Inst{21} = PredNot;
+ let Inst{20-16} = dst;
+ let Inst{12} = !if(isPredNew, isTaken, zero);
+ let Inst{11} = isPredNew;
+ let Inst{9-8} = src;
+ let Predicates = !if(isPredNew, [HasV3T], [HasV2T]);
+ let validSubTargets = !if(isPredNew, HasV3SubT, HasV2SubT);
}
-// Jump to address conditioned on new predicate.
-// if (p0) jump:t
-let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
- isPredicated = 1 in {
- def JMP_cdnPt : JInst< (outs),
- (ins PredRegs:$src, brtarget:$offset),
- "if ($src.new) jump:t $offset",
- []>;
+multiclass JMP_Pred<bit PredNot> {
+ def _#NAME : T_JMP_c<PredNot, 0, 0>;
+ // Predicate new
+ def _#NAME#new_t : T_JMP_c<PredNot, 1, 1>; // taken
+ def _#NAME#new_nt : T_JMP_c<PredNot, 1, 0>; // not taken
}
-// if (!p0) jump:t
-let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
- isPredicated = 1 in {
- def JMP_cdnNotPt : JInst< (outs),
- (ins PredRegs:$src, brtarget:$offset),
- "if (!$src.new) jump:t $offset",
- []>;
+multiclass JMP_base<string BaseOp> {
+ let BaseOpcode = BaseOp in {
+ def NAME : T_JMP<(ins brtarget:$dst), [(br bb:$dst)]>;
+ defm t : JMP_Pred<0>;
+ defm f : JMP_Pred<1>;
+ }
}
-// Not taken.
-let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
- isPredicated = 1 in {
- def JMP_cdnPnt : JInst< (outs),
- (ins PredRegs:$src, brtarget:$offset),
- "if ($src.new) jump:nt $offset",
- []>;
+multiclass JMPR_Pred<bit PredNot> {
+ def NAME: T_JMPr_c<PredNot, 0, 0>;
+ // Predicate new
+ def NAME#new_tV3 : T_JMPr_c<PredNot, 1, 1>; // taken
+ def NAME#new_ntV3 : T_JMPr_c<PredNot, 1, 0>; // not taken
}
-// Not taken.
-let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
- isPredicated = 1 in {
- def JMP_cdnNotPnt : JInst< (outs),
- (ins PredRegs:$src, brtarget:$offset),
- "if (!$src.new) jump:nt $offset",
- []>;
+multiclass JMPR_base<string BaseOp> {
+ let BaseOpcode = BaseOp in {
+ def NAME : T_JMPr;
+ defm _t : JMPR_Pred<0>;
+ defm _f : JMPR_Pred<1>;
+ }
}
-//===----------------------------------------------------------------------===//
-// J -
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// JR +
-//===----------------------------------------------------------------------===//
-def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+let isTerminator = 1, neverHasSideEffects = 1 in {
+let isBranch = 1 in
+defm JMP : JMP_base<"JMP">, PredNewRel;
-// Jump to address from register.
-let isPredicable =1, isReturn = 1, isTerminator = 1, isBarrier = 1,
- Defs = [PC], Uses = [R31] in {
- def JMPR: JRInst<(outs), (ins),
- "jumpr r31",
- [(retflag)]>;
-}
+let isBranch = 1, isIndirectBranch = 1 in
+defm JMPR : JMPR_base<"JMPr">, PredNewRel;
-// Jump to address from register.
-let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
- Defs = [PC], Uses = [R31] in {
- def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1),
- "if ($src1) jumpr r31",
- []>;
+let isReturn = 1, isCodeGenOnly = 1 in
+defm JMPret : JMPR_base<"JMPret">, PredNewRel;
}
-// Jump to address from register.
-let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
- Defs = [PC], Uses = [R31] in {
- def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1),
- "if (!$src1) jumpr r31",
- []>;
-}
+def : Pat<(retflag),
+ (JMPret (i32 R31))>;
+
+def : Pat <(brcond (i1 PredRegs:$src1), bb:$offset),
+ (JMP_t (i1 PredRegs:$src1), bb:$offset)>;
+
+// A return through __builtin_eh_return.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, neverHasSideEffects = 1,
+isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in
+def EH_RETURN_JMPR : T_JMPr;
+
+def : Pat<(eh_return),
+ (EH_RETURN_JMPR (i32 R31))>;
+
+def : Pat<(HexagonBR_JT (i32 IntRegs:$dst)),
+ (JMPR (i32 IntRegs:$dst))>;
+
+def : Pat<(brind (i32 IntRegs:$dst)),
+ (JMPR (i32 IntRegs:$dst))>;
//===----------------------------------------------------------------------===//
// JR -
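The two patterns just above send both jump-table dispatch (HexagonBR_JT) and plain indirect branches (brind) to JMPR. A small illustrative snippet, not from the patch, using the GNU computed-goto extension to produce a brind node:
// Hypothetical source: the computed goto becomes an IR 'indirectbr', i.e. an
// ISD::BRIND node, which the pattern above selects into "jumpr r<n>".
void dispatch(int i) {
  static void *Targets[] = { &&handle_a, &&handle_b };
  goto *Targets[i & 1];
handle_a:
  return;
handle_b:
  return;
}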
@@ -892,7 +896,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
// Load -- MEMri operand
multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC,
bit isNot, bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME : LDInst2<(outs RC:$dst),
(ins PredRegs:$src1, MEMri:$addr),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -901,7 +905,7 @@ multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC,
}
multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
// Predicate new
defm _cdn#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
@@ -958,7 +962,7 @@ def : Pat < (i64 (load ADDRriS11_3:$addr)),
// Load - Base with Immediate offset addressing mode
multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
bit isNot, bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME : LDInst2<(outs RC:$dst),
(ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -968,7 +972,7 @@ multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
// Predicate new
defm _cdn#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
@@ -1038,7 +1042,7 @@ def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))),
multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
bit isNot, bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
(ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1049,7 +1053,7 @@ multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC,
Operand ImmOp, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
// Predicate new
let Predicates = [HasV4T], validSubTargets = HasV4SubT in
@@ -1366,7 +1370,7 @@ def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
bit isNot, bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME : STInst2PI<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1377,7 +1381,7 @@ multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
multiclass ST_PostInc_Pred<string mnemonic, RegisterClass RC,
Operand ImmOp, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME# : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
// Predicate new
let Predicates = [HasV4T], validSubTargets = HasV4SubT in
@@ -1431,7 +1435,7 @@ def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2,
//===----------------------------------------------------------------------===//
multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME : STInst2<(outs),
(ins PredRegs:$src1, MEMri:$addr, RC: $src2),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1440,7 +1444,7 @@ multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot,
}
multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
// Predicate new
@@ -1497,7 +1501,7 @@ def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr),
//===----------------------------------------------------------------------===//
multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
bit isNot, bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1507,7 +1511,7 @@ multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
bit PredNot> {
- let PredSense = !if(PredNot, "false", "true"), isPredicated = 1 in {
+ let isPredicatedFalse = PredNot, isPredicated = 1 in {
defm _c#NAME : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
// Predicate new
@@ -2023,20 +2027,18 @@ let isCall = 1, neverHasSideEffects = 1,
[]>;
}
-// Tail Calls.
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
- def TCRETURNtg : JInst<(outs), (ins calltarget:$dst),
- "jump $dst // TAILCALL", []>;
-}
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
- def TCRETURNtext : JInst<(outs), (ins calltarget:$dst),
- "jump $dst // TAILCALL", []>;
-}
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
- def TCRETURNR : JInst<(outs), (ins IntRegs:$dst),
- "jumpr $dst // TAILCALL", []>;
+// Indirect tail-call.
+let isCodeGenOnly = 1, isCall = 1, isReturn = 1 in
+def TCRETURNR : T_JMPr;
+
+// Direct tail-calls.
+let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+isTerminator = 1, isCodeGenOnly = 1 in {
+ def TCRETURNtg : T_JMP<(ins calltarget:$dst)>;
+ def TCRETURNtext : T_JMP<(ins calltarget:$dst)>;
}
+
// Map call instruction.
def : Pat<(call (i32 IntRegs:$dst)),
(CALLR (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>;
@@ -2133,10 +2135,11 @@ def : Pat <(add (i1 PredRegs:$src1), -1),
// Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) =>
// p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1).
+// cmp.lt(r0, r1) -> cmp.gt(r1, r0)
def : Pat <(select (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
(i32 IntRegs:$src3),
(i32 IntRegs:$src4)),
- (i32 (TFR_condset_rr (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+ (i32 (TFR_condset_rr (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)),
(i32 IntRegs:$src4), (i32 IntRegs:$src3)))>,
Requires<[HasV2TOnly]>;
@@ -2154,18 +2157,25 @@ def : Pat <(select (not (i1 PredRegs:$src1)), s12ImmPred:$src2,
// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
// => r0 = TFR_condset_ir(p0, #i, r1)
-def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, s12ImmPred:$src3),
+def : Pat <(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s12ImmPred:$src3),
(i32 (TFR_condset_ir (i1 PredRegs:$src1), s12ImmPred:$src3,
(i32 IntRegs:$src2)))>;
// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
-def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
- (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>;
+def : Pat <(brcond (not (i1 PredRegs:$src1)), bb:$offset),
+ (JMP_f (i1 PredRegs:$src1), bb:$offset)>;
// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
-def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
+def : Pat <(and (i1 PredRegs:$src1), (not (i1 PredRegs:$src2))),
(i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
+
+let AddedComplexity = 100 in
+def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))),
+ (i64 (COMBINE_rr (TFRI 0),
+ (LDriub_indexed (CONST32_set tglobaladdr:$global), 0)))>,
+ Requires<[NoV4T]>;
+
// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
let AddedComplexity = 10 in
def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
@@ -2186,43 +2196,46 @@ def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
subreg_loreg))))))>;
// We want to prevent emitting pnot's as much as possible.
-// Map brcond with an unsupported setcc to a JMP_cNot.
+// Map brcond with an unsupported setcc to a JMP_f.
def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
bb:$offset),
- (JMP_cNot (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+ (JMP_f (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
bb:$offset)>;
def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)),
bb:$offset),
- (JMP_cNot (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>;
+ (JMP_f (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>;
def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
- (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>;
+ (JMP_f (i1 PredRegs:$src1), bb:$offset)>;
def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
- (JMP_c (i1 PredRegs:$src1), bb:$offset)>;
+ (JMP_t (i1 PredRegs:$src1), bb:$offset)>;
+// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)),
bb:$offset),
- (JMP_cNot (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2), bb:$offset)>;
+ (JMP_f (CMPGTri (i32 IntRegs:$src1),
+ (DEC_CONST_SIGNED s8ImmPred:$src2)), bb:$offset)>;
+// cmp.lt(r0, r1) -> cmp.gt(r1, r0)
def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
bb:$offset),
- (JMP_c (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), bb:$offset)>;
+ (JMP_t (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>;
def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
bb:$offset),
- (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)),
+ (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)),
bb:$offset)>;
def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
bb:$offset),
- (JMP_cNot (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+ (JMP_f (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
bb:$offset)>;
def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
bb:$offset),
- (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
+ (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
bb:$offset)>;
// Map from a 64-bit select to an emulated 64-bit mux.
@@ -2300,8 +2313,8 @@ def : Pat<(i64 (anyext (i32 IntRegs:$src1))),
// Map cmple -> cmpgt.
// rs <= rt -> !(rs > rt).
-def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ImmPred:$src2)),
- (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ImmPred:$src2)))>;
+def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)),
+ (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ExtPred:$src2)))>;
// rs <= rt -> !(rs > rt).
def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
@@ -2314,8 +2327,8 @@ def : Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
// Map cmpne -> cmpeq.
// Hexagon_TODO: We should improve on this.
// rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)),
- (i1 (NOT_p(i1 (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2))))>;
+def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)),
+ (i1 (NOT_p(i1 (CMPEQri (i32 IntRegs:$src1), s10ExtPred:$src2))))>;
// Map cmpne(Rs) -> !cmpeq(Rs).
// rs != rt -> !(rs == rt).
@@ -2337,8 +2350,9 @@ def : Pat <(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
(i1 (NOT_p (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;
-def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ImmPred:$src2)),
- (i1 (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2))>;
+// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
+def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)),
+ (i1 (CMPGTri (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
// rss >= rtt -> !(rtt > rss).
@@ -2347,9 +2361,10 @@ def : Pat <(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
(i64 DoubleRegs:$src1)))))>;
// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
// rs < rt -> !(rs >= rt).
-def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)),
- (i1 (NOT_p (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2)))>;
+def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)),
+ (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2))))>;
// Map cmplt(Rs, Rt) -> cmpgt(Rt, Rs).
// rs < rt -> rt > rs.
@@ -2373,13 +2388,17 @@ def : Pat <(i1 (setult (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
def : Pat <(i1 (setult (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
(i1 (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
-// Generate cmpgeu(Rs, #u8)
-def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ImmPred:$src2)),
- (i1 (CMPGEUri (i32 IntRegs:$src1), u8ImmPred:$src2))>;
+// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs)
+def : Pat <(i1 (setuge (i32 IntRegs:$src1), 0)),
+ (i1 (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src1)))>;
+
+// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8-1)
+def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)),
+ (i1 (CMPGTUri (i32 IntRegs:$src1), (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>;
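// Illustrative instance (a sketch): unsigned "r0 >= #8" becomes
// "cmp.gtu(r0, #7)". The #0 case is split out above because decrementing an
// unsigned #0 would wrap; Rs >= #0 is always true, hence the cmp.eq(Rs, Rs)
// form.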
// Generate cmpgtu(Rs, #u9)
-def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)),
- (i1 (CMPGTUri (i32 IntRegs:$src1), u9ImmPred:$src2))>;
+def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)),
+ (i1 (CMPGTUri (i32 IntRegs:$src1), u9ExtPred:$src2))>;
// Map from Rs >= Rt -> !(Rt > Rs).
// rs >= rt -> !(rt > rs).
@@ -2391,7 +2410,7 @@ def : Pat <(i1 (setuge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
def : Pat <(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
(i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>;
-// Map from cmpleu(Rs, Rs) -> !cmpgtu(Rs, Rs).
+// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt).
// Map from (Rs <= Rt) -> !(Rs > Rt).
def : Pat <(i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
(i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
@@ -2487,6 +2506,13 @@ def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
Requires<[NoV4T]>;
+let AddedComplexity = 100 in
+def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
+ s11_2ExtPred:$offset)))>,
+ Requires<[NoV4T]>;
+
+let AddedComplexity = 10 in
def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
(i32 (LDriw ADDRriS11_0:$src1))>;
@@ -2503,6 +2529,48 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
(i64 (SXTW (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))))>;
+let AddedComplexity = 100 in
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+ (i32 32))),
+ (i64 (zextloadi32 (i32 (add IntRegs:$src2,
+ s11_2ExtPred:$offset2)))))),
+ (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ (LDriw_indexed IntRegs:$src2,
+ s11_2ExtPred:$offset2)))>;
+
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+ (i32 32))),
+ (i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
+ (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ (LDriw ADDRriS11_2:$srcLow)))>;
+
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+ (i32 32))),
+ (i64 (zext (i32 IntRegs:$srcLow))))),
+ (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ IntRegs:$srcLow))>;
+
+let AddedComplexity = 100 in
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+ (i32 32))),
+ (i64 (zextloadi32 (i32 (add IntRegs:$src2,
+ s11_2ExtPred:$offset2)))))),
+ (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ (LDriw_indexed IntRegs:$src2,
+ s11_2ExtPred:$offset2)))>;
+
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+ (i32 32))),
+ (i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
+ (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ (LDriw ADDRriS11_2:$srcLow)))>;
+
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+ (i32 32))),
+ (i64 (zext (i32 IntRegs:$srcLow))))),
+ (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+ IntRegs:$srcLow))>;
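// Reading of the patterns above (assuming COMBINE_rr(Rs, Rt) forms Rs:Rt with
// Rs in the high word): ((srcHigh << 32) | zext(low)) is materialized by
// taking the low subregister of $srcHigh as the new high word and the
// loaded/zero-extended value as the low word.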
+
// Any extended 64-bit load.
// anyext i32 -> i64
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
@@ -2637,19 +2705,6 @@ let AddedComplexity = 100 in
def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)),
(COPY (i32 IntRegs:$src1))>;
-def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
-
-let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
-def BR_JT : JRInst<(outs), (ins IntRegs:$src),
- "jumpr $src",
- [(HexagonBR_JT (i32 IntRegs:$src))]>;
-
-let isBranch=1, isIndirectBranch=1, isTerminator=1 in
-def BRIND : JRInst<(outs), (ins IntRegs:$src),
- "jumpr $src",
- [(brind (i32 IntRegs:$src))]>;
-
def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
def : Pat<(HexagonWrapperJT tjumptable:$dst),
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td
index 157ab3d..7e75554 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -11,6 +11,11 @@
//
//===----------------------------------------------------------------------===//
+def callv3 : SDNode<"HexagonISD::CALLv3", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
//===----------------------------------------------------------------------===//
// J +
@@ -40,41 +45,6 @@ let isCall = 1, neverHasSideEffects = 1,
[]>, Requires<[HasV3TOnly]>;
}
-
-// Jump to address from register
-// if(p?.new) jumpr:t r?
-let isReturn = 1, isTerminator = 1, isBarrier = 1,
- Defs = [PC], Uses = [R31] in {
- def JMPR_cdnPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) jumpr:t $src2",
- []>, Requires<[HasV3T]>;
-}
-
-// if (!p?.new) jumpr:t r?
-let isReturn = 1, isTerminator = 1, isBarrier = 1,
- Defs = [PC], Uses = [R31] in {
- def JMPR_cdnNotPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) jumpr:t $src2",
- []>, Requires<[HasV3T]>;
-}
-
-// Not taken.
-// if(p?.new) jumpr:nt r?
-let isReturn = 1, isTerminator = 1, isBarrier = 1,
- Defs = [PC], Uses = [R31] in {
- def JMPR_cdnPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
- "if ($src1.new) jumpr:nt $src2",
- []>, Requires<[HasV3T]>;
-}
-
-// if (!p?.new) jumpr:nt r?
-let isReturn = 1, isTerminator = 1, isBarrier = 1,
- Defs = [PC], Uses = [R31] in {
- def JMPR_cdnNotPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
- "if (!$src1.new) jumpr:nt $src2",
- []>, Requires<[HasV3T]>;
-}
-
//===----------------------------------------------------------------------===//
// JR -
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
index cd0e475..933239d 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -209,105 +209,31 @@ def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
//===----------------------------------------------------------------------===//
// LD +
//===----------------------------------------------------------------------===//
-//
-// These absolute set addressing mode instructions accept immediate as
-// an operand. We have duplicated these patterns to take global address.
-
+//===----------------------------------------------------------------------===//
+// Template class for load instructions with Absolute set addressing mode.
+//===----------------------------------------------------------------------===//
let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
-validSubTargets = HasV4SubT in {
-def LDrid_abs_setimm_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins u0AlwaysExt:$addr),
- "$dst1 = memd($dst2=##$addr)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memb(Re=#U6)
-def LDrib_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u0AlwaysExt:$addr),
- "$dst1 = memb($dst2=##$addr)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memh(Re=#U6)
-def LDrih_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u0AlwaysExt:$addr),
- "$dst1 = memh($dst2=##$addr)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memub(Re=#U6)
-def LDriub_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+validSubTargets = HasV4SubT in
+class T_LD_abs_set<string mnemonic, RegisterClass RC>:
+ LDInst2<(outs RC:$dst1, IntRegs:$dst2),
(ins u0AlwaysExt:$addr),
- "$dst1 = memub($dst2=##$addr)",
+ "$dst1 = "#mnemonic#"($dst2=##$addr)",
[]>,
Requires<[HasV4T]>;
-// Rd=memuh(Re=#U6)
-def LDriuh_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u0AlwaysExt:$addr),
- "$dst1 = memuh($dst2=##$addr)",
- []>,
- Requires<[HasV4T]>;
+def LDrid_abs_set_V4 : T_LD_abs_set <"memd", DoubleRegs>;
+def LDrib_abs_set_V4 : T_LD_abs_set <"memb", IntRegs>;
+def LDriub_abs_set_V4 : T_LD_abs_set <"memub", IntRegs>;
+def LDrih_abs_set_V4 : T_LD_abs_set <"memh", IntRegs>;
+def LDriw_abs_set_V4 : T_LD_abs_set <"memw", IntRegs>;
+def LDriuh_abs_set_V4 : T_LD_abs_set <"memuh", IntRegs>;
-// Rd=memw(Re=#U6)
-def LDriw_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins u0AlwaysExt:$addr),
- "$dst1 = memw($dst2=##$addr)",
- []>,
- Requires<[HasV4T]>;
-}
-
-// Following patterns are defined for absolute set addressing mode
-// instruction which take global address as operand.
-let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
-validSubTargets = HasV4SubT in {
-def LDrid_abs_set_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
- (ins globaladdressExt:$addr),
- "$dst1 = memd($dst2=##$addr)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memb(Re=#U6)
-def LDrib_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdressExt:$addr),
- "$dst1 = memb($dst2=##$addr)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memh(Re=#U6)
-def LDrih_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdressExt:$addr),
- "$dst1 = memh($dst2=##$addr)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memub(Re=#U6)
-def LDriub_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdressExt:$addr),
- "$dst1 = memub($dst2=##$addr)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memuh(Re=#U6)
-def LDriuh_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdressExt:$addr),
- "$dst1 = memuh($dst2=##$addr)",
- []>,
- Requires<[HasV4T]>;
-
-// Rd=memw(Re=#U6)
-def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
- (ins globaladdressExt:$addr),
- "$dst1 = memw($dst2=##$addr)",
- []>,
- Requires<[HasV4T]>;
-}
// multiclass for load instructions with base + register offset
// addressing mode
multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME : LDInst2<(outs RC:$dst),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -316,7 +242,7 @@ multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
}
multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>;
// Predicate new
defm _cdn#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>;
@@ -527,78 +453,29 @@ def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
// ST +
//===----------------------------------------------------------------------===//
///
-/// Assumptions::: ****** DO NOT IGNORE ********
-/// 1. Make sure that in post increment store, the zero'th operand is always the
-/// post increment operand.
-/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the
-/// last operand.
-///
-
-// memd(Re=#U)=Rtt
-let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in {
-def STrid_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins DoubleRegs:$src1, u0AlwaysExt:$src2),
- "memd($dst1=##$src2) = $src1",
- []>,
- Requires<[HasV4T]>;
-
-// memb(Re=#U)=Rs
-def STrib_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, u0AlwaysExt:$src2),
- "memb($dst1=##$src2) = $src1",
- []>,
- Requires<[HasV4T]>;
-
-// memh(Re=#U)=Rs
-def STrih_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, u0AlwaysExt:$src2),
- "memh($dst1=##$src2) = $src1",
- []>,
- Requires<[HasV4T]>;
-
-// memw(Re=#U)=Rs
-def STriw_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, u0AlwaysExt:$src2),
- "memw($dst1=##$src2) = $src1",
- []>,
- Requires<[HasV4T]>;
-}
-
-// memd(Re=#U)=Rtt
-let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in {
-def STrid_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins DoubleRegs:$src1, globaladdressExt:$src2),
- "memd($dst1=##$src2) = $src1",
- []>,
- Requires<[HasV4T]>;
-
-// memb(Re=#U)=Rs
-def STrib_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, globaladdressExt:$src2),
- "memb($dst1=##$src2) = $src1",
+//===----------------------------------------------------------------------===//
+// Template class for store instructions with Absolute set addressing mode.
+//===----------------------------------------------------------------------===//
+let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in
+class T_ST_abs_set<string mnemonic, RegisterClass RC>:
+ STInst2<(outs IntRegs:$dst1),
+ (ins RC:$src1, u0AlwaysExt:$src2),
+ mnemonic#"($dst1=##$src2) = $src1",
[]>,
Requires<[HasV4T]>;
-// memh(Re=#U)=Rs
-def STrih_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, globaladdressExt:$src2),
- "memh($dst1=##$src2) = $src1",
- []>,
- Requires<[HasV4T]>;
-
-// memw(Re=#U)=Rs
-def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
- (ins IntRegs:$src1, globaladdressExt:$src2),
- "memw($dst1=##$src2) = $src1",
- []>,
- Requires<[HasV4T]>;
-}
+def STrid_abs_set_V4 : T_ST_abs_set <"memd", DoubleRegs>;
+def STrib_abs_set_V4 : T_ST_abs_set <"memb", IntRegs>;
+def STrih_abs_set_V4 : T_ST_abs_set <"memh", IntRegs>;
+def STriw_abs_set_V4 : T_ST_abs_set <"memw", IntRegs>;
+//===----------------------------------------------------------------------===//
// multiclass for store instructions with base + register offset addressing
// mode
+//===----------------------------------------------------------------------===//
multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
RC:$src5),
@@ -609,7 +486,7 @@ multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot,
}
multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>;
// Predicate new
defm _cdn#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>;
@@ -637,7 +514,7 @@ multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
// addressing mode.
multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME#_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
RC:$src5),
@@ -648,7 +525,7 @@ multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
}
multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>;
// Predicate new
defm _cdn#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>;
@@ -711,17 +588,59 @@ def : Pat<(store (i64 DoubleRegs:$src4),
u2ImmPred:$src3, DoubleRegs:$src4)>;
}
-// memd(Ru<<#u2+#U6)=Rtt
-let isExtended = 1, opExtendable = 2, AddedComplexity = 10,
-validSubTargets = HasV4SubT in
-def STrid_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, DoubleRegs:$src4),
- "memd($src1<<#$src2+#$src3) = $src4",
- [(store (i64 DoubleRegs:$src4),
+let isExtended = 1, opExtendable = 2 in
+class T_ST_LongOff <string mnemonic, PatFrag stOp, RegisterClass RC, ValueType VT> :
+ STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, RC:$src4),
+ mnemonic#"($src1<<#$src2+##$src3) = $src4",
+ [(stOp (VT RC:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
u0AlwaysExtPred:$src3))]>,
Requires<[HasV4T]>;
+let isExtended = 1, opExtendable = 2, mayStore = 1, isNVStore = 1 in
+class T_ST_LongOff_nv <string mnemonic> :
+ NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
+ mnemonic#"($src1<<#$src2+##$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+multiclass ST_LongOff <string mnemonic, string BaseOp, PatFrag stOp> {
+ let BaseOpcode = BaseOp#"_shl" in {
+ let isNVStorable = 1 in
+ def NAME#_V4 : T_ST_LongOff<mnemonic, stOp, IntRegs, i32>;
+
+ def NAME#_nv_V4 : T_ST_LongOff_nv<mnemonic>;
+ }
+}
+
+let AddedComplexity = 10, validSubTargets = HasV4SubT in {
+ def STrid_shl_V4 : T_ST_LongOff<"memd", store, DoubleRegs, i64>;
+ defm STrib_shl : ST_LongOff <"memb", "STrib", truncstorei8>, NewValueRel;
+ defm STrih_shl : ST_LongOff <"memh", "STrih", truncstorei16>, NewValueRel;
+ defm STriw_shl : ST_LongOff <"memw", "STriw", store>, NewValueRel;
+}
+
+let AddedComplexity = 40 in
+multiclass T_ST_LOff_Pats <InstHexagon I, RegisterClass RC, ValueType VT,
+ PatFrag stOp> {
+ def : Pat<(stOp (VT RC:$src4),
+ (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (NumUsesBelowThresCONST32 tglobaladdr:$src3))),
+ (I IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>;
+
+ def : Pat<(stOp (VT RC:$src4),
+ (add IntRegs:$src1,
+ (NumUsesBelowThresCONST32 tglobaladdr:$src3))),
+ (I IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>;
+}
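// Reading of the multiclass above (a sketch): the first pattern matches the
// shifted-base form and the second covers a plain base plus global offset by
// reusing the same instruction with a zero shift count.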
+
+defm : T_ST_LOff_Pats<STrid_shl_V4, DoubleRegs, i64, store>;
+defm : T_ST_LOff_Pats<STriw_shl_V4, IntRegs, i32, store>;
+defm : T_ST_LOff_Pats<STrib_shl_V4, IntRegs, i32, truncstorei8>;
+defm : T_ST_LOff_Pats<STrih_shl_V4, IntRegs, i32, truncstorei16>;
+
// memd(Rx++#s4:3)=Rtt
// memd(Rx++#s4:3:circ(Mu))=Rtt
// memd(Rx++I:circ(Mu))=Rtt
@@ -741,7 +660,7 @@ def STrid_shl_V4 : STInst<(outs),
//===----------------------------------------------------------------------===//
multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME : STInst2<(outs),
(ins PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -751,7 +670,7 @@ multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
}
multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>;
// Predicate new
defm _cdn#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>;
@@ -799,17 +718,6 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
(STrib_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
Requires<[HasV4T]>;
-// memb(Ru<<#u2+#U6)=Rt
-let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STrib_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- "memb($src1<<#$src2+#$src3) = $src4",
- [(truncstorei8 (i32 IntRegs:$src4),
- (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u0AlwaysExtPred:$src3))]>,
- Requires<[HasV4T]>;
-
// memb(Rx++#s4:0:circ(Mu))=Rt
// memb(Rx++I:circ(Mu))=Rt
// memb(Rx++Mu)=Rt
@@ -830,17 +738,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
// TODO: needs to be implemented.
// memh(Ru<<#u2+#U6)=Rt.H
-// memh(Ru<<#u2+#U6)=Rt
-let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STrih_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- "memh($src1<<#$src2+#$src3) = $src4",
- [(truncstorei16 (i32 IntRegs:$src4),
- (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u0AlwaysExtPred:$src3))]>,
- Requires<[HasV4T]>;
-
// memh(Rx++#s4:1:circ(Mu))=Rt.H
// memh(Rx++#s4:1:circ(Mu))=Rt
// memh(Rx++I:circ(Mu))=Rt.H
@@ -877,17 +774,6 @@ def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)),
(STriw_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
Requires<[HasV4T]>;
-// memw(Ru<<#u2+#U6)=Rt
-let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STriw_shl_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- "memw($src1<<#$src2+#$src3) = $src4",
- [(store (i32 IntRegs:$src4),
- (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u0AlwaysExtPred:$src3))]>,
- Requires<[HasV4T]>;
-
// memw(Rx++#s4:2)=Rt
// memw(Rx++#s4:2:circ(Mu))=Rt
// memw(Rx++I:circ(Mu))=Rt
@@ -907,7 +793,7 @@ def STriw_shl_V4 : STInst<(outs),
//
multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC,
Operand predImmOp, bit isNot, bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME#_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -918,7 +804,7 @@ multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC,
multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp,
bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>;
// Predicate new
defm _cdn#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>;
@@ -960,7 +846,7 @@ let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in {
// and MEMri operand.
multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME#_nv_V4 : NVInst_V4<(outs),
(ins PredRegs:$src1, MEMri:$addr, RC: $src2),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -970,7 +856,7 @@ multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
}
multiclass ST_MEMri_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 0>;
// Predicate new
@@ -1006,15 +892,6 @@ mayStore = 1 in {
defm STriw: ST_MEMri_nv<"memw", "STriw", IntRegs, 13, 8>, AddrModeRel;
}
-// memb(Ru<<#u2+#U6)=Nt.new
-let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
-isNVStore = 1, validSubTargets = HasV4SubT in
-def STrib_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- "memb($src1<<#$src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
//===----------------------------------------------------------------------===//
// Post increment store
// mem[bhwd](Rx++#s4:[0123])=Nt.new
@@ -1022,7 +899,7 @@ def STrib_shl_nv_V4 : NVInst_V4<(outs),
multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp,
bit isNot, bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1034,7 +911,7 @@ multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp,
multiclass ST_PostInc_Pred_nv<string mnemonic, RegisterClass RC,
Operand ImmOp, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 0>;
// Predicate new
let Predicates = [HasV4T], validSubTargets = HasV4SubT in
@@ -1072,29 +949,11 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
// memb(Rx++I:circ(Mu))=Nt.new
// memb(Rx++Mu)=Nt.new
// memb(Rx++Mu:brev)=Nt.new
-// memh(Ru<<#u2+#U6)=Nt.new
-let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
-isNVStore = 1, validSubTargets = HasV4SubT in
-def STrih_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- "memh($src1<<#$src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
// memh(Rx++#s4:1:circ(Mu))=Nt.new
// memh(Rx++I:circ(Mu))=Nt.new
// memh(Rx++Mu)=Nt.new
// memh(Rx++Mu:brev)=Nt.new
-// memw(Ru<<#u2+#U6)=Nt.new
-let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
-isNVStore = 1, validSubTargets = HasV4SubT in
-def STriw_shl_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
- "memw($src1<<#$src2+#$src3) = $src4.new",
- []>,
- Requires<[HasV4T]>;
-
// memw(Rx++#s4:2:circ(Mu))=Nt.new
// memw(Rx++I:circ(Mu))=Nt.new
// memw(Rx++Mu)=Nt.new
@@ -1108,179 +967,193 @@ def STriw_shl_nv_V4 : NVInst_V4<(outs),
// NV/J +
//===----------------------------------------------------------------------===//
-multiclass NVJ_type_basic_reg<string NotStr, string OpcStr, string TakenStr> {
- def _ie_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
- !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
- !strconcat("($src1.new, $src2)) jump:",
- !strconcat(TakenStr, " $offset"))))),
- []>,
- Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// multiclass/template class for the new-value compare jump instructions
+// with register operands.
+//===----------------------------------------------------------------------===//
- def _nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
- !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
- !strconcat("($src1.new, $src2)) jump:",
- !strconcat(TakenStr, " $offset"))))),
- []>,
- Requires<[HasV4T]>;
-}
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in
+class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
+ bit isNegCond, bit isTaken>
+ : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ "if ("#!if(isNegCond, "!","")#mnemonic#
+ "($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")#
+ "$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:"
+ #!if(isTaken, "t","nt")#" $offset",
+ []>, Requires<[HasV4T]> {
-multiclass NVJ_type_basic_2ndDotNew<string NotStr, string OpcStr,
- string TakenStr> {
- def _ie_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
- !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
- !strconcat("($src1, $src2.new)) jump:",
- !strconcat(TakenStr, " $offset"))))),
- []>,
- Requires<[HasV4T]>;
+ bits<5> src1;
+ bits<5> src2;
+ bits<3> Ns; // New-Value Operand
+ bits<5> RegOp; // Non New-Value Operand
+ bits<11> offset;
- def _nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
- !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
- !strconcat("($src1, $src2.new)) jump:",
- !strconcat(TakenStr, " $offset"))))),
- []>,
- Requires<[HasV4T]>;
-}
+ let isBrTaken = !if(isTaken, "true", "false");
+ let isPredicatedFalse = isNegCond;
-multiclass NVJ_type_basic_imm<string NotStr, string OpcStr, string TakenStr> {
- def _ie_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
- !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
- !strconcat("($src1.new, #$src2)) jump:",
- !strconcat(TakenStr, " $offset"))))),
- []>,
- Requires<[HasV4T]>;
+ let Ns = !if(!eq(NvOpNum, 0), src1{2-0}, src2{2-0});
+ let RegOp = !if(!eq(NvOpNum, 0), src2, src1);
- def _nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
- !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
- !strconcat("($src1.new, #$src2)) jump:",
- !strconcat(TakenStr, " $offset"))))),
- []>,
- Requires<[HasV4T]>;
+ let IClass = 0b0010;
+ let Inst{26} = 0b0;
+ let Inst{25-23} = majOp;
+ let Inst{22} = isNegCond;
+ let Inst{18-16} = Ns;
+ let Inst{13} = isTaken;
+ let Inst{12-8} = RegOp;
+ let Inst{21-20} = offset{10-9};
+ let Inst{7-1} = offset{8-2};
}
-multiclass NVJ_type_basic_neg<string NotStr, string OpcStr, string TakenStr> {
- def _ie_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset),
- !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
- !strconcat("($src1.new, #$src2)) jump:",
- !strconcat(TakenStr, " $offset"))))),
- []>,
- Requires<[HasV4T]>;
- def _nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset),
- !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
- !strconcat("($src1.new, #$src2)) jump:",
- !strconcat(TakenStr, " $offset"))))),
- []>,
- Requires<[HasV4T]>;
+multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum,
+ bit isNegCond> {
+ // Branch not taken:
+ def _nt_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>;
+ // Branch taken:
+ def _t_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>;
}
-multiclass NVJ_type_basic_tstbit<string NotStr, string OpcStr,
- string TakenStr> {
- def _ie_nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset),
- !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
- !strconcat("($src1.new, #$src2)) jump:",
- !strconcat(TakenStr, " $offset"))))),
- []>,
- Requires<[HasV4T]>;
+// NvOpNum = 0 -> First Operand is a new-value Register
+// NvOpNum = 1 -> Second Operand is a new-value Register
- def _nv_V4 : NVInst_V4<(outs),
- (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset),
- !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
- !strconcat("($src1.new, #$src2)) jump:",
- !strconcat(TakenStr, " $offset"))))),
- []>,
- Requires<[HasV4T]>;
+multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp,
+ bit NvOpNum> {
+ let BaseOpcode = BaseOp#_NVJ in {
+ defm _t_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond
+ defm _f_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond
+ }
}
-// Multiclass for regular dot new of Ist operand register.
-multiclass NVJ_type_br_pred_reg<string NotStr, string OpcStr> {
- defm Pt : NVJ_type_basic_reg<NotStr, OpcStr, "t">;
- defm Pnt : NVJ_type_basic_reg<NotStr, OpcStr, "nt">;
-}
+// if ([!]cmp.eq(Ns.new,Rt)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,Rt)) jump:[n]t #r9:2
+// if ([!]cmp.gtu(Ns.new,Rt)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Rt,Ns.new)) jump:[n]t #r9:2
+// if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2
-// Multiclass for dot new of 2nd operand register.
-multiclass NVJ_type_br_pred_2ndDotNew<string NotStr, string OpcStr> {
- defm Pt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "t">;
- defm Pnt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "nt">;
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
+ Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
+ defm CMPEQrr : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel;
+ defm CMPGTrr : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel;
+ defm CMPGTUrr : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel;
+ defm CMPLTrr : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel;
+ defm CMPLTUrr : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel;
}
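// Illustrative syntax produced by the template above (register names are
// placeholders): cmp.eq with NvOpNum = 0, true condition, taken hint emits
// "if (cmp.eq(r2.new, r5)) jump:t #r9:2". The CMPLT*/CMPLTU* entries reuse
// cmp.gt/cmp.gtu with NvOpNum = 1, i.e. "if (cmp.gt(r5, r2.new)) jump:t".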
-// Multiclass for 2nd operand immediate, including -1.
-multiclass NVJ_type_br_pred_imm<string NotStr, string OpcStr> {
- defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">;
- defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">;
- defm Ptneg : NVJ_type_basic_neg<NotStr, OpcStr, "t">;
- defm Pntneg : NVJ_type_basic_neg<NotStr, OpcStr, "nt">;
-}
+//===----------------------------------------------------------------------===//
+// multiclass/template class for the new-value compare jump instructions
+// with a register and an unsigned immediate (U5) operand.
+//===----------------------------------------------------------------------===//
-// Multiclass for 2nd operand immediate, excluding -1.
-multiclass NVJ_type_br_pred_imm_only<string NotStr, string OpcStr> {
- defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">;
- defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">;
-}
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in
+class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
+ bit isTaken>
+ : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
+ "if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:"
+ #!if(isTaken, "t","nt")#" $offset",
+ []>, Requires<[HasV4T]> {
-// Multiclass for tstbit, where 2nd operand is always #0.
-multiclass NVJ_type_br_pred_tstbit<string NotStr, string OpcStr> {
- defm Pt : NVJ_type_basic_tstbit<NotStr, OpcStr, "t">;
- defm Pnt : NVJ_type_basic_tstbit<NotStr, OpcStr, "nt">;
+ let isPredicatedFalse = isNegCond;
+ let isBrTaken = !if(isTaken, "true", "false");
+
+ bits<3> src1;
+ bits<5> src2;
+ bits<11> offset;
+
+ let IClass = 0b0010;
+ let Inst{26} = 0b1;
+ let Inst{25-23} = majOp;
+ let Inst{22} = isNegCond;
+ let Inst{18-16} = src1;
+ let Inst{13} = isTaken;
+ let Inst{12-8} = src2;
+ let Inst{21-20} = offset{10-9};
+ let Inst{7-1} = offset{8-2};
}
-// Multiclass for GT.
-multiclass NVJ_type_rr_ri<string OpcStr> {
- defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>;
- defm rr : NVJ_type_br_pred_reg<"", OpcStr>;
- defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>;
- defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>;
- defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>;
- defm ri : NVJ_type_br_pred_imm<"", OpcStr>;
+multiclass NVJri_cond<string mnemonic, bits<3> majOp, bit isNegCond> {
+ // Branch not taken:
+ def _nt_V4: NVJri_template<mnemonic, majOp, isNegCond, 0>;
+ // Branch taken:
+ def _t_V4: NVJri_template<mnemonic, majOp, isNegCond, 1>;
}
-// Multiclass for EQ.
-multiclass NVJ_type_rr_ri_no_2ndDotNew<string OpcStr> {
- defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>;
- defm rr : NVJ_type_br_pred_reg<"", OpcStr>;
- defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>;
- defm ri : NVJ_type_br_pred_imm<"", OpcStr>;
+multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> {
+ let BaseOpcode = BaseOp#_NVJri in {
+ defm _t_Jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond
+ defm _f_Jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond
+ }
}
-// Multiclass for GTU.
-multiclass NVJ_type_rr_ri_no_nOne<string OpcStr> {
- defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>;
- defm rr : NVJ_type_br_pred_reg<"", OpcStr>;
- defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>;
- defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>;
- defm riNot : NVJ_type_br_pred_imm_only<"!", OpcStr>;
- defm ri : NVJ_type_br_pred_imm_only<"", OpcStr>;
+// if ([!]cmp.eq(Ns.new,#U5)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,#U5)) jump:[n]t #r9:2
+// if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2
+
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
+ Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
+ defm CMPEQri : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel;
+ defm CMPGTri : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel;
+ defm CMPGTUri : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel;
}
-// Multiclass for tstbit.
-multiclass NVJ_type_r0<string OpcStr> {
- defm r0Not : NVJ_type_br_pred_tstbit<"!", OpcStr>;
- defm r0 : NVJ_type_br_pred_tstbit<"", OpcStr>;
- }
+//===----------------------------------------------------------------------===//
+// multiclass/template class for the new-value compare jump instructions
+// with a register and a hardcoded 0/-1 immediate value.
+//===----------------------------------------------------------------------===//
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11 in
+class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal,
+ bit isNegCond, bit isTaken>
+ : NVInst_V4<(outs),
+ (ins IntRegs:$src1, brtarget:$offset),
+ "if ("#!if(isNegCond, "!","")#mnemonic
+ #"($src1.new, #"#ImmVal#")) jump:"
+ #!if(isTaken, "t","nt")#" $offset",
+ []>, Requires<[HasV4T]> {
-// Base Multiclass for New Value Jump.
-multiclass NVJ_type {
- defm GT : NVJ_type_rr_ri<"cmp.gt">;
- defm EQ : NVJ_type_rr_ri_no_2ndDotNew<"cmp.eq">;
- defm GTU : NVJ_type_rr_ri_no_nOne<"cmp.gtu">;
- defm TSTBIT : NVJ_type_r0<"tstbit">;
+ let isPredicatedFalse = isNegCond;
+ let isBrTaken = !if(isTaken, "true", "false");
+
+ bits<3> src1;
+ bits<11> offset;
+ let IClass = 0b0010;
+ let Inst{26} = 0b1;
+ let Inst{25-23} = majOp;
+ let Inst{22} = isNegCond;
+ let Inst{18-16} = src1;
+ let Inst{13} = isTaken;
+ let Inst{21-20} = offset{10-9};
+ let Inst{7-1} = offset{8-2};
}
-let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
- defm JMP_ : NVJ_type;
+multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal,
+ bit isNegCond> {
+ // Branch not taken:
+ def _nt_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>;
+ // Branch taken:
+ def _t_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>;
}
-//===----------------------------------------------------------------------===//
-// NV/J -
-//===----------------------------------------------------------------------===//
+multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
+ string ImmVal> {
+ let BaseOpcode = BaseOp#_NVJ_ConstImm in {
+ defm _t_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True cond
+ defm _f_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False Cond
+ }
+}
+
+// if ([!]tstbit(Ns.new,#0)) jump:[n]t #r9:2
+// if ([!]cmp.eq(Ns.new,#-1)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2
+
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1,
+ Defs = [PC], neverHasSideEffects = 1 in {
+ defm TSTBIT0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel;
+ defm CMPEQn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel;
+ defm CMPGTn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel;
+}
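// Illustrative syntax for the hardcoded-immediate forms above (register is a
// placeholder): "if (tstbit(r3.new, #0)) jump:nt #r9:2" and
// "if (!cmp.eq(r3.new, #-1)) jump:t #r9:2".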
//===----------------------------------------------------------------------===//
// XTYPE/ALU +
@@ -2286,7 +2159,7 @@ def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)),
bb:$offset),
- (JMP_cNot (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2),
+ (JMP_f (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2),
bb:$offset)>,
Requires<[HasV4T]>;
@@ -2769,9 +2642,9 @@ let isReturn = 1, isTerminator = 1,
multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME#_V4 : STInst2<(outs),
- (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$absaddr, RC: $src2),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#mnemonic#"(##$absaddr) = $src2",
[]>,
@@ -2779,7 +2652,7 @@ multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
}
multiclass ST_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 0>;
// Predicate new
defm _cdn#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 1>;
@@ -2791,7 +2664,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
let opExtendable = 0, isPredicable = 1 in
def NAME#_V4 : STInst2<(outs),
- (ins globaladdressExt:$absaddr, RC:$src),
+ (ins u0AlwaysExt:$absaddr, RC:$src),
mnemonic#"(##$absaddr) = $src",
[]>,
Requires<[HasV4T]>;
@@ -2805,9 +2678,9 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> {
multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME#_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2),
+ (ins PredRegs:$src1, u0AlwaysExt:$absaddr, RC: $src2),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#mnemonic#"(##$absaddr) = $src2.new",
[]>,
@@ -2815,7 +2688,7 @@ multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot,
}
multiclass ST_Abs_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 0>;
// Predicate new
defm _cdn#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 1>;
@@ -2827,7 +2700,7 @@ multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
let opExtendable = 0, isPredicable = 1 in
def NAME#_nv_V4 : NVInst_V4<(outs),
- (ins globaladdressExt:$absaddr, RC:$src),
+ (ins u0AlwaysExt:$absaddr, RC:$src),
mnemonic#"(##$absaddr) = $src.new",
[]>,
Requires<[HasV4T]>;
@@ -2840,16 +2713,19 @@ multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> {
}
let addrMode = Absolute in {
+ let accessSize = ByteAccess in
defm STrib_abs : ST_Abs<"memb", "STrib", IntRegs>,
ST_Abs_nv<"memb", "STrib", IntRegs>, AddrModeRel;
+ let accessSize = HalfWordAccess in
defm STrih_abs : ST_Abs<"memh", "STrih", IntRegs>,
ST_Abs_nv<"memh", "STrih", IntRegs>, AddrModeRel;
+ let accessSize = WordAccess in
defm STriw_abs : ST_Abs<"memw", "STriw", IntRegs>,
ST_Abs_nv<"memw", "STriw", IntRegs>, AddrModeRel;
- let isNVStorable = 0 in
+ let accessSize = DoubleWordAccess, isNVStorable = 0 in
defm STrid_abs : ST_Abs<"memd", "STrid", DoubleRegs>, AddrModeRel;
}
@@ -2875,6 +2751,7 @@ def : Pat<(store (i64 DoubleRegs:$src1),
// mem[bhwd](#global)=Rt
// if ([!]Pv[.new]) mem[bhwd](##global) = Rt
//===----------------------------------------------------------------------===//
+let mayStore = 1, isNVStorable = 1 in
multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> {
let BaseOpcode = BaseOp, isPredicable = 1 in
def NAME#_V4 : STInst2<(outs),
@@ -2909,15 +2786,16 @@ multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> {
}
}
-let validSubTargets = HasV4SubT, validSubTargets = HasV4SubT in {
-defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>,
- ST_GP_nv<"memd", "STd_GP", DoubleRegs>, NewValueRel ;
-defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>,
- ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel ;
-defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>,
- ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel ;
-defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>,
- ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel ;
+let validSubTargets = HasV4SubT, neverHasSideEffects = 1 in {
+ let isNVStorable = 0 in
+ defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>, PredNewRel;
+
+ defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>,
+ ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel;
+ defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>,
+ ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel;
+ defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>,
+ ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel;
}
// 64 bit atomic store
@@ -2974,9 +2852,9 @@ def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
//===----------------------------------------------------------------------===//
multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
bit isPredNew> {
- let PNewValue = !if(isPredNew, "new", "") in
+ let isPredicatedNew = isPredNew in
def NAME : LDInst2<(outs RC:$dst),
- (ins PredRegs:$src1, globaladdressExt:$absaddr),
+ (ins PredRegs:$src1, u0AlwaysExt:$absaddr),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#"$dst = "#mnemonic#"(##$absaddr)",
[]>,
@@ -2984,7 +2862,7 @@ multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
}
multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
- let PredSense = !if(PredNot, "false", "true") in {
+ let isPredicatedFalse = PredNot in {
defm _c#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 0>;
// Predicate new
defm _cdn#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 1>;
@@ -2996,7 +2874,7 @@ multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
let opExtendable = 1, isPredicable = 1 in
def NAME#_V4 : LDInst2<(outs RC:$dst),
- (ins globaladdressExt:$absaddr),
+ (ins u0AlwaysExt:$absaddr),
"$dst = "#mnemonic#"(##$absaddr)",
[]>,
Requires<[HasV4T]>;
@@ -3009,33 +2887,37 @@ multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> {
}
let addrMode = Absolute in {
+ let accessSize = ByteAccess in {
defm LDrib_abs : LD_Abs<"memb", "LDrib", IntRegs>, AddrModeRel;
defm LDriub_abs : LD_Abs<"memub", "LDriub", IntRegs>, AddrModeRel;
+ }
+ let accessSize = HalfWordAccess in {
defm LDrih_abs : LD_Abs<"memh", "LDrih", IntRegs>, AddrModeRel;
defm LDriuh_abs : LD_Abs<"memuh", "LDriuh", IntRegs>, AddrModeRel;
+ }
+ let accessSize = WordAccess in
defm LDriw_abs : LD_Abs<"memw", "LDriw", IntRegs>, AddrModeRel;
+
+ let accessSize = DoubleWordAccess in
defm LDrid_abs : LD_Abs<"memd", "LDrid", DoubleRegs>, AddrModeRel;
}
-let Predicates = [HasV4T], AddedComplexity = 30 in
+let Predicates = [HasV4T], AddedComplexity = 30 in {
def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))),
(LDriw_abs_V4 tglobaladdr: $absaddr)>;
-let Predicates = [HasV4T], AddedComplexity=30 in
def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
(LDrib_abs_V4 tglobaladdr:$absaddr)>;
-let Predicates = [HasV4T], AddedComplexity=30 in
def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
(LDriub_abs_V4 tglobaladdr:$absaddr)>;
-let Predicates = [HasV4T], AddedComplexity=30 in
def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
(LDrih_abs_V4 tglobaladdr:$absaddr)>;
-let Predicates = [HasV4T], AddedComplexity=30 in
def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
(LDriuh_abs_V4 tglobaladdr:$absaddr)>;
+}
//===----------------------------------------------------------------------===//
// multiclass for load instructions with GP-relative addressing mode.
@@ -3058,12 +2940,12 @@ multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> {
}
}
-defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>;
-defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>;
-defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>;
-defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>;
-defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>;
-defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>;
+defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>, PredNewRel;
+defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>, PredNewRel;
+defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>, PredNewRel;
+defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>, PredNewRel;
+defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>, PredNewRel;
+defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>, PredNewRel;
def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
(i64 (LDd_GP_V4 tglobaladdr:$global))>;
@@ -3139,9 +3021,10 @@ def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
// Transfer global address into a register
-let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in
-def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1),
- "$dst = ##$src1",
+let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1,
+isAsCheapAsAMove = 1, isReMaterializable = 1, validSubTargets = HasV4SubT in
+def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
+ "$dst = #$src1",
[(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>,
Requires<[HasV4T]>;
@@ -3185,19 +3068,21 @@ def : Pat<(HexagonCONST32_GP tglobaladdr:$src1),
// Load - Indirect with long offset: These instructions take global address
// as an operand
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 3, AddedComplexity = 40,
+validSubTargets = HasV4SubT in
def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst),
- (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset),
"$dst=memd($src1<<#$src2+##$offset)",
[(set (i64 DoubleRegs:$dst),
(load (add (shl IntRegs:$src1, u2ImmPred:$src2),
(HexagonCONST32 tglobaladdr:$offset))))]>,
Requires<[HasV4T]>;
-let AddedComplexity = 10 in
+let AddedComplexity = 40 in
multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> {
+let isExtended = 1, opExtendable = 3, validSubTargets = HasV4SubT in
def _lo_V4 : LDInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset),
!strconcat("$dst = ",
!strconcat(OpcStr, "($src1<<#$src2+##$offset)")),
[(set IntRegs:$dst,
@@ -3208,202 +3093,53 @@ multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> {
defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>;
defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>;
+defm LDriub_ind_anyext : LD_indirect_lo<"memub", extloadi8>;
defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>;
defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>;
+defm LDriuh_ind_anyext : LD_indirect_lo<"memuh", extloadi16>;
defm LDriw_ind : LD_indirect_lo<"memw", load>;
-// Store - Indirect with long offset: These instructions take global address
-// as an operand
-let AddedComplexity = 10 in
-def STrid_ind_lo_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
- DoubleRegs:$src4),
- "memd($src1<<#$src2+#$src3) = $src4",
- [(store (i64 DoubleRegs:$src4),
- (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (HexagonCONST32 tglobaladdr:$src3)))]>,
- Requires<[HasV4T]>;
-
-let AddedComplexity = 10 in
-multiclass ST_indirect_lo<string OpcStr, PatFrag OpNode> {
- def _lo_V4 : STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
- IntRegs:$src4),
- !strconcat(OpcStr, "($src1<<#$src2+##$src3) = $src4"),
- [(OpNode (i32 IntRegs:$src4),
- (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (HexagonCONST32 tglobaladdr:$src3)))]>,
- Requires<[HasV4T]>;
-}
-
-defm STrib_ind : ST_indirect_lo<"memb", truncstorei8>;
-defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>;
-defm STriw_ind : ST_indirect_lo<"memw", store>;
-
-// Store - absolute addressing mode: These instruction take constant
-// value as the extended operand.
-multiclass ST_absimm<string OpcStr> {
-let isExtended = 1, opExtendable = 0, isPredicable = 1,
-validSubTargets = HasV4SubT in
- def _abs_V4 : STInst2<(outs),
- (ins u0AlwaysExt:$src1, IntRegs:$src2),
- !strconcat(OpcStr, "(##$src1) = $src2"),
- []>,
- Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 1, isPredicated = 1,
-validSubTargets = HasV4SubT in {
- def _abs_cPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
- !strconcat("if ($src1)", !strconcat(OpcStr, "(##$src2) = $src3")),
- []>,
- Requires<[HasV4T]>;
-
- def _abs_cNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
- !strconcat("if (!$src1)", !strconcat(OpcStr, "(##$src2) = $src3")),
- []>,
- Requires<[HasV4T]>;
-
- def _abs_cdnPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
- !strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(##$src2) = $src3")),
- []>,
- Requires<[HasV4T]>;
-
- def _abs_cdnNotPt_V4 : STInst2<(outs),
- (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
- !strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(##$src2) = $src3")),
- []>,
- Requires<[HasV4T]>;
-}
-
-let isExtended = 1, opExtendable = 0, mayStore = 1, isNVStore = 1,
-validSubTargets = HasV4SubT in
- def _abs_nv_V4 : NVInst_V4<(outs),
- (ins u0AlwaysExt:$src1, IntRegs:$src2),
- !strconcat(OpcStr, "(##$src1) = $src2.new"),
- []>,
- Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 1, mayStore = 1, isPredicated = 1,
-isNVStore = 1, validSubTargets = HasV4SubT in {
- def _abs_cPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
- !strconcat("if ($src1)",
- !strconcat(OpcStr, "(##$src2) = $src3.new")),
- []>,
- Requires<[HasV4T]>;
-
- def _abs_cNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
- !strconcat("if (!$src1)",
- !strconcat(OpcStr, "(##$src2) = $src3.new")),
- []>,
- Requires<[HasV4T]>;
-
- def _abs_cdnPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
- !strconcat("if ($src1.new)",
- !strconcat(OpcStr, "(##$src2) = $src3.new")),
- []>,
- Requires<[HasV4T]>;
-
- def _abs_cdnNotPt_nv_V4 : NVInst_V4<(outs),
- (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
- !strconcat("if (!$src1.new)",
- !strconcat(OpcStr, "(##$src2) = $src3.new")),
- []>,
- Requires<[HasV4T]>;
-}
-}
+let AddedComplexity = 40 in
+def : Pat <(i32 (sextloadi8 (add IntRegs:$src1,
+ (NumUsesBelowThresCONST32 tglobaladdr:$offset)))),
+ (i32 (LDrib_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>,
+ Requires<[HasV4T]>;
-defm STrib_imm : ST_absimm<"memb">;
-defm STrih_imm : ST_absimm<"memh">;
-defm STriw_imm : ST_absimm<"memw">;
+let AddedComplexity = 40 in
+def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
+ (NumUsesBelowThresCONST32 tglobaladdr:$offset)))),
+ (i32 (LDriub_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>,
+ Requires<[HasV4T]>;
let Predicates = [HasV4T], AddedComplexity = 30 in {
def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
- (STrib_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+ (STrib_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
- (STrih_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+ (STrih_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
- (STriw_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
-}
-
-// Load - absolute addressing mode: These instruction take constant
-// value as the extended operand
-
-multiclass LD_absimm<string OpcStr> {
-let isExtended = 1, opExtendable = 1, isPredicable = 1,
-validSubTargets = HasV4SubT in
- def _abs_V4 : LDInst2<(outs IntRegs:$dst),
- (ins u0AlwaysExt:$src),
- !strconcat("$dst = ",
- !strconcat(OpcStr, "(##$src)")),
- []>,
- Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 2, isPredicated = 1,
-validSubTargets = HasV4SubT in {
- def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u0AlwaysExt:$src2),
- !strconcat("if ($src1) $dst = ",
- !strconcat(OpcStr, "(##$src2)")),
- []>,
- Requires<[HasV4T]>;
-
- def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u0AlwaysExt:$src2),
- !strconcat("if (!$src1) $dst = ",
- !strconcat(OpcStr, "(##$src2)")),
- []>,
- Requires<[HasV4T]>;
-
- def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u0AlwaysExt:$src2),
- !strconcat("if ($src1.new) $dst = ",
- !strconcat(OpcStr, "(##$src2)")),
- []>,
- Requires<[HasV4T]>;
-
- def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
- (ins PredRegs:$src1, u0AlwaysExt:$src2),
- !strconcat("if (!$src1.new) $dst = ",
- !strconcat(OpcStr, "(##$src2)")),
- []>,
- Requires<[HasV4T]>;
+ (STriw_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
}
-}
-
-defm LDrib_imm : LD_absimm<"memb">;
-defm LDriub_imm : LD_absimm<"memub">;
-defm LDrih_imm : LD_absimm<"memh">;
-defm LDriuh_imm : LD_absimm<"memuh">;
-defm LDriw_imm : LD_absimm<"memw">;
let Predicates = [HasV4T], AddedComplexity = 30 in {
def : Pat<(i32 (load u0AlwaysExtPred:$src)),
- (LDriw_imm_abs_V4 u0AlwaysExtPred:$src)>;
+ (LDriw_abs_V4 u0AlwaysExtPred:$src)>;
def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)),
- (LDrib_imm_abs_V4 u0AlwaysExtPred:$src)>;
+ (LDrib_abs_V4 u0AlwaysExtPred:$src)>;
def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)),
- (LDriub_imm_abs_V4 u0AlwaysExtPred:$src)>;
+ (LDriub_abs_V4 u0AlwaysExtPred:$src)>;
def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)),
- (LDrih_imm_abs_V4 u0AlwaysExtPred:$src)>;
+ (LDrih_abs_V4 u0AlwaysExtPred:$src)>;
def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)),
- (LDriuh_imm_abs_V4 u0AlwaysExtPred:$src)>;
+ (LDriuh_abs_V4 u0AlwaysExtPred:$src)>;
}
-// Indexed store double word - global address.
+// Indexed store word - global address.
// memw(Rs+#u6:2)=#S8
let AddedComplexity = 10 in
def STriw_offset_ext_V4 : STInst<(outs),
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index 0318c519..bd7b26a 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -29,15 +29,18 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
std::vector<MachineInstr*> AllocaAdjustInsts;
int VarArgsFrameIndex;
bool HasClobberLR;
+ bool HasEHReturn;
std::map<const MachineInstr*, unsigned> PacketInfo;
public:
- HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0) {}
+ HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0),
+ HasEHReturn(false) {}
HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
- HasClobberLR(0) {}
+ HasClobberLR(0),
+ HasEHReturn(false) {}
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
@@ -69,6 +72,8 @@ public:
void setHasClobberLR(bool v) { HasClobberLR = v; }
bool hasClobberLR() const { return HasClobberLR; }
+ bool hasEHReturn() const { return HasEHReturn; };
+ void setHasEHReturn(bool H = true) { HasEHReturn = H; };
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 5e80e48..05e6968 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -22,29 +22,31 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hexagon-nvj"
-#include "Hexagon.h"
-#include "HexagonInstrInfo.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "HexagonRegisterInfo.h"
-#include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/PassSupport.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+
#include <map>
+
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created");
@@ -57,6 +59,11 @@ static cl::opt<bool> DisableNewValueJumps("disable-nvjump", cl::Hidden,
cl::ZeroOrMore, cl::init(false),
cl::desc("Disable New Value Jumps"));
+namespace llvm {
+ void initializeHexagonNewValueJumpPass(PassRegistry&);
+}
+
+
namespace {
struct HexagonNewValueJump : public MachineFunctionPass {
const HexagonInstrInfo *QII;
@@ -65,9 +72,12 @@ namespace {
public:
static char ID;
- HexagonNewValueJump() : MachineFunctionPass(ID) { }
+ HexagonNewValueJump() : MachineFunctionPass(ID) {
+ initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -78,6 +88,8 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &Fn);
private:
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
};
@@ -85,6 +97,13 @@ namespace {
char HexagonNewValueJump::ID = 0;
+INITIALIZE_PASS_BEGIN(HexagonNewValueJump, "hexagon-nvj",
+ "Hexagon NewValueJump", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(HexagonNewValueJump, "hexagon-nvj",
+ "Hexagon NewValueJump", false, false)
+
+
// We have identified this II could be feeder to NVJ,
// verify that it can be.
static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
@@ -208,19 +227,15 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
// range specified by the arch.
if (!secondReg) {
int64_t v = MI->getOperand(2).getImm();
- if (MI->getOpcode() == Hexagon::CMPGEri ||
- (MI->getOpcode() == Hexagon::CMPGEUri && v > 0))
- --v;
if (!(isUInt<5>(v) ||
((MI->getOpcode() == Hexagon::CMPEQri ||
- MI->getOpcode() == Hexagon::CMPGTri ||
- MI->getOpcode() == Hexagon::CMPGEri) &&
+ MI->getOpcode() == Hexagon::CMPGTri) &&
(v == -1))))
return false;
}
- unsigned cmpReg1, cmpOp2;
+ unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
cmpReg1 = MI->getOperand(1).getReg();
if (secondReg) {
@@ -271,58 +286,63 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
// Given a compare operator, return a matching New Value Jump
// compare operator. Make sure that MI here is included in
// HexagonInstrInfo.cpp::isNewValueJumpCandidate
-static unsigned getNewValueJumpOpcode(const MachineInstr *MI, int reg,
- bool secondRegNewified) {
+static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
+ bool secondRegNewified,
+ MachineBasicBlock *jmpTarget,
+ const MachineBranchProbabilityInfo
+ *MBPI) {
+ bool taken = false;
+ MachineBasicBlock *Src = MI->getParent();
+ const BranchProbability Prediction =
+ MBPI->getEdgeProbability(Src, jmpTarget);
+
+ if (Prediction >= BranchProbability(1,2))
+ taken = true;
+
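+ // A predicted-taken branch selects the _t (taken) new-value jump form below;
+ // otherwise the _nt (not-taken) form is chosen.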
switch (MI->getOpcode()) {
case Hexagon::CMPEQrr:
- return Hexagon::JMP_EQrrPt_nv_V4;
+ return taken ? Hexagon::CMPEQrr_t_Jumpnv_t_V4
+ : Hexagon::CMPEQrr_t_Jumpnv_nt_V4;
case Hexagon::CMPEQri: {
if (reg >= 0)
- return Hexagon::JMP_EQriPt_nv_V4;
+ return taken ? Hexagon::CMPEQri_t_Jumpnv_t_V4
+ : Hexagon::CMPEQri_t_Jumpnv_nt_V4;
else
- return Hexagon::JMP_EQriPtneg_nv_V4;
+ return taken ? Hexagon::CMPEQn1_t_Jumpnv_t_V4
+ : Hexagon::CMPEQn1_t_Jumpnv_nt_V4;
}
- case Hexagon::CMPLTrr:
case Hexagon::CMPGTrr: {
if (secondRegNewified)
- return Hexagon::JMP_GTrrdnPt_nv_V4;
+ return taken ? Hexagon::CMPLTrr_t_Jumpnv_t_V4
+ : Hexagon::CMPLTrr_t_Jumpnv_nt_V4;
else
- return Hexagon::JMP_GTrrPt_nv_V4;
- }
-
- case Hexagon::CMPGEri: {
- if (reg >= 1)
- return Hexagon::JMP_GTriPt_nv_V4;
- else
- return Hexagon::JMP_GTriPtneg_nv_V4;
+ return taken ? Hexagon::CMPGTrr_t_Jumpnv_t_V4
+ : Hexagon::CMPGTrr_t_Jumpnv_nt_V4;
}
case Hexagon::CMPGTri: {
if (reg >= 0)
- return Hexagon::JMP_GTriPt_nv_V4;
+ return taken ? Hexagon::CMPGTri_t_Jumpnv_t_V4
+ : Hexagon::CMPGTri_t_Jumpnv_nt_V4;
else
- return Hexagon::JMP_GTriPtneg_nv_V4;
+ return taken ? Hexagon::CMPGTn1_t_Jumpnv_t_V4
+ : Hexagon::CMPGTn1_t_Jumpnv_nt_V4;
}
- case Hexagon::CMPLTUrr:
case Hexagon::CMPGTUrr: {
if (secondRegNewified)
- return Hexagon::JMP_GTUrrdnPt_nv_V4;
+ return taken ? Hexagon::CMPLTUrr_t_Jumpnv_t_V4
+ : Hexagon::CMPLTUrr_t_Jumpnv_nt_V4;
else
- return Hexagon::JMP_GTUrrPt_nv_V4;
+ return taken ? Hexagon::CMPGTUrr_t_Jumpnv_t_V4
+ : Hexagon::CMPGTUrr_t_Jumpnv_nt_V4;
}
case Hexagon::CMPGTUri:
- return Hexagon::JMP_GTUriPt_nv_V4;
-
- case Hexagon::CMPGEUri: {
- if (reg == 0)
- return Hexagon::JMP_EQrrPt_nv_V4;
- else
- return Hexagon::JMP_GTUriPt_nv_V4;
- }
+ return taken ? Hexagon::CMPGTUri_t_Jumpnv_t_V4
+ : Hexagon::CMPGTUri_t_Jumpnv_nt_V4;
default:
llvm_unreachable("Could not find matching New Value Jump instruction.");
@@ -346,6 +366,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo());
QRI =
static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
if (!QRI->Subtarget.hasV4TOps() ||
DisableNewValueJumps) {
@@ -393,12 +414,12 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n");
if (!foundJump &&
- (MI->getOpcode() == Hexagon::JMP_c ||
- MI->getOpcode() == Hexagon::JMP_cNot ||
- MI->getOpcode() == Hexagon::JMP_cdnPt ||
- MI->getOpcode() == Hexagon::JMP_cdnPnt ||
- MI->getOpcode() == Hexagon::JMP_cdnNotPt ||
- MI->getOpcode() == Hexagon::JMP_cdnNotPnt)) {
+ (MI->getOpcode() == Hexagon::JMP_t ||
+ MI->getOpcode() == Hexagon::JMP_f ||
+ MI->getOpcode() == Hexagon::JMP_tnew_t ||
+ MI->getOpcode() == Hexagon::JMP_tnew_nt ||
+ MI->getOpcode() == Hexagon::JMP_fnew_t ||
+ MI->getOpcode() == Hexagon::JMP_fnew_nt)) {
// This is where you would insert your compare and
// instr that feeds compare
jmpPos = MII;
@@ -434,9 +455,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
jmpTarget = MI->getOperand(1).getMBB();
foundJump = true;
- if (MI->getOpcode() == Hexagon::JMP_cNot ||
- MI->getOpcode() == Hexagon::JMP_cdnNotPt ||
- MI->getOpcode() == Hexagon::JMP_cdnNotPnt) {
+ if (MI->getOpcode() == Hexagon::JMP_f ||
+ MI->getOpcode() == Hexagon::JMP_fnew_t ||
+ MI->getOpcode() == Hexagon::JMP_fnew_nt) {
invertPredicate = true;
}
continue;
@@ -525,10 +546,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
if (isSecondOpReg) {
// In case of CMPLT, or CMPLTU, or EQ with the second register
// to newify, swap the operands.
- if (cmpInstr->getOpcode() == Hexagon::CMPLTrr ||
- cmpInstr->getOpcode() == Hexagon::CMPLTUrr ||
- (cmpInstr->getOpcode() == Hexagon::CMPEQrr &&
- feederReg == (unsigned) cmpOp2)) {
+ if (cmpInstr->getOpcode() == Hexagon::CMPEQrr &&
+ feederReg == (unsigned) cmpOp2) {
unsigned tmp = cmpReg1;
bool tmpIsKill = MO1IsKill;
cmpReg1 = cmpOp2;
@@ -582,42 +601,34 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
assert((QII->isNewValueJumpCandidate(cmpInstr)) &&
"This compare is not a New Value Jump candidate.");
unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
- isSecondOpNewified);
+ isSecondOpNewified,
+ jmpTarget, MBPI);
if (invertPredicate)
opc = QII->getInvertedPredicatedOpcode(opc);
- // Manage the conversions from CMPGEUri to either CMPEQrr
- // or CMPGTUri properly. See Arch spec for CMPGEUri instructions.
- // This has to be after the getNewValueJumpOpcode function call as
- // second operand of the compare could be modified in this logic.
- if (cmpInstr->getOpcode() == Hexagon::CMPGEUri) {
- if (cmpOp2 == 0) {
- cmpOp2 = cmpReg1;
- MO2IsKill = MO1IsKill;
- isSecondOpReg = true;
- } else
- --cmpOp2;
- }
-
- // Manage the conversions from CMPGEri to CMPGTUri properly.
- // See Arch spec for CMPGEri instructions.
- if (cmpInstr->getOpcode() == Hexagon::CMPGEri)
- --cmpOp2;
-
- if (isSecondOpReg) {
+ if (isSecondOpReg)
NewMI = BuildMI(*MBB, jmpPos, dl,
QII->get(opc))
.addReg(cmpReg1, getKillRegState(MO1IsKill))
.addReg(cmpOp2, getKillRegState(MO2IsKill))
.addMBB(jmpTarget);
- }
- else {
+
+ else if ((cmpInstr->getOpcode() == Hexagon::CMPEQri ||
+ cmpInstr->getOpcode() == Hexagon::CMPGTri) &&
+ cmpOp2 == -1 )
+ // Corresponding new-value compare jump instructions don't have the
+ // operand for -1 immediate value.
+ NewMI = BuildMI(*MBB, jmpPos, dl,
+ QII->get(opc))
+ .addReg(cmpReg1, getKillRegState(MO1IsKill))
+ .addMBB(jmpTarget);
+
+ else
NewMI = BuildMI(*MBB, jmpPos, dl,
QII->get(opc))
.addReg(cmpReg1, getKillRegState(MO1IsKill))
.addImm(cmpOp2)
.addMBB(jmpTarget);
- }
assert(NewMI && "New Value Jump Instruction Not created!");
if (cmpInstr->getOperand(0).isReg() &&
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
index 576f1d7..89e3406 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -61,10 +61,6 @@ static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Peephole Optimization"));
-static cl::opt<int>
-DbgPNPCount("pnp-count", cl::init(-1), cl::Hidden,
- cl::desc("Maximum number of P=NOT(P) to be optimized"));
-
static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Optimization of PNotP"));
@@ -73,6 +69,14 @@ static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Optimization of Sign/Zero Extends"));
+static cl::opt<bool> DisableOptExtTo64("disable-hexagon-opt-ext-to-64",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Optimization of extensions to i64."));
+
+namespace llvm {
+ void initializeHexagonPeepholePass(PassRegistry&);
+}
+
namespace {
struct HexagonPeephole : public MachineFunctionPass {
const HexagonInstrInfo *QII;
@@ -81,7 +85,9 @@ namespace {
public:
static char ID;
- HexagonPeephole() : MachineFunctionPass(ID) { }
+ HexagonPeephole() : MachineFunctionPass(ID) {
+ initializeHexagonPeepholePass(*PassRegistry::getPassRegistry());
+ }
bool runOnMachineFunction(MachineFunction &MF);
@@ -100,8 +106,10 @@ namespace {
char HexagonPeephole::ID = 0;
-bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
+INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole",
+ false, false)
+bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().
getInstrInfo());
QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget().
@@ -142,6 +150,21 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
}
}
+ // Look for %vreg170<def> = COMBINE_ir_V4 (0, %vreg169)
+ // %vreg170:DoubleRegs, %vreg169:IntRegs
+ if (!DisableOptExtTo64 &&
+ MI->getOpcode () == Hexagon::COMBINE_Ir_V4) {
+ assert (MI->getNumOperands() == 3);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src1 = MI->getOperand(1);
+ MachineOperand &Src2 = MI->getOperand(2);
+ if (Src1.getImm() != 0)
+ continue;
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src2.getReg();
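+ // COMBINE_Ir_V4(#0, Rs) zero-extends Rs into the double register, so
+ // remember the mapping from the 64-bit Dst back to its 32-bit source.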
+ PeepholeMap[DstReg] = SrcReg;
+ }
+
// Look for this sequence below
// %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32
// %vregIntReg = COPY %vregDoubleReg1:subreg_loreg.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
index 34bf4ea..44234e8 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -21,11 +21,18 @@
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
+
+namespace llvm {
+ void initializeHexagonRemoveExtendArgsPass(PassRegistry&);
+}
+
namespace {
struct HexagonRemoveExtendArgs : public FunctionPass {
public:
static char ID;
- HexagonRemoveExtendArgs() : FunctionPass(ID) {}
+ HexagonRemoveExtendArgs() : FunctionPass(ID) {
+ initializeHexagonRemoveExtendArgsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &F);
const char *getPassName() const {
@@ -41,11 +48,9 @@ namespace {
}
char HexagonRemoveExtendArgs::ID = 0;
-RegisterPass<HexagonRemoveExtendArgs> X("reargs",
- "Remove Sign and Zero Extends for Args"
- );
-
+INITIALIZE_PASS(HexagonRemoveExtendArgs, "reargs",
+ "Remove Sign and Zero Extends for Args", false, false)
bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
unsigned Idx = 1;
@@ -78,6 +83,7 @@ bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
-FunctionPass *llvm::createHexagonRemoveExtendOps(HexagonTargetMachine &TM) {
+FunctionPass*
+llvm::createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM) {
return new HexagonRemoveExtendArgs();
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
index 814249f..8608e08 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -49,16 +49,23 @@
using namespace llvm;
+namespace llvm {
+ void initializeHexagonSplitTFRCondSetsPass(PassRegistry&);
+}
+
+
namespace {
class HexagonSplitTFRCondSets : public MachineFunctionPass {
- HexagonTargetMachine& QTM;
+ const HexagonTargetMachine &QTM;
const HexagonSubtarget &QST;
public:
static char ID;
- HexagonSplitTFRCondSets(HexagonTargetMachine& TM) :
- MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+ HexagonSplitTFRCondSets(const HexagonTargetMachine& TM) :
+ MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
+ initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry());
+ }
const char *getPassName() const {
return "Hexagon Split TFRCondSets";
@@ -211,6 +218,18 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-FunctionPass *llvm::createHexagonSplitTFRCondSets(HexagonTargetMachine &TM) {
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon Split TFRCondSets";
+ PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr",
+ &HexagonSplitTFRCondSets::ID, 0, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonSplitTFRCondSetsPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+FunctionPass*
+llvm::createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM) {
return new HexagonSplitTFRCondSets(TM);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index ce45c62..caa1ba4 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -25,19 +25,17 @@
using namespace llvm;
-static cl::
-opt<bool> DisableHardwareLoops(
- "disable-hexagon-hwloops", cl::Hidden,
- cl::desc("Disable Hardware Loops for Hexagon target"));
+static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
+ cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
-static cl::
-opt<bool> DisableHexagonMISched("disable-hexagon-misched",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon MI Scheduling"));
+static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon MI Scheduling"));
static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon CFG Optimization"));
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon CFG Optimization"));
+
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
@@ -126,55 +124,62 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
}
bool HexagonPassConfig::addInstSelector() {
+ const HexagonTargetMachine &TM = getHexagonTargetMachine();
+ bool NoOpt = (getOptLevel() == CodeGenOpt::None);
- if (getOptLevel() != CodeGenOpt::None)
- addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+ if (!NoOpt)
+ addPass(createHexagonRemoveExtendArgs(TM));
- addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel()));
+ addPass(createHexagonISelDag(TM, getOptLevel()));
- if (getOptLevel() != CodeGenOpt::None)
+ if (!NoOpt) {
addPass(createHexagonPeephole());
+ printAndVerify("After hexagon peephole pass");
+ }
return false;
}
-
bool HexagonPassConfig::addPreRegAlloc() {
- if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
- addPass(createHexagonHardwareLoops());
+ if (getOptLevel() != CodeGenOpt::None)
+ if (!DisableHardwareLoops)
+ addPass(createHexagonHardwareLoops());
return false;
}
bool HexagonPassConfig::addPostRegAlloc() {
- if (!DisableHexagonCFGOpt && getOptLevel() != CodeGenOpt::None)
- addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
- return true;
+ const HexagonTargetMachine &TM = getHexagonTargetMachine();
+ if (getOptLevel() != CodeGenOpt::None)
+ if (!DisableHexagonCFGOpt)
+ addPass(createHexagonCFGOptimizer(TM));
+ return false;
}
-
bool HexagonPassConfig::addPreSched2() {
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
- return true;
+ return false;
}
bool HexagonPassConfig::addPreEmitPass() {
+ const HexagonTargetMachine &TM = getHexagonTargetMachine();
+ bool NoOpt = (getOptLevel() == CodeGenOpt::None);
- if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
- addPass(createHexagonFixupHwLoops());
-
- if (getOptLevel() != CodeGenOpt::None)
+ if (!NoOpt)
addPass(createHexagonNewValueJump());
// Expand Spill code for predicate registers.
- addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
+ addPass(createHexagonExpandPredSpillCode(TM));
// Split up TFRcondsets into conditional transfers.
- addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
+ addPass(createHexagonSplitTFRCondSets(TM));
// Create Packets.
- if (getOptLevel() != CodeGenOpt::None)
+ if (!NoOpt) {
+ if (!DisableHardwareLoops)
+ addPass(createHexagonFixupHwLoops());
addPass(createHexagonPacketizer());
+ }
return false;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index c0d86da..39995e1 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -48,19 +48,32 @@
#include "HexagonMachineFunctionInfo.h"
#include <map>
+#include <vector>
using namespace llvm;
+static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
+ cl::ZeroOrMore, cl::Hidden, cl::init(true),
+ cl::desc("Allow non-solo packetization of volatile memory references"));
+
+namespace llvm {
+ void initializeHexagonPacketizerPass(PassRegistry&);
+}
+
+
namespace {
class HexagonPacketizer : public MachineFunctionPass {
public:
static char ID;
- HexagonPacketizer() : MachineFunctionPass(ID) {}
+ HexagonPacketizer() : MachineFunctionPass(ID) {
+ initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
@@ -96,10 +109,17 @@ namespace {
// schedule this instruction.
bool FoundSequentialDependence;
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ // Track MIs with ignored dependence.
+ std::vector<MachineInstr*> IgnoreDepMIs;
+
public:
// Ctor.
HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
- MachineDominatorTree &MDT);
+ MachineDominatorTree &MDT,
+ const MachineBranchProbabilityInfo *MBPI);
// initPacketizerState - initialize some internal flags.
void initPacketizerState();
@@ -123,20 +143,20 @@ namespace {
private:
bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg);
bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
- MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC);
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
bool CanPromoteToDotNew(MachineInstr* MI, SUnit* PacketSU,
- unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit,
- MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC);
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
bool CanPromoteToNewValue(MachineInstr* MI, SUnit* PacketSU,
- unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit,
- MachineBasicBlock::iterator &MII);
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII);
bool CanPromoteToNewValueStore(MachineInstr* MI, MachineInstr* PacketMI,
- unsigned DepReg,
- std::map <MachineInstr*, SUnit*> MIToSUnit);
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit);
bool DemoteToDotOld(MachineInstr* MI);
bool ArePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2,
std::map <MachineInstr*, SUnit*> MIToSUnit);
@@ -152,19 +172,32 @@ namespace {
};
}
+INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
+ false, false)
+
+
// HexagonPacketizerList Ctor.
HexagonPacketizerList::HexagonPacketizerList(
- MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT)
+ MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT,
+ const MachineBranchProbabilityInfo *MBPI)
: VLIWPacketizerList(MF, MLI, MDT, true){
+ this->MBPI = MBPI;
}
bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
-
+ const MachineBranchProbabilityInfo *MBPI =
+ &getAnalysis<MachineBranchProbabilityInfo>();
// Instantiate the packetizer.
- HexagonPacketizerList Packetizer(Fn, MLI, MDT);
+ HexagonPacketizerList Packetizer(Fn, MLI, MDT, MBPI);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
@@ -710,8 +743,10 @@ static int GetDotNewOp(const int opc) {
}
// Return .new predicate version for an instruction
-static int GetDotNewPredOp(const int opc) {
- switch (opc) {
+static int GetDotNewPredOp(MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI,
+ const HexagonInstrInfo *QII) {
+ switch (MI->getOpcode()) {
default: llvm_unreachable("Unknown .new type");
// Conditional stores
// Store byte conditionally
@@ -857,17 +892,15 @@ static int GetDotNewPredOp(const int opc) {
return Hexagon::STw_GP_cdnNotPt_V4;
// Condtional Jumps
- case Hexagon::JMP_c:
- return Hexagon::JMP_cdnPt;
+ case Hexagon::JMP_t:
+ case Hexagon::JMP_f:
+ return QII->getDotNewPredJumpOp(MI, MBPI);
- case Hexagon::JMP_cNot:
- return Hexagon::JMP_cdnNotPt;
+ case Hexagon::JMPR_t:
+ return Hexagon::JMPR_tnew_tV3;
- case Hexagon::JMPR_cPt:
- return Hexagon::JMPR_cdnPt_V3;
-
- case Hexagon::JMPR_cNotPt:
- return Hexagon::JMPR_cdnNotPt_V3;
+ case Hexagon::JMPR_f:
+ return Hexagon::JMPR_fnew_tV3;
// Conditional Transfers
case Hexagon::TFR_cPt:
@@ -1261,7 +1294,7 @@ bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI,
int NewOpcode;
if (RC == &Hexagon::PredRegsRegClass)
- NewOpcode = GetDotNewPredOp(MI->getOpcode());
+ NewOpcode = GetDotNewPredOp(MI, MBPI, QII);
else
NewOpcode = GetDotNewOp(MI->getOpcode());
MI->setDesc(QII->get(NewOpcode));
@@ -1306,17 +1339,17 @@ static int GetDotOldOp(const int opc) {
case Hexagon::TFRI_cdnNotPt:
return Hexagon::TFRI_cNotPt;
- case Hexagon::JMP_cdnPt:
- return Hexagon::JMP_c;
+ case Hexagon::JMP_tnew_t:
+ return Hexagon::JMP_t;
- case Hexagon::JMP_cdnNotPt:
- return Hexagon::JMP_cNot;
+ case Hexagon::JMP_fnew_t:
+ return Hexagon::JMP_f;
- case Hexagon::JMPR_cdnPt_V3:
- return Hexagon::JMPR_cPt;
+ case Hexagon::JMPR_tnew_tV3:
+ return Hexagon::JMPR_t;
- case Hexagon::JMPR_cdnNotPt_V3:
- return Hexagon::JMPR_cNotPt;
+ case Hexagon::JMPR_fnew_tV3:
+ return Hexagon::JMPR_f;
// Load double word
@@ -1912,7 +1945,7 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::STrih_imm_cdnPt_V4 :
case Hexagon::STriw_imm_cPt_V4 :
case Hexagon::STriw_imm_cdnPt_V4 :
- case Hexagon::JMP_cdnPt :
+ case Hexagon::JMP_tnew_t :
case Hexagon::LDrid_cPt :
case Hexagon::LDrid_cdnPt :
case Hexagon::LDrid_indexed_cPt :
@@ -2051,7 +2084,7 @@ static bool GetPredicateSense(MachineInstr* MI,
case Hexagon::STrih_imm_cdnNotPt_V4 :
case Hexagon::STriw_imm_cNotPt_V4 :
case Hexagon::STriw_imm_cdnNotPt_V4 :
- case Hexagon::JMP_cdnNotPt :
+ case Hexagon::JMP_fnew_t :
case Hexagon::LDrid_cNotPt :
case Hexagon::LDrid_cdnNotPt :
case Hexagon::LDrid_indexed_cNotPt :
@@ -2739,9 +2772,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// If an instruction feeds new value jump, glue it.
MachineBasicBlock::iterator NextMII = I;
++NextMII;
- MachineInstr *NextMI = NextMII;
-
- if (QII->isNewValueJump(NextMI)) {
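+ // Make sure I is not the last instruction in the block before dereferencing
+ // the next iterator.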
+ if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) {
+ MachineInstr *NextMI = NextMII;
bool secondRegMatch = false;
bool maintainNewValueJump = false;
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
index 78ad24d..34e33fd 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -237,7 +237,7 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
// Use load to get GOT target
SDValue Ops[] = { Callee, GPReg, Chain };
SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl,
- MVT::i32, MVT::Other, Ops, 3), 0);
+ MVT::i32, MVT::Other, Ops), 0);
Chain = Load.getValue(1);
// Call target must be on T9
diff --git a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
index f86bc0b..d27cd39 100644
--- a/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -724,8 +724,7 @@ let usesCustomInserter=1 in {
[(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$val))]>;
def MEMBARRIER : MBlazePseudo<(outs), (ins),
- "# memory barrier",
- [(membarrier (i32 imm), (i32 imm), (i32 imm), (i32 imm), (i32 imm))]>;
+ "# memory barrier", []>;
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Mangler.cpp b/contrib/llvm/lib/Target/Mangler.cpp
index edfd421..d31efa8 100644
--- a/contrib/llvm/lib/Target/Mangler.cpp
+++ b/contrib/llvm/lib/Target/Mangler.cpp
@@ -188,7 +188,12 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
// If this global has a name, handle it simply.
if (GV->hasName()) {
- getNameWithPrefix(OutName, GV->getName(), PrefixTy);
+ StringRef Name = GV->getName();
+ getNameWithPrefix(OutName, Name, PrefixTy);
+ // No need to do anything else if the global has the special "do not mangle"
+ // flag in the name.
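+ // (The flag is a leading '\1' byte in the symbol name.)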
+ if (Name[0] == 1)
+ return;
} else {
// Get the ID for the global, assigning a new one if we haven't got one
// already.
diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index c403f21..0795cb9 100644
--- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -63,7 +63,6 @@ class MipsAsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
MipsAssemblerOptions Options;
-
#define GET_ASSEMBLER_HEADER
#include "MipsGenAsmMatcher.inc"
@@ -127,9 +126,12 @@ class MipsAsmParser : public MCTargetAsmParser {
bool isLoad,bool isImmOpnd);
bool reportParseError(StringRef ErrorMsg);
- bool parseMemOffset(const MCExpr *&Res);
+ bool parseMemOffset(const MCExpr *&Res, bool isParenExpr);
bool parseRelocOperand(const MCExpr *&Res);
+ const MCExpr* evaluateRelocExpr(const MCExpr *Expr, StringRef RelocStr);
+
+ bool isEvaluated(const MCExpr *Expr);
bool parseDirectiveSet();
bool parseSetAtDirective();
@@ -171,7 +173,7 @@ class MipsAsmParser : public MCTargetAsmParser {
bool requestsDoubleOperand(StringRef Mnemonic);
- unsigned getReg(int RC,int RegNo);
+ unsigned getReg(int RC, int RegNo);
int getATReg();
@@ -269,7 +271,7 @@ public:
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCExpr *Expr = getImm();
- addExpr(Inst,Expr);
+ addExpr(Inst, Expr);
}
void addMemOperands(MCInst &Inst, unsigned N) const {
@@ -278,7 +280,7 @@ public:
Inst.addOperand(MCOperand::CreateReg(getMemBase()));
const MCExpr *Expr = getMemOff();
- addExpr(Inst,Expr);
+ addExpr(Inst, Expr);
}
bool isReg() const { return Kind == k_Register; }
@@ -391,15 +393,19 @@ public:
}
/// getStartLoc - Get the location of the first token of this operand.
- SMLoc getStartLoc() const { return StartLoc; }
+ SMLoc getStartLoc() const {
+ return StartLoc;
+ }
/// getEndLoc - Get the location of the last token of this operand.
- SMLoc getEndLoc() const { return EndLoc; }
+ SMLoc getEndLoc() const {
+ return EndLoc;
+ }
virtual void print(raw_ostream &OS) const {
llvm_unreachable("unimplemented!");
}
-};
-}
+}; // class MipsOperand
+} // namespace
namespace llvm {
extern const MCInstrDesc MipsInsts[];
@@ -409,39 +415,55 @@ static const MCInstrDesc &getInstDesc(unsigned Opcode) {
}
bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
+ SmallVectorImpl<MCInst> &Instructions) {
const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
Inst.setLoc(IDLoc);
+ if (MCID.hasDelaySlot() && Options.isReorder()) {
+ // If this instruction has a delay slot and .set reorder is active,
+ // emit a NOP after it.
+ Instructions.push_back(Inst);
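+ // The NOP is encoded as 'sll $zero, $zero, 0', the canonical MIPS nop.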
+ MCInst NopInst;
+ NopInst.setOpcode(Mips::SLL);
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateImm(0));
+ Instructions.push_back(NopInst);
+ return false;
+ }
+
if (MCID.mayLoad() || MCID.mayStore()) {
// Check the offset of memory operand, if it is a symbol
- // reference or immediate we may have to expand instructions
- for (unsigned i=0;i<MCID.getNumOperands();i++) {
+ // reference or immediate we may have to expand instructions.
+ for (unsigned i = 0; i < MCID.getNumOperands(); i++) {
const MCOperandInfo &OpInfo = MCID.OpInfo[i];
- if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) ||
- (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
+ if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY)
+ || (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
MCOperand &Op = Inst.getOperand(i);
if (Op.isImm()) {
int MemOffset = Op.getImm();
if (MemOffset < -32768 || MemOffset > 32767) {
- // Offset can't exceed 16bit value
- expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),true);
+ // Offset can't exceed 16bit value.
+ expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), true);
return false;
}
} else if (Op.isExpr()) {
const MCExpr *Expr = Op.getExpr();
- if (Expr->getKind() == MCExpr::SymbolRef){
+ if (Expr->getKind() == MCExpr::SymbolRef) {
const MCSymbolRefExpr *SR =
- static_cast<const MCSymbolRefExpr*>(Expr);
+ static_cast<const MCSymbolRefExpr*>(Expr);
if (SR->getKind() == MCSymbolRefExpr::VK_None) {
- // Expand symbol
- expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),false);
+ // Expand symbol.
+ expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false);
return false;
}
+ } else if (!isEvaluated(Expr)) {
+ expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false);
+ return false;
}
}
}
- }
- }
+ } // for
+ } // if load/store
if (needsExpansion(Inst))
expandInstruction(Inst, IDLoc, Instructions);
@@ -453,30 +475,30 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
bool MipsAsmParser::needsExpansion(MCInst &Inst) {
- switch(Inst.getOpcode()) {
- case Mips::LoadImm32Reg:
- case Mips::LoadAddr32Imm:
- case Mips::LoadAddr32Reg:
- return true;
- default:
- return false;
+ switch (Inst.getOpcode()) {
+ case Mips::LoadImm32Reg:
+ case Mips::LoadAddr32Imm:
+ case Mips::LoadAddr32Reg:
+ return true;
+ default:
+ return false;
}
}
void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions){
- switch(Inst.getOpcode()) {
- case Mips::LoadImm32Reg:
- return expandLoadImm(Inst, IDLoc, Instructions);
- case Mips::LoadAddr32Imm:
- return expandLoadAddressImm(Inst,IDLoc,Instructions);
- case Mips::LoadAddr32Reg:
- return expandLoadAddressReg(Inst,IDLoc,Instructions);
- }
+ SmallVectorImpl<MCInst> &Instructions) {
+ switch (Inst.getOpcode()) {
+ case Mips::LoadImm32Reg:
+ return expandLoadImm(Inst, IDLoc, Instructions);
+ case Mips::LoadAddr32Imm:
+ return expandLoadAddressImm(Inst, IDLoc, Instructions);
+ case Mips::LoadAddr32Reg:
+ return expandLoadAddressReg(Inst, IDLoc, Instructions);
+ }
}
void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions){
+ SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
const MCOperand &ImmOp = Inst.getOperand(1);
assert(ImmOp.isImm() && "expected immediate operand kind");
@@ -485,26 +507,24 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
int ImmValue = ImmOp.getImm();
tmpInst.setLoc(IDLoc);
- if ( 0 <= ImmValue && ImmValue <= 65535) {
- // for 0 <= j <= 65535.
+ if (0 <= ImmValue && ImmValue <= 65535) {
+ // For 0 <= j <= 65535.
// li d,j => ori d,$zero,j
tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(
- MCOperand::CreateReg(Mips::ZERO));
+ tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
- } else if ( ImmValue < 0 && ImmValue >= -32768) {
- // for -32768 <= j < 0.
+ } else if (ImmValue < 0 && ImmValue >= -32768) {
+ // For -32768 <= j < 0.
// li d,j => addiu d,$zero,j
tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(
- MCOperand::CreateReg(Mips::ZERO));
+ tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else {
- // for any other value of j that is representable as a 32-bit integer.
+ // For any other value of j that is representable as a 32-bit integer.
// li d,j => lui d,hi16(j)
// ori d,d,lo16(j)
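+ // e.g. li $4, 0x12345 => lui $4, 0x1 ; ori $4, $4, 0x2345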
tmpInst.setOpcode(Mips::LUi);
@@ -522,7 +542,7 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
}
void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions){
+ SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
const MCOperand &ImmOp = Inst.getOperand(2);
assert(ImmOp.isImm() && "expected immediate operand kind");
@@ -531,19 +551,19 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
const MCOperand &DstRegOp = Inst.getOperand(0);
assert(DstRegOp.isReg() && "expected register operand kind");
int ImmValue = ImmOp.getImm();
- if ( -32768 <= ImmValue && ImmValue <= 65535) {
- //for -32768 <= j <= 65535.
- //la d,j(s) => addiu d,s,j
+ if (-32768 <= ImmValue && ImmValue <= 65535) {
+ // For -32768 <= j <= 65535.
+ // la d,j(s) => addiu d,s,j
tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else {
- //for any other value of j that is representable as a 32-bit integer.
- //la d,j(s) => lui d,hi16(j)
- // ori d,d,lo16(j)
- // addu d,d,s
+ // For any other value of j that is representable as a 32-bit integer.
+ // la d,j(s) => lui d,hi16(j)
+ // ori d,d,lo16(j)
+ // addu d,d,s
tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
@@ -564,26 +584,25 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
}
void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions){
+ SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
const MCOperand &ImmOp = Inst.getOperand(1);
assert(ImmOp.isImm() && "expected immediate operand kind");
const MCOperand &RegOp = Inst.getOperand(0);
assert(RegOp.isReg() && "expected register operand kind");
int ImmValue = ImmOp.getImm();
- if ( -32768 <= ImmValue && ImmValue <= 65535) {
- //for -32768 <= j <= 65535.
- //la d,j => addiu d,$zero,j
+ if (-32768 <= ImmValue && ImmValue <= 65535) {
+ // For -32768 <= j <= 65535.
+ // la d,j => addiu d,$zero,j
tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(
- MCOperand::CreateReg(Mips::ZERO));
+ tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else {
- //for any other value of j that is representable as a 32-bit integer.
- //la d,j => lui d,hi16(j)
- // ori d,d,lo16(j)
+ // For any other value of j that is representable as a 32-bit integer.
+ // la d,j => lui d,hi16(j)
+ // ori d,d,lo16(j)
tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
@@ -598,40 +617,37 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
}
void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions,
- bool isLoad,bool isImmOpnd) {
+ SmallVectorImpl<MCInst> &Instructions, bool isLoad, bool isImmOpnd) {
const MCSymbolRefExpr *SR;
MCInst TempInst;
- unsigned ImmOffset,HiOffset,LoOffset;
+ unsigned ImmOffset, HiOffset, LoOffset;
const MCExpr *ExprOffset;
unsigned TmpRegNum;
- unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID:
- Mips::CPURegsRegClassID,
- getATReg());
- // 1st operand is either source or dst register
+ unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID
+ : Mips::CPURegsRegClassID, getATReg());
+ // 1st operand is either the source or destination register.
assert(Inst.getOperand(0).isReg() && "expected register operand kind");
unsigned RegOpNum = Inst.getOperand(0).getReg();
- // 2nd operand is base register
+ // 2nd operand is the base register.
assert(Inst.getOperand(1).isReg() && "expected register operand kind");
unsigned BaseRegNum = Inst.getOperand(1).getReg();
- // 3rd operand is either immediate or expression
+ // 3rd operand is either an immediate or expression.
if (isImmOpnd) {
assert(Inst.getOperand(2).isImm() && "expected immediate operand kind");
ImmOffset = Inst.getOperand(2).getImm();
LoOffset = ImmOffset & 0x0000ffff;
HiOffset = (ImmOffset & 0xffff0000) >> 16;
- // If msb of LoOffset is 1(negative number) we must increment HiOffset
+ // If the MSB of LoOffset is 1 (negative number) we must increment HiOffset.
if (LoOffset & 0x8000)
HiOffset++;
- }
- else
+ } else
ExprOffset = Inst.getOperand(2).getExpr();
- // All instructions will have the same location
+ // All instructions will have the same location.
TempInst.setLoc(IDLoc);
// 1st instruction in expansion is LUi. For load instruction we can use
// the dst register as a temporary if base and dst are different,
- // but for stores we must use $at
- TmpRegNum = (isLoad && (BaseRegNum != RegOpNum))?RegOpNum:AtRegNum;
+ // but for stores we must use $at.
+ TmpRegNum = (isLoad && (BaseRegNum != RegOpNum)) ? RegOpNum : AtRegNum;
TempInst.setOpcode(Mips::LUi);
TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
if (isImmOpnd)
@@ -639,26 +655,28 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
else {
if (ExprOffset->getKind() == MCExpr::SymbolRef) {
SR = static_cast<const MCSymbolRefExpr*>(ExprOffset);
- const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::
- Create(SR->getSymbol().getName(),
- MCSymbolRefExpr::VK_Mips_ABS_HI,
- getContext());
+ const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create(
+ SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
+ } else {
+ const MCExpr *HiExpr = evaluateRelocExpr(ExprOffset, "hi");
TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
}
}
- // Add the instruction to the list
+ // Add the instruction to the list.
Instructions.push_back(TempInst);
- // and prepare TempInst for next instruction
+ // Prepare TempInst for next instruction.
TempInst.clear();
- // which is add temp register to base
+ // Add temp register to base.
TempInst.setOpcode(Mips::ADDu);
TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
Instructions.push_back(TempInst);
TempInst.clear();
- // and finaly, create original instruction with low part
- // of offset and new base
+ // And finally, create the original instruction with the low part
+ // of the offset and the new base.
TempInst.setOpcode(Inst.getOpcode());
TempInst.addOperand(MCOperand::CreateReg(RegOpNum));
TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
@@ -666,10 +684,12 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
TempInst.addOperand(MCOperand::CreateImm(LoOffset));
else {
if (ExprOffset->getKind() == MCExpr::SymbolRef) {
- const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::
- Create(SR->getSymbol().getName(),
- MCSymbolRefExpr::VK_Mips_ABS_LO,
- getContext());
+ const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create(
+ SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_LO,
+ getContext());
+ TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
+ } else {
+ const MCExpr *LoExpr = evaluateRelocExpr(ExprOffset, "lo");
TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
}
}
@@ -688,11 +708,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MatchingInlineAsm);
switch (MatchResult) {
- default: break;
+ default:
+ break;
case Match_Success: {
- if (processInstruction(Inst,IDLoc,Instructions))
+ if (processInstruction(Inst, IDLoc, Instructions))
return true;
- for(unsigned i =0; i < Instructions.size(); i++)
+ for (unsigned i = 0; i < Instructions.size(); i++)
Out.EmitInstruction(Instructions[i]);
return false;
}
@@ -705,8 +726,9 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((MipsOperand*)Operands[ErrorInfo])->getStartLoc();
- if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ ErrorLoc = ((MipsOperand*) Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
}
return Error(ErrorLoc, "invalid operand for instruction");
@@ -757,10 +779,10 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
.Case("t9", 25)
.Default(-1);
- // Although SGI documentation just cut out t0-t3 for n32/n64,
+ // Although SGI documentation just cuts out t0-t3 for n32/n64,
// GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7
// We are supporting both cases, so for t0-t3 we'll just push them to t4-t7.
- if (isMips64() && 8 <= CC && CC <= 11)
+ if (isMips64() && 8 <= CC && CC <= 11)
CC += 4;
if (CC == -1 && isMips64())
@@ -776,19 +798,23 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
return CC;
}
+
int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
+ if (Name.equals("fcc0"))
+ return Mips::FCC0;
+
int CC;
CC = matchCPURegisterName(Name);
if (CC != -1)
- return matchRegisterByNumber(CC,is64BitReg?Mips::CPU64RegsRegClassID:
- Mips::CPURegsRegClassID);
+ return matchRegisterByNumber(CC, is64BitReg ? Mips::CPU64RegsRegClassID
+ : Mips::CPURegsRegClassID);
if (Name[0] == 'f') {
StringRef NumString = Name.substr(1);
unsigned IntVal;
- if( NumString.getAsInteger(10, IntVal))
- return -1; // not integer
+ if (NumString.getAsInteger(10, IntVal))
+ return -1; // This is not an integer.
if (IntVal > 31)
return -1;
@@ -797,18 +823,19 @@ int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
if (Format == FP_FORMAT_S || Format == FP_FORMAT_W)
return getReg(Mips::FGR32RegClassID, IntVal);
if (Format == FP_FORMAT_D) {
- if(isFP64()) {
+ if (isFP64()) {
return getReg(Mips::FGR64RegClassID, IntVal);
}
- // only even numbers available as register pairs
- if (( IntVal > 31) || (IntVal%2 != 0))
+ // Only even numbers available as register pairs.
+ if ((IntVal > 31) || (IntVal % 2 != 0))
return -1;
- return getReg(Mips::AFGR64RegClassID, IntVal/2);
+ return getReg(Mips::AFGR64RegClassID, IntVal / 2);
}
}
return -1;
}
+
void MipsAsmParser::setDefaultFpFormat() {
if (isMips64() || isFP64())
@@ -828,6 +855,7 @@ bool MipsAsmParser::requestsDoubleOperand(StringRef Mnemonic){
return IsDouble;
}
+
void MipsAsmParser::setFpFormat(StringRef Format) {
FpFormat = StringSwitch<FpFormatTy>(Format.lower())
@@ -850,7 +878,7 @@ int MipsAsmParser::getATReg() {
return Options.getATRegNum();
}
-unsigned MipsAsmParser::getReg(int RC,int RegNo) {
+unsigned MipsAsmParser::getReg(int RC, int RegNo) {
return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo);
}
@@ -871,14 +899,12 @@ int MipsAsmParser::tryParseRegister(bool is64BitReg) {
RegNum = matchRegisterName(lowerCase, is64BitReg);
} else if (Tok.is(AsmToken::Integer))
RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()),
- is64BitReg ? Mips::CPU64RegsRegClassID
- : Mips::CPURegsRegClassID);
+ is64BitReg ? Mips::CPU64RegsRegClassID : Mips::CPURegsRegClassID);
return RegNum;
}
-bool MipsAsmParser::
- tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- bool is64BitReg){
+bool MipsAsmParser::tryParseRegisterOperand(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands, bool is64BitReg) {
SMLoc S = Parser.getTok().getLoc();
int RegNo = -1;
@@ -888,7 +914,7 @@ bool MipsAsmParser::
return true;
Operands.push_back(MipsOperand::CreateReg(RegNo, S,
- Parser.getTok().getLoc()));
+ Parser.getTok().getLoc()));
Parser.Lex(); // Eat register token.
return false;
}
@@ -911,19 +937,19 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return true;
case AsmToken::Dollar: {
- // parse register
+ // Parse the register.
SMLoc S = Parser.getTok().getLoc();
Parser.Lex(); // Eat dollar token.
- // parse register operand
+ // Parse the register operand.
if (!tryParseRegisterOperand(Operands, isMips64())) {
if (getLexer().is(AsmToken::LParen)) {
- // check if it is indexed addressing operand
+ // Check if it is indexed addressing operand.
Operands.push_back(MipsOperand::CreateToken("(", S));
- Parser.Lex(); // eat parenthesis
+ Parser.Lex(); // Eat the parenthesis.
if (getLexer().isNot(AsmToken::Dollar))
return true;
- Parser.Lex(); // eat dollar
+ Parser.Lex(); // Eat the dollar
if (tryParseRegisterOperand(Operands, isMips64()))
return true;
@@ -936,7 +962,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
}
return false;
}
- // maybe it is a symbol reference
+ // Maybe it is a symbol reference.
StringRef Identifier;
if (Parser.parseIdentifier(Identifier))
return true;
@@ -945,7 +971,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier);
- // Otherwise create a symbol ref.
+ // Otherwise create a symbol reference.
const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
getContext());
@@ -954,16 +980,16 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
}
case AsmToken::Identifier:
// Look for the existing symbol, we should check if
- // we need to assigne the propper RegisterKind
- if (searchSymbolAlias(Operands,MipsOperand::Kind_None))
- return false;
- //else drop to expression parsing
+ // we need to assign the proper RegisterKind.
+ if (searchSymbolAlias(Operands, MipsOperand::Kind_None))
+ return false;
+ // Else drop to expression parsing.
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
case AsmToken::Integer:
case AsmToken::String: {
- // quoted label names
+ // Quoted label names.
const MCExpr *IdVal;
SMLoc S = Parser.getTok().getLoc();
if (getParser().parseExpression(IdVal))
@@ -973,9 +999,9 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return false;
}
case AsmToken::Percent: {
- // it is a symbol reference or constant expression
+ // It is a symbol reference or constant expression.
const MCExpr *IdVal;
- SMLoc S = Parser.getTok().getLoc(); // start location of the operand
+ SMLoc S = Parser.getTok().getLoc(); // Start location of the operand.
if (parseRelocOperand(IdVal))
return true;
@@ -988,131 +1014,200 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return true;
}
-bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
+const MCExpr* MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr,
+ StringRef RelocStr) {
+ const MCExpr *Res;
+ // Check the type of the expression.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Expr)) {
+ // It's a constant, evaluate lo or hi value.
+ if (RelocStr == "lo") {
+ short Val = MCE->getValue();
+ Res = MCConstantExpr::Create(Val, getContext());
+ } else if (RelocStr == "hi") {
+ int Val = MCE->getValue();
+ int LoSign = Val & 0x8000;
+ Val = (Val & 0xffff0000) >> 16;
+ // Lower part is treated as a signed int, so if it is negative
+ // we must add 1 to the hi part to compensate.
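+ // e.g. 0x00018000: lo = 0x8000 sign-extends to -0x8000, so hi becomes
+ // 0x0001 + 1 = 0x0002 and (hi << 16) + lo == 0x00018000.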
+ if (LoSign)
+ Val++;
+ Res = MCConstantExpr::Create(Val, getContext());
+ } else {
+ llvm_unreachable("Invalid RelocStr value");
+ }
+ return Res;
+ }
+
+ if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ // It's a symbol, create a symbolic expression from the symbol.
+ StringRef Symbol = MSRE->getSymbol().getName();
+ MCSymbolRefExpr::VariantKind VK = getVariantKind(RelocStr);
+ Res = MCSymbolRefExpr::Create(Symbol, VK, getContext());
+ return Res;
+ }
+
+ if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+ const MCExpr *LExp = evaluateRelocExpr(BE->getLHS(), RelocStr);
+ const MCExpr *RExp = evaluateRelocExpr(BE->getRHS(), RelocStr);
+ Res = MCBinaryExpr::Create(BE->getOpcode(), LExp, RExp, getContext());
+ return Res;
+ }
- Parser.Lex(); // eat % token
- const AsmToken &Tok = Parser.getTok(); // get next token, operation
+ if (const MCUnaryExpr *UN = dyn_cast<MCUnaryExpr>(Expr)) {
+ const MCExpr *UnExp = evaluateRelocExpr(UN->getSubExpr(), RelocStr);
+ Res = MCUnaryExpr::Create(UN->getOpcode(), UnExp, getContext());
+ return Res;
+ }
+ // Just return the original expression.
+ return Expr;
+}
+
+bool MipsAsmParser::isEvaluated(const MCExpr *Expr) {
+
+ switch (Expr->getKind()) {
+ case MCExpr::Constant:
+ return true;
+ case MCExpr::SymbolRef:
+ return (cast<MCSymbolRefExpr>(Expr)->getKind() != MCSymbolRefExpr::VK_None);
+ case MCExpr::Binary:
+ if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+ if (!isEvaluated(BE->getLHS()))
+ return false;
+ return isEvaluated(BE->getRHS());
+ }
+ case MCExpr::Unary:
+ return isEvaluated(cast<MCUnaryExpr>(Expr)->getSubExpr());
+ default:
+ return false;
+ }
+ return false;
+}
+
+bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
+ Parser.Lex(); // Eat the % token.
+ const AsmToken &Tok = Parser.getTok(); // Get the next token, the operation.
if (Tok.isNot(AsmToken::Identifier))
return true;
std::string Str = Tok.getIdentifier().str();
- Parser.Lex(); // eat identifier
- // now make expression from the rest of the operand
+ Parser.Lex(); // Eat the identifier.
+ // Now make an expression from the rest of the operand.
const MCExpr *IdVal;
SMLoc EndLoc;
if (getLexer().getKind() == AsmToken::LParen) {
while (1) {
- Parser.Lex(); // eat '(' token
+ Parser.Lex(); // Eat the '(' token.
if (getLexer().getKind() == AsmToken::Percent) {
- Parser.Lex(); // eat % token
+ Parser.Lex(); // Eat the % token.
const AsmToken &nextTok = Parser.getTok();
if (nextTok.isNot(AsmToken::Identifier))
return true;
Str += "(%";
Str += nextTok.getIdentifier();
- Parser.Lex(); // eat identifier
+ Parser.Lex(); // Eat the identifier.
if (getLexer().getKind() != AsmToken::LParen)
return true;
} else
break;
}
- if (getParser().parseParenExpression(IdVal,EndLoc))
+ if (getParser().parseParenExpression(IdVal, EndLoc))
return true;
while (getLexer().getKind() == AsmToken::RParen)
- Parser.Lex(); // eat ')' token
+ Parser.Lex(); // Eat the ')' token.
} else
- return true; // parenthesis must follow reloc operand
-
- // Check the type of the expression
- if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) {
- // It's a constant, evaluate lo or hi value
- if (Str == "lo") {
- short Val = MCE->getValue();
- Res = MCConstantExpr::Create(Val, getContext());
- } else if (Str == "hi") {
- int Val = MCE->getValue();
- int LoSign = Val & 0x8000;
- Val = (Val & 0xffff0000) >> 16;
- // Lower part is treated as a signed int, so if it is negative
- // we must add 1 to the hi part to compensate
- if (LoSign)
- Val++;
- Res = MCConstantExpr::Create(Val, getContext());
- }
- return false;
- }
+ return true; // Parenthesis must follow the relocation operand.
- if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) {
- // It's a symbol, create symbolic expression from symbol
- StringRef Symbol = MSRE->getSymbol().getName();
- MCSymbolRefExpr::VariantKind VK = getVariantKind(Str);
- Res = MCSymbolRefExpr::Create(Symbol,VK,getContext());
- return false;
- }
- return true;
+ Res = evaluateRelocExpr(IdVal, Str);
+ return false;
}
bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
-
StartLoc = Parser.getTok().getLoc();
RegNo = tryParseRegister(isMips64());
EndLoc = Parser.getTok().getLoc();
- return (RegNo == (unsigned)-1);
+ return (RegNo == (unsigned) -1);
}
-bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) {
-
+bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
SMLoc S;
+ bool Result = true;
- switch(getLexer().getKind()) {
+ while (getLexer().getKind() == AsmToken::LParen)
+ Parser.Lex();
+
+ switch (getLexer().getKind()) {
default:
return true;
case AsmToken::Identifier:
+ case AsmToken::LParen:
case AsmToken::Integer:
case AsmToken::Minus:
case AsmToken::Plus:
- return (getParser().parseExpression(Res));
+ if (isParenExpr)
+ Result = getParser().parseParenExpression(Res, S);
+ else
+ Result = (getParser().parseExpression(Res));
+ while (getLexer().getKind() == AsmToken::RParen)
+ Parser.Lex();
+ break;
case AsmToken::Percent:
- return parseRelocOperand(Res);
- case AsmToken::LParen:
- return false; // it's probably assuming 0
+ Result = parseRelocOperand(Res);
}
- return true;
+ return Result;
}
MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&Operands) {
+ SmallVectorImpl<MCParsedAsmOperand*>&Operands) {
const MCExpr *IdVal = 0;
SMLoc S;
- // first operand is the offset
+ bool isParenExpr = false;
+ // First operand is the offset.
S = Parser.getTok().getLoc();
- if (parseMemOffset(IdVal))
- return MatchOperand_ParseFail;
+ if (getLexer().getKind() == AsmToken::LParen) {
+ Parser.Lex();
+ isParenExpr = true;
+ }
- const AsmToken &Tok = Parser.getTok(); // get next token
- if (Tok.isNot(AsmToken::LParen)) {
- MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]);
- if (Mnemonic->getToken() == "la") {
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() -1);
- Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
- return MatchOperand_Success;
+ if (getLexer().getKind() != AsmToken::Dollar) {
+ if (parseMemOffset(IdVal, isParenExpr))
+ return MatchOperand_ParseFail;
+
+ const AsmToken &Tok = Parser.getTok(); // Get the next token.
+ if (Tok.isNot(AsmToken::LParen)) {
+ MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]);
+ if (Mnemonic->getToken() == "la") {
+ SMLoc E = SMLoc::getFromPointer(
+ Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
+ return MatchOperand_Success;
+ }
+ if (Tok.is(AsmToken::EndOfStatement)) {
+ SMLoc E = SMLoc::getFromPointer(
+ Parser.getTok().getLoc().getPointer() - 1);
+
+ // Zero register assumed, add a memory operand with ZERO as its base.
+ Operands.push_back(MipsOperand::CreateMem(isMips64() ? Mips::ZERO_64
+ : Mips::ZERO,
+ IdVal, S, E));
+ return MatchOperand_Success;
+ }
+ Error(Parser.getTok().getLoc(), "'(' expected");
+ return MatchOperand_ParseFail;
}
- Error(Parser.getTok().getLoc(), "'(' expected");
- return MatchOperand_ParseFail;
- }
- Parser.Lex(); // Eat '(' token.
+ Parser.Lex(); // Eat the '(' token.
+ }
- const AsmToken &Tok1 = Parser.getTok(); // get next token
+ const AsmToken &Tok1 = Parser.getTok(); // Get the next token.
if (Tok1.is(AsmToken::Dollar)) {
- Parser.Lex(); // Eat '$' token.
+ Parser.Lex(); // Eat the '$' token.
if (tryParseRegisterOperand(Operands, isMips64())) {
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return MatchOperand_ParseFail;
@@ -1123,7 +1218,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
return MatchOperand_ParseFail;
}
- const AsmToken &Tok2 = Parser.getTok(); // get next token
+ const AsmToken &Tok2 = Parser.getTok(); // Get the next token.
if (Tok2.isNot(AsmToken::RParen)) {
Error(Parser.getTok().getLoc(), "')' expected");
return MatchOperand_ParseFail;
@@ -1131,17 +1226,26 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- Parser.Lex(); // Eat ')' token.
+ Parser.Lex(); // Eat the ')' token.
if (IdVal == 0)
IdVal = MCConstantExpr::Create(0, getContext());
- // now replace register operand with the mem operand
+ // Replace the register operand with the memory operand.
MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
int RegNo = op->getReg();
- // remove register from operands
+ // Remove the register from the operands.
Operands.pop_back();
- // and add memory operand
+ // Add the memory operand.
+ if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(IdVal)) {
+ int64_t Imm;
+ if (IdVal->EvaluateAsAbsolute(Imm))
+ IdVal = MCConstantExpr::Create(Imm, getContext());
+ else if (BE->getLHS()->getKind() != MCExpr::SymbolRef)
+ IdVal = MCBinaryExpr::Create(BE->getOpcode(), BE->getRHS(), BE->getLHS(),
+ getContext());
+ }
+
Operands.push_back(MipsOperand::CreateMem(RegNo, IdVal, S, E));
delete op;
return MatchOperand_Success;
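For the offset rewrite a few lines above: a purely numeric offset expression is folded into a single constant, while a sum whose left-hand side is not a symbol reference has its operands swapped so the symbolic part comes first. A rough standalone illustration of that decision using a toy expression type (the real code operates on MCExpr/MCBinaryExpr; these names are hypothetical):

    #include <cassert>
    #include <cstdint>
    #include <optional>
    #include <string>
    #include <utility>

    // Toy stand-in for a memory-offset term: either a constant or a symbol.
    struct Term {
      std::optional<int64_t> Num; // set when the term is a constant
      std::string Sym;            // set when the term is a symbol
    };

    struct Offset { Term LHS, RHS; };

    // Fold "4 + 8" into "12"; rewrite "4 + sym" as "sym + 4" so the symbolic
    // part ends up on the left, mirroring the rewrite in parseMemOperand.
    static Offset normalize(Offset O) {
      if (O.LHS.Num && O.RHS.Num) {
        O.LHS = Term{*O.LHS.Num + *O.RHS.Num, ""};
        O.RHS = Term{};
      } else if (O.LHS.Sym.empty()) {
        std::swap(O.LHS, O.RHS);
      }
      return O;
    }

    int main() {
      Offset A = normalize({Term{4, ""}, Term{8, ""}});
      assert(A.LHS.Num && *A.LHS.Num == 12);

      Offset B = normalize({Term{4, ""}, Term{std::nullopt, "sym"}});
      assert(B.LHS.Sym == "sym" && B.RHS.Num && *B.RHS.Num == 4);
      return 0;
    }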
@@ -1153,17 +1257,17 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!isMips64())
return MatchOperand_NoMatch;
if (getLexer().getKind() == AsmToken::Identifier) {
- if (searchSymbolAlias(Operands,MipsOperand::Kind_CPU64Regs))
+ if (searchSymbolAlias(Operands, MipsOperand::Kind_CPU64Regs))
return MatchOperand_Success;
return MatchOperand_NoMatch;
}
- // if the first token is not '$' we have an error
+ // If the first token is not '$', we have an error.
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
Parser.Lex(); // Eat $
- if(!tryParseRegisterOperand(Operands, true)) {
- // set the proper register kind
+ if (!tryParseRegisterOperand(Operands, true)) {
+ // Set the proper register kind.
MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
op->setRegKind(MipsOperand::Kind_CPU64Regs);
return MatchOperand_Success;
@@ -1171,9 +1275,8 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_NoMatch;
}
-bool MipsAsmParser::
-searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- unsigned RegisterKind) {
+bool MipsAsmParser::searchSymbolAlias(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands, unsigned RegisterKind) {
MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
if (Sym) {
@@ -1187,13 +1290,13 @@ searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
const StringRef DefSymbol = Ref->getSymbol().getName();
if (DefSymbol.startswith("$")) {
- // Lookup for the register with corresponding name
- int RegNum = matchRegisterName(DefSymbol.substr(1),isMips64());
+ // Lookup for the register with the corresponding name.
+ int RegNum = matchRegisterName(DefSymbol.substr(1), isMips64());
if (RegNum > -1) {
Parser.Lex();
- MipsOperand *op = MipsOperand::CreateReg(RegNum,S,
- Parser.getTok().getLoc());
- op->setRegKind((MipsOperand::RegisterKind)RegisterKind);
+ MipsOperand *op = MipsOperand::CreateReg(RegNum, S,
+ Parser.getTok().getLoc());
+ op->setRegKind((MipsOperand::RegisterKind) RegisterKind);
Operands.push_back(op);
return true;
}
@@ -1201,29 +1304,30 @@ searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
} else if (Expr->getKind() == MCExpr::Constant) {
Parser.Lex();
const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr);
- MipsOperand *op = MipsOperand::CreateImm(Const,S,
- Parser.getTok().getLoc());
+ MipsOperand *op = MipsOperand::CreateImm(Const, S,
+ Parser.getTok().getLoc());
Operands.push_back(op);
return true;
}
}
return false;
}
+
MipsAsmParser::OperandMatchResultTy
MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (getLexer().getKind() == AsmToken::Identifier) {
- if (searchSymbolAlias(Operands,MipsOperand::Kind_CPURegs))
+ if (searchSymbolAlias(Operands, MipsOperand::Kind_CPURegs))
return MatchOperand_Success;
return MatchOperand_NoMatch;
}
- // if the first token is not '$' we have an error
+ // If the first token is not '$' we have an error.
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
Parser.Lex(); // Eat $
- if(!tryParseRegisterOperand(Operands, false)) {
- // set the propper register kind
+ if (!tryParseRegisterOperand(Operands, false)) {
+ // Set the proper register kind.
MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
op->setRegKind(MipsOperand::Kind_CPURegs);
return MatchOperand_Success;
@@ -1237,87 +1341,88 @@ MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (isMips64())
return MatchOperand_NoMatch;
- // if the first token is not '$' we have error
+ // If the first token is not '$' we have an error.
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
SMLoc S = Parser.getTok().getLoc();
- Parser.Lex(); // Eat $
+ Parser.Lex(); // Eat the '$'.
- const AsmToken &Tok = Parser.getTok(); // get next token
+ const AsmToken &Tok = Parser.getTok(); // Get the next token.
if (Tok.isNot(AsmToken::Integer))
return MatchOperand_NoMatch;
unsigned RegNum = Tok.getIntVal();
- // at the moment only hwreg29 is supported
+ // At the moment only hwreg29 is supported.
if (RegNum != 29)
return MatchOperand_ParseFail;
MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S,
- Parser.getTok().getLoc());
+ Parser.getTok().getLoc());
op->setRegKind(MipsOperand::Kind_HWRegs);
Operands.push_back(op);
- Parser.Lex(); // Eat reg number
+ Parser.Lex(); // Eat the register number.
return MatchOperand_Success;
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseHW64Regs(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!isMips64())
return MatchOperand_NoMatch;
- //if the first token is not '$' we have error
+ // If the first token is not '$' we have an error.
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
SMLoc S = Parser.getTok().getLoc();
Parser.Lex(); // Eat $
- const AsmToken &Tok = Parser.getTok(); // get next token
+ const AsmToken &Tok = Parser.getTok(); // Get the next token.
if (Tok.isNot(AsmToken::Integer))
return MatchOperand_NoMatch;
unsigned RegNum = Tok.getIntVal();
- // at the moment only hwreg29 is supported
+ // At the moment only hwreg29 is supported.
if (RegNum != 29)
return MatchOperand_ParseFail;
MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S,
- Parser.getTok().getLoc());
+ Parser.getTok().getLoc());
op->setRegKind(MipsOperand::Kind_HW64Regs);
Operands.push_back(op);
- Parser.Lex(); // Eat reg number
+ Parser.Lex(); // Eat the register number.
return MatchOperand_Success;
}
MipsAsmParser::OperandMatchResultTy
MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
unsigned RegNum;
- //if the first token is not '$' we have error
+ // If the first token is not '$' we have an error.
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
SMLoc S = Parser.getTok().getLoc();
- Parser.Lex(); // Eat $
+ Parser.Lex(); // Eat the '$'.
- const AsmToken &Tok = Parser.getTok(); // get next token
+ const AsmToken &Tok = Parser.getTok(); // Get the next token.
if (Tok.is(AsmToken::Integer)) {
RegNum = Tok.getIntVal();
- // at the moment only fcc0 is supported
+ // At the moment only fcc0 is supported.
if (RegNum != 0)
return MatchOperand_ParseFail;
} else if (Tok.is(AsmToken::Identifier)) {
- // at the moment only fcc0 is supported
+ // At the moment only fcc0 is supported.
if (Tok.getIdentifier() != "fcc0")
return MatchOperand_ParseFail;
} else
return MatchOperand_NoMatch;
MipsOperand *op = MipsOperand::CreateReg(Mips::FCC0, S,
- Parser.getTok().getLoc());
+ Parser.getTok().getLoc());
op->setRegKind(MipsOperand::Kind_CCRRegs);
Operands.push_back(op);
- Parser.Lex(); // Eat reg number
+ Parser.Lex(); // Eat the register number.
return MatchOperand_Success;
}
@@ -1349,23 +1454,23 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
static int ConvertCcString(StringRef CondString) {
int CC = StringSwitch<unsigned>(CondString)
- .Case(".f", 0)
- .Case(".un", 1)
- .Case(".eq", 2)
- .Case(".ueq", 3)
- .Case(".olt", 4)
- .Case(".ult", 5)
- .Case(".ole", 6)
- .Case(".ule", 7)
- .Case(".sf", 8)
- .Case(".ngle", 9)
- .Case(".seq", 10)
- .Case(".ngl", 11)
- .Case(".lt", 12)
- .Case(".nge", 13)
- .Case(".le", 14)
- .Case(".ngt", 15)
- .Default(-1);
+ .Case(".f", 0)
+ .Case(".un", 1)
+ .Case(".eq", 2)
+ .Case(".ueq", 3)
+ .Case(".olt", 4)
+ .Case(".ult", 5)
+ .Case(".ole", 6)
+ .Case(".ule", 7)
+ .Case(".sf", 8)
+ .Case(".ngle", 9)
+ .Case(".seq", 10)
+ .Case(".ngl", 11)
+ .Case(".lt", 12)
+ .Case(".nge", 13)
+ .Case(".le", 14)
+ .Case(".ngt", 15)
+ .Default(-1);
return CC;
}
@@ -1373,16 +1478,16 @@ static int ConvertCcString(StringRef CondString) {
bool MipsAsmParser::
parseMathOperation(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // split the format
+ // Split the format.
size_t Start = Name.find('.'), Next = Name.rfind('.');
StringRef Format1 = Name.slice(Start, Next);
- // and add the first format to the operands
+ // Add the first format to the operands.
Operands.push_back(MipsOperand::CreateToken(Format1, NameLoc));
- // now for the second format
+ // Now for the second format.
StringRef Format2 = Name.slice(Next, StringRef::npos);
Operands.push_back(MipsOperand::CreateToken(Format2, NameLoc));
- // set the format for the first register
+ // Set the format for the first register.
setFpFormat(Format1);
// Read the remaining operands.
@@ -1398,11 +1503,10 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
-
}
- Parser.Lex(); // Eat the comma.
+ Parser.Lex(); // Eat the comma.
- //set the format for the first register
+ // Set the format for the second register.
setFpFormat(Format2);
// Parse and remember the operand.
@@ -1419,7 +1523,7 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
return Error(Loc, "unexpected token in argument list");
}
- Parser.Lex(); // Consume the EndOfStatement
+ Parser.Lex(); // Consume the EndOfStatement.
return false;
}
@@ -1427,13 +1531,12 @@ bool MipsAsmParser::
ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
StringRef Mnemonic;
- // floating point instructions: should register be treated as double?
+ // Floating point instructions: Should the register be treated as a double?
if (requestsDoubleOperand(Name)) {
setFpFormat(FP_FORMAT_D);
- Operands.push_back(MipsOperand::CreateToken(Name, NameLoc));
- Mnemonic = Name;
- }
- else {
+ Operands.push_back(MipsOperand::CreateToken(Name, NameLoc));
+ Mnemonic = Name;
+ } else {
setDefaultFpFormat();
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
@@ -1442,30 +1545,30 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc));
if (Next != StringRef::npos) {
- // there is a format token in mnemonic
- // StringRef Rest = Name.slice(Next, StringRef::npos);
- size_t Dot = Name.find('.', Next+1);
+ // There is a format token in mnemonic.
+ size_t Dot = Name.find('.', Next + 1);
StringRef Format = Name.slice(Next, Dot);
- if (Dot == StringRef::npos) //only one '.' in a string, it's a format
+ if (Dot == StringRef::npos) // Only one '.' in a string, it's a format.
Operands.push_back(MipsOperand::CreateToken(Format, NameLoc));
else {
- if (Name.startswith("c.")){
- // floating point compare, add '.' and immediate represent for cc
+ if (Name.startswith("c.")) {
+ // Floating point compare, add '.' and an immediate representing the cc.
Operands.push_back(MipsOperand::CreateToken(".", NameLoc));
int Cc = ConvertCcString(Format);
if (Cc == -1) {
return Error(NameLoc, "Invalid conditional code");
}
SMLoc E = SMLoc::getFromPointer(
- Parser.getTok().getLoc().getPointer() -1 );
- Operands.push_back(MipsOperand::CreateImm(
- MCConstantExpr::Create(Cc, getContext()), NameLoc, E));
+ Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(
+ MipsOperand::CreateImm(MCConstantExpr::Create(Cc, getContext()),
+ NameLoc, E));
} else {
// trunc, ceil, floor ...
return parseMathOperation(Name, NameLoc, Operands);
}
- // the rest is a format
+ // The rest is a format.
Format = Name.slice(Dot, StringRef::npos);
Operands.push_back(MipsOperand::CreateToken(Format, NameLoc));
}
@@ -1483,8 +1586,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
return Error(Loc, "unexpected token in argument list");
}
- while (getLexer().is(AsmToken::Comma) ) {
- Parser.Lex(); // Eat the comma.
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
// Parse and remember the operand.
if (ParseOperand(Operands, Name)) {
@@ -1501,48 +1604,47 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
return Error(Loc, "unexpected token in argument list");
}
- Parser.Lex(); // Consume the EndOfStatement
+ Parser.Lex(); // Consume the EndOfStatement.
return false;
}
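The mnemonic splitting in ParseInstruction above turns a floating-point compare such as "c.eq.d" into the token "c", a "." token, the condition code as an immediate (".eq" maps to 2 via ConvertCcString), and the trailing format ".d". A small standalone sketch of that string handling (helper names are illustrative, not the parser's own):

    #include <cassert>
    #include <string>

    // Map a condition suffix to its 4-bit code, as ConvertCcString does.
    static int ccCode(const std::string &Cond) {
      const char *Names[] = {".f",  ".un",  ".eq", ".ueq",  ".olt", ".ult",
                             ".ole", ".ule", ".sf", ".ngle", ".seq", ".ngl",
                             ".lt",  ".nge", ".le", ".ngt"};
      for (int I = 0; I < 16; ++I)
        if (Cond == Names[I])
          return I;
      return -1;
    }

    int main() {
      std::string Name = "c.eq.d";
      size_t Next = Name.find('.');          // 1
      size_t Dot = Name.find('.', Next + 1); // 4
      std::string Mnemonic = Name.substr(0, Next);      // "c"
      std::string Cond = Name.substr(Next, Dot - Next); // ".eq"
      std::string Format = Name.substr(Dot);            // ".d"
      // Operands end up as: "c", ".", imm 2, ".d", then the registers.
      assert(Mnemonic == "c" && ccCode(Cond) == 2 && Format == ".d");
      return 0;
    }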
bool MipsAsmParser::reportParseError(StringRef ErrorMsg) {
- SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
- return Error(Loc, ErrorMsg);
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, ErrorMsg);
}
bool MipsAsmParser::parseSetNoAtDirective() {
- // Line should look like:
- // .set noat
- // set at reg to 0
+ // Line should look like: ".set noat".
+ // Set the at register to 0.
Options.setATReg(0);
// eat noat
Parser.Lex();
- // If this is not the end of the statement, report error
+ // If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
}
- Parser.Lex(); // Consume the EndOfStatement
+ Parser.Lex(); // Consume the EndOfStatement.
return false;
}
+
bool MipsAsmParser::parseSetAtDirective() {
- // line can be
- // .set at - defaults to $1
+ // Line can be .set at - defaults to $1
// or .set at=$reg
int AtRegNo;
getParser().Lex();
if (getLexer().is(AsmToken::EndOfStatement)) {
Options.setATReg(1);
- Parser.Lex(); // Consume the EndOfStatement
+ Parser.Lex(); // Consume the EndOfStatement.
return false;
} else if (getLexer().is(AsmToken::Equal)) {
- getParser().Lex(); // eat '='
+ getParser().Lex(); // Eat the '='.
if (getLexer().isNot(AsmToken::Dollar)) {
reportParseError("unexpected token in statement");
return false;
}
- Parser.Lex(); // Eat '$'
+ Parser.Lex(); // Eat the '$'.
const AsmToken &Reg = Parser.getTok();
if (Reg.is(AsmToken::Identifier)) {
AtRegNo = matchCPURegisterName(Reg.getIdentifier());
@@ -1553,7 +1655,7 @@ bool MipsAsmParser::parseSetAtDirective() {
return false;
}
- if ( AtRegNo < 1 || AtRegNo > 31) {
+ if (AtRegNo < 1 || AtRegNo > 31) {
reportParseError("unexpected token in statement");
return false;
}
@@ -1562,13 +1664,13 @@ bool MipsAsmParser::parseSetAtDirective() {
reportParseError("unexpected token in statement");
return false;
}
- getParser().Lex(); // Eat reg
+ getParser().Lex(); // Eat the register.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
- }
- Parser.Lex(); // Consume the EndOfStatement
+ }
+ Parser.Lex(); // Consume the EndOfStatement.
return false;
} else {
reportParseError("unexpected token in statement");
@@ -1578,43 +1680,43 @@ bool MipsAsmParser::parseSetAtDirective() {
bool MipsAsmParser::parseSetReorderDirective() {
Parser.Lex();
- // If this is not the end of the statement, report error
+ // If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
}
Options.setReorder();
- Parser.Lex(); // Consume the EndOfStatement
+ Parser.Lex(); // Consume the EndOfStatement.
return false;
}
bool MipsAsmParser::parseSetNoReorderDirective() {
- Parser.Lex();
- // if this is not the end of the statement, report error
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- reportParseError("unexpected token in statement");
- return false;
- }
- Options.setNoreorder();
- Parser.Lex(); // Consume the EndOfStatement
+ Parser.Lex();
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token in statement");
return false;
+ }
+ Options.setNoreorder();
+ Parser.Lex(); // Consume the EndOfStatement.
+ return false;
}
bool MipsAsmParser::parseSetMacroDirective() {
Parser.Lex();
- // if this is not the end of the statement, report error
+ // If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token in statement");
return false;
}
Options.setMacro();
- Parser.Lex(); // Consume the EndOfStatement
+ Parser.Lex(); // Consume the EndOfStatement.
return false;
}
bool MipsAsmParser::parseSetNoMacroDirective() {
Parser.Lex();
- // if this is not the end of the statement, report error
+ // If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("`noreorder' must be set before `nomacro'");
return false;
@@ -1624,7 +1726,7 @@ bool MipsAsmParser::parseSetNoMacroDirective() {
return false;
}
Options.setNomacro();
- Parser.Lex(); // Consume the EndOfStatement
+ Parser.Lex(); // Consume the EndOfStatement.
return false;
}
@@ -1637,24 +1739,24 @@ bool MipsAsmParser::parseSetAssignment() {
if (getLexer().isNot(AsmToken::Comma))
return reportParseError("unexpected token in .set directive");
- Lex(); //eat comma
+ Lex(); // Eat the comma.
if (Parser.parseExpression(Value))
reportParseError("expected valid expression after comma");
- // check if the Name already exists as a symbol
+ // Check if the Name already exists as a symbol.
MCSymbol *Sym = getContext().LookupSymbol(Name);
- if (Sym) {
+ if (Sym)
return reportParseError("symbol already defined");
- }
Sym = getContext().GetOrCreateSymbol(Name);
Sym->setVariableValue(Value);
return false;
}
+
bool MipsAsmParser::parseDirectiveSet() {
- // get next token
+ // Get the next token.
const AsmToken &Tok = Parser.getTok();
if (Tok.getString() == "noat") {
@@ -1670,15 +1772,15 @@ bool MipsAsmParser::parseDirectiveSet() {
} else if (Tok.getString() == "nomacro") {
return parseSetNoMacroDirective();
} else if (Tok.getString() == "nomips16") {
- // ignore this directive for now
+ // Ignore this directive for now.
Parser.eatToEndOfStatement();
return false;
} else if (Tok.getString() == "nomicromips") {
- // ignore this directive for now
+ // Ignore this directive for now.
Parser.eatToEndOfStatement();
return false;
} else {
- // it is just an identifier, look for assignment
+ // It is just an identifier, look for an assignment.
parseSetAssignment();
return false;
}
@@ -1715,20 +1817,20 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
- if ( IDVal == ".ent") {
- // ignore this directive for now
+ if (IDVal == ".ent") {
+ // Ignore this directive for now.
Parser.Lex();
return false;
}
if (IDVal == ".end") {
- // ignore this directive for now
+ // Ignore this directive for now.
Parser.Lex();
return false;
}
if (IDVal == ".frame") {
- // ignore this directive for now
+ // Ignore this directive for now.
Parser.eatToEndOfStatement();
return false;
}
@@ -1738,19 +1840,19 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
}
if (IDVal == ".fmask") {
- // ignore this directive for now
+ // Ignore this directive for now.
Parser.eatToEndOfStatement();
return false;
}
if (IDVal == ".mask") {
- // ignore this directive for now
+ // Ignore this directive for now.
Parser.eatToEndOfStatement();
return false;
}
if (IDVal == ".gpword") {
- // ignore this directive for now
+ // Ignore this directive for now.
Parser.eatToEndOfStatement();
return false;
}
diff --git a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 59e49d8..0dba33a 100644
--- a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -143,6 +143,16 @@ static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
uint64_t Address,
@@ -496,6 +506,30 @@ static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo >= 4)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::HIRegsDSPRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo >= 4)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::LORegsDSPRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
uint64_t Address,
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index e198a7c..9460731 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -27,6 +27,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
+#define GET_INSTRMAP_INFO
+#include "MipsGenInstrInfo.inc"
+
using namespace llvm;
namespace {
@@ -35,12 +38,13 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
void operator=(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION;
const MCInstrInfo &MCII;
MCContext &Ctx;
+ const MCSubtargetInfo &STI;
bool IsLittleEndian;
public:
MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_,
const MCSubtargetInfo &sti, bool IsLittle) :
- MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {}
+ MCII(mcii), Ctx(Ctx_), STI (sti), IsLittleEndian(IsLittle) {}
~MipsMCCodeEmitter() {}
@@ -88,6 +92,9 @@ public:
unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned
+ getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const;
+
}; // class MipsMCCodeEmitter
} // namespace
@@ -141,6 +148,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) && !Binary)
llvm_unreachable("unimplemented opcode in EncodeInstruction()");
+ if (STI.getFeatureBits() & Mips::FeatureMicroMips) {
+ int NewOpcode = Mips::Std2MicroMips (Opcode, Mips::Arch_micromips);
+ if (NewOpcode != -1) {
+ Opcode = NewOpcode;
+ TmpInst.setOpcode (NewOpcode);
+ Binary = getBinaryCodeForInstr(TmpInst, Fixups);
+ }
+ }
+
const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode());
// Get byte count of instruction
@@ -192,35 +208,24 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
return 0;
}
-/// getMachineOpValue - Return binary encoding of operand. If the machine
-/// operand requires relocation, record the relocation and return zero.
unsigned MipsMCCodeEmitter::
-getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups) const {
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
- unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg);
- return RegNo;
- } else if (MO.isImm()) {
- return static_cast<unsigned>(MO.getImm());
- } else if (MO.isFPImm()) {
- return static_cast<unsigned>(APFloat(MO.getFPImm())
- .bitcastToAPInt().getHiBits(32).getLimitedValue());
- }
+getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const {
+ int64_t Res;
- // MO must be an Expr.
- assert(MO.isExpr());
+ if (Expr->EvaluateAsAbsolute(Res))
+ return Res;
- const MCExpr *Expr = MO.getExpr();
MCExpr::ExprKind Kind = Expr->getKind();
+ if (Kind == MCExpr::Constant) {
+ return cast<MCConstantExpr>(Expr)->getValue();
+ }
if (Kind == MCExpr::Binary) {
- Expr = static_cast<const MCBinaryExpr*>(Expr)->getLHS();
- Kind = Expr->getKind();
+ unsigned Res = getExprOpValue(cast<MCBinaryExpr>(Expr)->getLHS(), Fixups);
+ Res += getExprOpValue(cast<MCBinaryExpr>(Expr)->getRHS(), Fixups);
+ return Res;
}
-
- assert (Kind == MCExpr::SymbolRef);
-
+ if (Kind == MCExpr::SymbolRef) {
Mips::Fixups FixupKind = Mips::Fixups(0);
switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
@@ -300,12 +305,32 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
break;
} // switch
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind)));
-
- // All of the information is in the fixup.
+ Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind)));
+ return 0;
+ }
return 0;
}
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg);
+ return RegNo;
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ } else if (MO.isFPImm()) {
+ return static_cast<unsigned>(APFloat(MO.getFPImm())
+ .bitcastToAPInt().getHiBits(32).getLimitedValue());
+ }
+ // MO must be an Expr.
+ assert(MO.isExpr());
+ return getExprOpValue(MO.getExpr(),Fixups);
+}
+
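In the rewritten code above, getExprOpValue contributes a constant's value directly to the instruction bits, records a fixup and contributes zero for a symbol reference, and sums the two halves of a binary expression. A toy model of that behaviour (illustrative types only; the real code walks MCExpr nodes and emits MCFixups):

    #include <cassert>
    #include <cstdint>
    #include <string>
    #include <vector>

    // Toy expression: a constant, a symbol reference, or a sum of two nodes.
    struct TExpr {
      int64_t Value = 0;                          // used when Sym is empty
      std::string Sym;                            // non-empty for a symbol
      const TExpr *LHS = nullptr, *RHS = nullptr; // set for a sum
    };

    // Constants encode directly, symbols record a fixup and encode as 0,
    // and sums add the contributions of both sides.
    static int64_t encode(const TExpr &E, std::vector<std::string> &Fixups) {
      if (E.LHS && E.RHS)
        return encode(*E.LHS, Fixups) + encode(*E.RHS, Fixups);
      if (!E.Sym.empty()) {
        Fixups.push_back(E.Sym); // resolved later by a relocation
        return 0;
      }
      return E.Value;
    }

    int main() {
      TExpr Sym{0, "foo"};
      TExpr Eight{8};
      TExpr Sum{0, "", &Sym, &Eight};
      std::vector<std::string> Fixups;
      // "foo + 8" encodes the 8 now and leaves "foo" to a fixup.
      assert(encode(Sum, Fixups) == 8 && Fixups.size() == 1);
      return 0;
    }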
/// getMemEncoding - Return binary encoding of memory related operand.
/// If the offset operand requires relocation, record the relocation.
unsigned
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
new file mode 100644
index 0000000..665b4d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -0,0 +1,112 @@
+class MMArch {
+ string Arch = "micromips";
+ list<dag> Pattern = [];
+}
+
+class ADD_FM_MM<bits<6> op, bits<10> funct> : MMArch {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = rd;
+ let Inst{10} = 0;
+ let Inst{9-0} = funct;
+}
+
+class ADDI_FM_MM<bits<6> op> : MMArch {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = imm16;
+}
+
+class SLTI_FM_MM<bits<6> op> : MMArch {
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class LUI_FM_MM : MMArch {
+ bits<5> rt;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x10;
+ let Inst{25-21} = 0xd;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm16;
+}
+
+class MULT_FM_MM<bits<10> funct> : MMArch {
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class SRA_FM_MM<bits<10> funct, bit rotate> : MMArch {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> shamt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rd;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = shamt;
+ let Inst{10} = rotate;
+ let Inst{9-0} = funct;
+}
+
+class SRLV_FM_MM<bits<10> funct, bit rotate> : MMArch {
+ bits<5> rd;
+ bits<5> rt;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = rd;
+ let Inst{10} = rotate;
+ let Inst{9-0} = funct;
+}
+
+class LW_FM_MM<bits<6> op> : MMArch {
+ bits<5> rt;
+ bits<21> addr;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = addr{20-16};
+ let Inst{15-0} = addr{15-0};
+}
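The classes in this new file only describe bit layouts; for example, ADD_FM_MM places op in bits 31-26, rt in 25-21, rs in 20-16, rd in 15-11, a zero in bit 10 and the 10-bit function code in bits 9-0. A small standalone encoder sketch of that layout (hand-written for illustration, not the TableGen-generated emitter):

    #include <cassert>
    #include <cstdint>

    // Pack a 32-bit word following the ADD_FM_MM layout above:
    // op[31:26] rt[25:21] rs[20:16] rd[15:11] 0[10] funct[9:0].
    static uint32_t encodeAddFmMM(unsigned Op, unsigned Rt, unsigned Rs,
                                  unsigned Rd, unsigned Funct) {
      return (Op & 0x3f) << 26 | (Rt & 0x1f) << 21 | (Rs & 0x1f) << 16 |
             (Rd & 0x1f) << 11 | (Funct & 0x3ff);
    }

    int main() {
      // ADDu_MM in the next file uses op = 0 and funct = 0x150.
      uint32_t W = encodeAddFmMM(0, /*rt=*/5, /*rs=*/6, /*rd=*/7, 0x150);
      assert(W == ((5u << 21) | (6u << 16) | (7u << 11) | 0x150u));
      return 0;
    }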
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
new file mode 100644
index 0000000..74cdccd
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -0,0 +1,67 @@
+let isCodeGenOnly = 1 in {
+ /// Arithmetic Instructions (ALU Immediate)
+ def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, CPURegsOpnd>,
+ ADDI_FM_MM<0xc>;
+ def ADDi_MM : MMRel, ArithLogicI<"addi", simm16, CPURegsOpnd>,
+ ADDI_FM_MM<0x4>;
+ def SLTi_MM : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>,
+ SLTI_FM_MM<0x24>;
+ def SLTiu_MM : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>,
+ SLTI_FM_MM<0x2c>;
+ def ANDi_MM : MMRel, ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>,
+ ADDI_FM_MM<0x34>;
+ def ORi_MM : MMRel, ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>,
+ ADDI_FM_MM<0x14>;
+ def XORi_MM : MMRel, ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>,
+ ADDI_FM_MM<0x1c>;
+ def LUi_MM : MMRel, LoadUpper<"lui", CPURegs, uimm16>, LUI_FM_MM;
+
+ /// Arithmetic Instructions (3-Operand, R-Type)
+ def ADDu_MM : MMRel, ArithLogicR<"addu", CPURegsOpnd>, ADD_FM_MM<0, 0x150>;
+ def SUBu_MM : MMRel, ArithLogicR<"subu", CPURegsOpnd>, ADD_FM_MM<0, 0x1d0>;
+ def MUL_MM : MMRel, ArithLogicR<"mul", CPURegsOpnd>, ADD_FM_MM<0, 0x210>;
+ def ADD_MM : MMRel, ArithLogicR<"add", CPURegsOpnd>, ADD_FM_MM<0, 0x110>;
+ def SUB_MM : MMRel, ArithLogicR<"sub", CPURegsOpnd>, ADD_FM_MM<0, 0x190>;
+ def SLT_MM : MMRel, SetCC_R<"slt", setlt, CPURegs>, ADD_FM_MM<0, 0x350>;
+ def SLTu_MM : MMRel, SetCC_R<"sltu", setult, CPURegs>,
+ ADD_FM_MM<0, 0x390>;
+ def AND_MM : MMRel, ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>,
+ ADD_FM_MM<0, 0x250>;
+ def OR_MM : MMRel, ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>,
+ ADD_FM_MM<0, 0x290>;
+ def XOR_MM : MMRel, ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>,
+ ADD_FM_MM<0, 0x310>;
+ def NOR_MM : MMRel, LogicNOR<"nor", CPURegsOpnd>, ADD_FM_MM<0, 0x2d0>;
+ def MULT_MM : MMRel, Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>,
+ MULT_FM_MM<0x22c>;
+ def MULTu_MM : MMRel, Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>,
+ MULT_FM_MM<0x26c>;
+
+ /// Shift Instructions
+ def SLL_MM : MMRel, shift_rotate_imm<"sll", shamt, CPURegsOpnd>,
+ SRA_FM_MM<0, 0>;
+ def SRL_MM : MMRel, shift_rotate_imm<"srl", shamt, CPURegsOpnd>,
+ SRA_FM_MM<0x40, 0>;
+ def SRA_MM : MMRel, shift_rotate_imm<"sra", shamt, CPURegsOpnd>,
+ SRA_FM_MM<0x80, 0>;
+ def SLLV_MM : MMRel, shift_rotate_reg<"sllv", CPURegsOpnd>,
+ SRLV_FM_MM<0x10, 0>;
+ def SRLV_MM : MMRel, shift_rotate_reg<"srlv", CPURegsOpnd>,
+ SRLV_FM_MM<0x50, 0>;
+ def SRAV_MM : MMRel, shift_rotate_reg<"srav", CPURegsOpnd>,
+ SRLV_FM_MM<0x90, 0>;
+ def ROTR_MM : MMRel, shift_rotate_imm<"rotr", shamt, CPURegsOpnd>,
+ SRA_FM_MM<0xc0, 0>;
+ def ROTRV_MM : MMRel, shift_rotate_reg<"rotrv", CPURegsOpnd>,
+ SRLV_FM_MM<0xd0, 0>;
+
+ /// Load and Store Instructions - aligned
+ defm LB_MM : LoadM<"lb", CPURegs, sextloadi8>, MMRel, LW_FM_MM<0x7>;
+ defm LBu_MM : LoadM<"lbu", CPURegs, zextloadi8>, MMRel, LW_FM_MM<0x5>;
+ defm LH_MM : LoadM<"lh", CPURegs, sextloadi16>, MMRel, LW_FM_MM<0xf>;
+ defm LHu_MM : LoadM<"lhu", CPURegs, zextloadi16>, MMRel, LW_FM_MM<0xd>;
+ defm LW_MM : LoadM<"lw", CPURegs>, MMRel, LW_FM_MM<0x3f>;
+ defm SB_MM : StoreM<"sb", CPURegs, truncstorei8>, MMRel, LW_FM_MM<0x6>;
+ defm SH_MM : StoreM<"sh", CPURegs, truncstorei16>, MMRel, LW_FM_MM<0xe>;
+ defm SW_MM : StoreM<"sw", CPURegs>, MMRel, LW_FM_MM<0x3e>;
+}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 00b3449..c1c635c 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -35,6 +35,11 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ if (!Subtarget.inMips16Mode())
+ return false;
+ return MipsDAGToDAGISel::runOnMachineFunction(MF);
+}
/// Select multiply instructions.
std::pair<SDNode*, SDNode*>
Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
@@ -267,7 +272,7 @@ std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) {
EVT VT = LHS.getValueType();
unsigned Sltu_op = Mips::SltuRxRyRz16;
- SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2);
+ SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops);
unsigned Addu_op = Mips::AdduRxRyRz16;
SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT,
SDValue(Carry,0), RHS);
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h
index baa8587..f05f9b7 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -28,6 +28,8 @@ private:
SDValue getMips16SPAliasReg();
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg);
virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index 23eb537..f63318f 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -53,7 +53,6 @@ Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
if (Mips16HardFloat)
setMips16HardFloatLibCalls();
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
@@ -614,7 +613,8 @@ MachineBasicBlock
unsigned regX = MI->getOperand(0).getReg();
unsigned regY = MI->getOperand(1).getReg();
MachineBasicBlock *target = MI->getOperand(2).getMBB();
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX)
+ .addReg(regY);
BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -636,7 +636,8 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins(
CmpOpc = CmpiXOpc;
else
llvm_unreachable("immediate field not usable");
- BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm);
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX)
+ .addImm(imm);
BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
diff --git a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
index 6cca227..7ad18f2 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -1,5 +1,4 @@
-
-//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -== ----------===//
+//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information --------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
index 846a822..fc533fb 100644
--- a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -66,14 +66,12 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc],
defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64>;
}
-/// Pseudo instructions for loading, storing and copying accumulator registers.
+/// Pseudo instructions for loading and storing accumulator registers.
let isPseudo = 1 in {
defm LOAD_AC128 : LoadM<"load_ac128", ACRegs128>;
defm STORE_AC128 : StoreM<"store_ac128", ACRegs128>;
}
-def COPY_AC128 : PseudoSE<(outs ACRegs128:$dst), (ins ACRegs128:$src), []>;
-
//===----------------------------------------------------------------------===//
// Instruction definition
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 1876cb6..6e4feda 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -46,6 +46,10 @@
using namespace llvm;
bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ // Initialize TargetLoweringObjectFile.
+ if (Subtarget->allowMixed16_32())
+ const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+ .Initialize(OutContext, TM);
MipsFI = MF.getInfo<MipsFunctionInfo>();
AsmPrinter::runOnMachineFunction(MF);
return true;
@@ -245,12 +249,18 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() {
void MipsAsmPrinter::EmitFunctionBodyStart() {
MCInstLowering.Initialize(Mang, &MF->getContext());
- emitFrameDirective();
+ bool IsNakedFunction =
+ MF->getFunction()->
+ getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::Naked);
+ if (!IsNakedFunction)
+ emitFrameDirective();
if (OutStreamer.hasRawTextSupport()) {
SmallString<128> Str;
raw_svector_ostream OS(Str);
- printSavedRegsBitmask(OS);
+ if (!IsNakedFunction)
+ printSavedRegsBitmask(OS);
OutStreamer.EmitRawText(OS.str());
if (!Subtarget->inMips16Mode()) {
OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
@@ -419,12 +429,18 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNum, unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier.
+ int Offset = 0;
+ // Currently we are expecting either no ExtraCode or 'D'.
+ if (ExtraCode) {
+ if (ExtraCode[0] == 'D')
+ Offset = 4;
+ else
+ return true; // Unknown modifier.
+ }
const MachineOperand &MO = MI->getOperand(OpNum);
assert(MO.isReg() && "unexpected inline asm memory operand");
- O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+ O << Offset << "($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
return false;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp
index 1d86d90..3fc402b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -116,7 +116,7 @@ private:
int Offset) const;
/// Expand pseudo instructions with accumulator register operands.
- void expandACCInstr(MachineBasicBlock::instr_iterator &MI,
+ void expandACCInstr(MachineBasicBlock::instr_iterator MI,
MachineBasicBlock &MBB, unsigned Opc) const;
/// \brief Expand pseudo instruction. Return true if MI was expanded.
@@ -302,7 +302,7 @@ void MipsCodeEmitter::emitWord(unsigned Word) {
MCE.emitWordBE(Word);
}
-void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator &MI,
+void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator MI,
MachineBasicBlock &MBB,
unsigned Opc) const {
// Expand "pseudomult $ac0, $t0, $t1" to "mult $t0, $t1".
diff --git a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
index b5de1eb..1951324 100644
--- a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -80,6 +80,10 @@ FunctionPass *llvm::createMipsConstantIslandPass(MipsTargetMachine &tm) {
}
bool MipsConstantIslands::runOnMachineFunction(MachineFunction &F) {
- return true;
+ // The intention is for this to be a mips16-only pass for now.
+ // FIXME:
+ // if (!TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+ // return false;
+ return false;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
index a72a763..cf09113 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -219,6 +219,33 @@ class MULT_FMT<bits<6> opcode, bits<6> funct> : DSPInst {
let Inst{5-0} = funct;
}
+// MFHI sub-class format.
+class MFHI_FMT<bits<6> funct> : DSPInst {
+ bits<5> rd;
+ bits<2> ac;
+
+ let Inst{31-26} = 0;
+ let Inst{25-23} = 0;
+ let Inst{22-21} = ac;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
+// MTHI sub-class format.
+class MTHI_FMT<bits<6> funct> : DSPInst {
+ bits<5> rs;
+ bits<2> ac;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rs;
+ let Inst{20-13} = 0;
+ let Inst{12-11} = ac;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = funct;
+}
+
// EXTR.W sub-class format (type 1).
class EXTR_W_TY1_FMT<bits<5> op> : DSPInst {
bits<5> rt;
diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
index 3c116e1..c12878a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -26,6 +26,8 @@ def SDT_MipsShilo : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
SDTCisSameAs<0, 2>, SDTCisVT<1, i32>]>;
def SDT_MipsDPA : SDTypeProfile<1, 3, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>,
SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+def SDT_MipsSHIFT_DSP : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
class MipsDSPBase<string Opc, SDTypeProfile Prof> :
SDNode<!strconcat("MipsISD::", Opc), Prof>;
@@ -74,18 +76,19 @@ def MipsMADD_DSP : MipsDSPBase<"MADD_DSP", SDT_MipsDPA>;
def MipsMADDU_DSP : MipsDSPBase<"MADDU_DSP", SDT_MipsDPA>;
def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>;
def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>;
+def MipsSHLL_DSP : MipsDSPBase<"SHLL_DSP", SDT_MipsSHIFT_DSP>;
+def MipsSHRA_DSP : MipsDSPBase<"SHRA_DSP", SDT_MipsSHIFT_DSP>;
+def MipsSHRL_DSP : MipsDSPBase<"SHRL_DSP", SDT_MipsSHIFT_DSP>;
+def MipsSETCC_DSP : MipsDSPBase<"SETCC_DSP", SDTSetCC>;
+def MipsSELECT_CC_DSP : MipsDSPBase<"SELECT_CC_DSP", SDTSelectCC>;
// Flags.
-class UseAC {
- list<Register> Uses = [AC0];
+class Uses<list<Register> Regs> {
+ list<Register> Uses = Regs;
}
-class UseDSPCtrl {
- list<Register> Uses = [DSPCtrl];
-}
-
-class ClearDefs {
- list<Register> Defs = [];
+class Defs<list<Register> Regs> {
+ list<Register> Defs = Regs;
}
// Instruction encoding.
@@ -145,6 +148,10 @@ class MAQ_S_W_PHL_ENC : DPA_W_PH_FMT<0b10100>;
class MAQ_S_W_PHR_ENC : DPA_W_PH_FMT<0b10110>;
class MAQ_SA_W_PHL_ENC : DPA_W_PH_FMT<0b10000>;
class MAQ_SA_W_PHR_ENC : DPA_W_PH_FMT<0b10010>;
+class MFHI_ENC : MFHI_FMT<0b010000>;
+class MFLO_ENC : MFHI_FMT<0b010010>;
+class MTHI_ENC : MTHI_FMT<0b010001>;
+class MTLO_ENC : MTHI_FMT<0b010011>;
class DPAU_H_QBL_ENC : DPA_W_PH_FMT<0b00011>;
class DPAU_H_QBR_ENC : DPA_W_PH_FMT<0b00111>;
class DPSU_H_QBL_ENC : DPA_W_PH_FMT<0b01011>;
@@ -256,7 +263,6 @@ class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
}
class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -267,7 +273,6 @@ class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs))];
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
}
class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -278,7 +283,6 @@ class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rs, $rt");
list<dag> Pattern = [(OpNode RCS:$rs, RCT:$rt)];
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
}
class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -289,7 +293,6 @@ class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
}
class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -300,7 +303,6 @@ class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
list<dag> Pattern = [(set RCT:$rt, (OpNode RCS:$src, RCS:$rs, immZExt5:$sa))];
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
string Constraints = "$src = $rt";
}
@@ -312,7 +314,6 @@ class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rt");
list<dag> Pattern = [(set RCD:$rd, (OpNode RCT:$rt))];
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
}
class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -322,7 +323,6 @@ class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $imm");
list<dag> Pattern = [(set RC:$rd, (OpNode immPat:$imm))];
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
}
class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -332,7 +332,6 @@ class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs_sa))];
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
}
class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -343,7 +342,7 @@ class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))];
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
+ bit hasSideEffects = 1;
}
class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -354,7 +353,6 @@ class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<dag> Pattern = [(set CPURegs:$rd,
(OpNode CPURegs:$base, CPURegs:$index))];
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
bit mayLoad = 1;
}
@@ -366,7 +364,6 @@ class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
}
class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -377,7 +374,6 @@ class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<dag> Pattern = [(set CPURegs:$rt,
(OpNode CPURegs:$src, CPURegs:$rs, ImmOp:$sa))];
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
string Constraints = "$src = $rt";
}
@@ -387,7 +383,6 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag InOperandList = (ins ACRegsDSP:$ac, CPURegs:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
}
class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -396,7 +391,6 @@ class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
}
class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
@@ -405,7 +399,6 @@ class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
string AsmString = !strconcat(instr_asm, "\t$ac, $shift");
list<dag> Pattern = [(set ACRegsDSP:$ac,
(OpNode immSExt6:$shift, ACRegsDSP:$acin))];
- list<Register> Defs = [DSPCtrl];
string Constraints = "$acin = $ac";
}
@@ -415,7 +408,6 @@ class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
string AsmString = !strconcat(instr_asm, "\t$ac, $rs");
list<dag> Pattern = [(set ACRegsDSP:$ac,
(OpNode CPURegs:$rs, ACRegsDSP:$acin))];
- list<Register> Defs = [DSPCtrl];
string Constraints = "$acin = $ac";
}
@@ -425,7 +417,6 @@ class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
list<dag> Pattern = [(set ACRegsDSP:$ac,
(OpNode CPURegs:$rs, ACRegsDSP:$acin))];
- list<Register> Uses = [DSPCtrl];
string Constraints = "$acin = $ac";
}
@@ -436,7 +427,6 @@ class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $mask");
list<dag> Pattern = [(set CPURegs:$rd, (OpNode immZExt10:$mask))];
InstrItinClass Itinerary = itin;
- list<Register> Uses = [DSPCtrl];
}
class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -446,7 +436,6 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rs, $mask");
list<dag> Pattern = [(OpNode CPURegs:$rs, immZExt10:$mask)];
InstrItinClass Itinerary = itin;
- list<Register> Defs = [DSPCtrl];
}
class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
@@ -455,7 +444,6 @@ class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
list<dag> Pattern = [(set ACRegsDSP:$ac,
(OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))];
- list<Register> Defs = [DSPCtrl];
string Constraints = "$acin = $ac";
}
@@ -482,9 +470,22 @@ class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string Constraints = "$acin = $ac";
}
+class MFHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> {
+ dag OutOperandList = (outs CPURegs:$rd);
+ dag InOperandList = (ins RC:$ac);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $ac");
+ InstrItinClass Itinerary = itin;
+}
+
+class MTHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> {
+ dag OutOperandList = (outs RC:$ac);
+ dag InOperandList = (ins CPURegs:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+ InstrItinClass Itinerary = itin;
+}
+
class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
MipsPseudo<(outs CPURegs:$dst), (ins), [(set CPURegs:$dst, (OpNode))]> {
- list<Register> Uses = [DSPCtrl];
bit usesCustomInserter = 1;
}
@@ -493,7 +494,6 @@ class BPOSGE32_DESC_BASE<string instr_asm, InstrItinClass itin> {
dag InOperandList = (ins brtarget:$offset);
string AsmString = !strconcat(instr_asm, "\t$offset");
InstrItinClass Itinerary = itin;
- list<Register> Uses = [DSPCtrl];
bit isBranch = 1;
bit isTerminator = 1;
bit hasDelaySlot = 1;
@@ -506,7 +506,6 @@ class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rt, $rs");
list<dag> Pattern = [(set CPURegs:$rt, (OpNode CPURegs:$src, CPURegs:$rs))];
InstrItinClass Itinerary = itin;
- list<Register> Uses = [DSPCtrl];
string Constraints = "$src = $rt";
}
@@ -515,178 +514,183 @@ class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
//===----------------------------------------------------------------------===//
// Addition/subtraction
-class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", int_mips_addu_qb, NoItinerary,
- DSPRegs, DSPRegs>, IsCommutable;
+class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", null_frag, NoItinerary,
+ DSPRegs, DSPRegs>, IsCommutable,
+ Defs<[DSPOutFlag20]>;
class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb,
NoItinerary, DSPRegs, DSPRegs>,
- IsCommutable;
+ IsCommutable, Defs<[DSPOutFlag20]>;
-class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", int_mips_subu_qb, NoItinerary,
- DSPRegs, DSPRegs>;
+class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", null_frag, NoItinerary,
+ DSPRegs, DSPRegs>,
+ Defs<[DSPOutFlag20]>;
class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb,
- NoItinerary, DSPRegs, DSPRegs>;
+ NoItinerary, DSPRegs, DSPRegs>,
+ Defs<[DSPOutFlag20]>;
-class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", int_mips_addq_ph, NoItinerary,
- DSPRegs, DSPRegs>, IsCommutable;
+class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", null_frag, NoItinerary,
+ DSPRegs, DSPRegs>, IsCommutable,
+ Defs<[DSPOutFlag20]>;
class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph,
NoItinerary, DSPRegs, DSPRegs>,
- IsCommutable;
+ IsCommutable, Defs<[DSPOutFlag20]>;
-class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", int_mips_subq_ph, NoItinerary,
- DSPRegs, DSPRegs>;
+class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", null_frag, NoItinerary,
+ DSPRegs, DSPRegs>,
+ Defs<[DSPOutFlag20]>;
class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph,
- NoItinerary, DSPRegs, DSPRegs>;
+ NoItinerary, DSPRegs, DSPRegs>,
+ Defs<[DSPOutFlag20]>;
class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w,
NoItinerary, CPURegs, CPURegs>,
- IsCommutable;
+ IsCommutable, Defs<[DSPOutFlag20]>;
class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w,
- NoItinerary, CPURegs, CPURegs>;
+ NoItinerary, CPURegs, CPURegs>,
+ Defs<[DSPOutFlag20]>;
-class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", int_mips_addsc, NoItinerary,
- CPURegs, CPURegs>, IsCommutable;
+class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", null_frag, NoItinerary,
+ CPURegs, CPURegs>, IsCommutable,
+ Defs<[DSPCarry]>;
-class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", int_mips_addwc, NoItinerary,
+class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", null_frag, NoItinerary,
CPURegs, CPURegs>,
- IsCommutable, UseDSPCtrl;
+ IsCommutable, Uses<[DSPCarry]>, Defs<[DSPOutFlag20]>;
class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary,
- CPURegs, CPURegs>, ClearDefs;
+ CPURegs, CPURegs>;
class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb,
- NoItinerary, CPURegs, DSPRegs>,
- ClearDefs;
+ NoItinerary, CPURegs, DSPRegs>;
// Absolute value
class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPRegs>,
+ Defs<[DSPOutFlag20]>;
class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w,
- NoItinerary, CPURegs>;
+ NoItinerary, CPURegs>,
+ Defs<[DSPOutFlag20]>;
// Precision reduce/expand
class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph",
int_mips_precrq_qb_ph,
- NoItinerary, DSPRegs, DSPRegs>,
- ClearDefs;
+ NoItinerary, DSPRegs, DSPRegs>;
class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w",
int_mips_precrq_ph_w,
- NoItinerary, DSPRegs, CPURegs>,
- ClearDefs;
+ NoItinerary, DSPRegs, CPURegs>;
class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w",
int_mips_precrq_rs_ph_w,
NoItinerary, DSPRegs,
- CPURegs>;
+ CPURegs>,
+ Defs<[DSPOutFlag22]>;
class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph",
int_mips_precrqu_s_qb_ph,
NoItinerary, DSPRegs,
- DSPRegs>;
+ DSPRegs>,
+ Defs<[DSPOutFlag22]>;
class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl",
int_mips_preceq_w_phl,
- NoItinerary, CPURegs, DSPRegs>,
- ClearDefs;
+ NoItinerary, CPURegs, DSPRegs>;
class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr",
int_mips_preceq_w_phr,
- NoItinerary, CPURegs, DSPRegs>,
- ClearDefs;
+ NoItinerary, CPURegs, DSPRegs>;
class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl",
int_mips_precequ_ph_qbl,
- NoItinerary, DSPRegs>,
- ClearDefs;
+ NoItinerary, DSPRegs>;
class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr",
int_mips_precequ_ph_qbr,
- NoItinerary, DSPRegs>,
- ClearDefs;
+ NoItinerary, DSPRegs>;
class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla",
int_mips_precequ_ph_qbla,
- NoItinerary, DSPRegs>,
- ClearDefs;
+ NoItinerary, DSPRegs>;
class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra",
int_mips_precequ_ph_qbra,
- NoItinerary, DSPRegs>,
- ClearDefs;
+ NoItinerary, DSPRegs>;
class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl",
int_mips_preceu_ph_qbl,
- NoItinerary, DSPRegs>,
- ClearDefs;
+ NoItinerary, DSPRegs>;
class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr",
int_mips_preceu_ph_qbr,
- NoItinerary, DSPRegs>,
- ClearDefs;
+ NoItinerary, DSPRegs>;
class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla",
int_mips_preceu_ph_qbla,
- NoItinerary, DSPRegs>,
- ClearDefs;
+ NoItinerary, DSPRegs>;
class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra",
int_mips_preceu_ph_qbra,
- NoItinerary, DSPRegs>,
- ClearDefs;
+ NoItinerary, DSPRegs>;
// Shift
-class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", int_mips_shll_qb, immZExt3,
- NoItinerary, DSPRegs>;
+class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3,
+ NoItinerary, DSPRegs>,
+ Defs<[DSPOutFlag22]>;
class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPRegs>,
+ Defs<[DSPOutFlag22]>;
-class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", int_mips_shrl_qb, immZExt3,
- NoItinerary, DSPRegs>, ClearDefs;
+class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3,
+ NoItinerary, DSPRegs>;
class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb,
- NoItinerary, DSPRegs>, ClearDefs;
+ NoItinerary, DSPRegs>;
-class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", int_mips_shll_ph, immZExt4,
- NoItinerary, DSPRegs>;
+class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4,
+ NoItinerary, DSPRegs>,
+ Defs<[DSPOutFlag22]>;
class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPRegs>,
+ Defs<[DSPOutFlag22]>;
class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph,
- immZExt4, NoItinerary, DSPRegs>;
+ immZExt4, NoItinerary, DSPRegs>,
+ Defs<[DSPOutFlag22]>;
class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPRegs>,
+ Defs<[DSPOutFlag22]>;
-class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", int_mips_shra_ph, immZExt4,
- NoItinerary, DSPRegs>, ClearDefs;
+class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4,
+ NoItinerary, DSPRegs>;
class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph,
- NoItinerary, DSPRegs>, ClearDefs;
+ NoItinerary, DSPRegs>;
class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph,
- immZExt4, NoItinerary, DSPRegs>,
- ClearDefs;
+ immZExt4, NoItinerary, DSPRegs>;
class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph,
- NoItinerary, DSPRegs>, ClearDefs;
+ NoItinerary, DSPRegs>;
class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w,
- immZExt5, NoItinerary, CPURegs>;
+ immZExt5, NoItinerary, CPURegs>,
+ Defs<[DSPOutFlag22]>;
class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w,
- NoItinerary, CPURegs>;
+ NoItinerary, CPURegs>,
+ Defs<[DSPOutFlag22]>;
class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w,
- immZExt5, NoItinerary, CPURegs>,
- ClearDefs;
+ immZExt5, NoItinerary, CPURegs>;
class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w,
NoItinerary, CPURegs>;
@@ -694,36 +698,49 @@ class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w,
// Multiplication
class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl",
int_mips_muleu_s_ph_qbl,
- NoItinerary, DSPRegs, DSPRegs>;
+ NoItinerary, DSPRegs, DSPRegs>,
+ Defs<[DSPOutFlag21]>;
class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr",
int_mips_muleu_s_ph_qbr,
- NoItinerary, DSPRegs, DSPRegs>;
+ NoItinerary, DSPRegs, DSPRegs>,
+ Defs<[DSPOutFlag21]>;
class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl",
int_mips_muleq_s_w_phl,
NoItinerary, CPURegs, DSPRegs>,
- IsCommutable;
+ IsCommutable, Defs<[DSPOutFlag21]>;
class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr",
int_mips_muleq_s_w_phr,
NoItinerary, CPURegs, DSPRegs>,
- IsCommutable;
+ IsCommutable, Defs<[DSPOutFlag21]>;
class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph,
NoItinerary, DSPRegs, DSPRegs>,
- IsCommutable;
+ IsCommutable, Defs<[DSPOutFlag21]>;
class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph",
- MipsMULSAQ_S_W_PH>;
+ MipsMULSAQ_S_W_PH>,
+ Defs<[DSPOutFlag16_19]>;
-class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>;
+class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>,
+ Defs<[DSPOutFlag16_19]>;
-class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>;
+class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>,
+ Defs<[DSPOutFlag16_19]>;
-class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>;
+class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>,
+ Defs<[DSPOutFlag16_19]>;
-class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>;
+class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>,
+ Defs<[DSPOutFlag16_19]>;
+
+// Move from/to hi/lo.
+class MFHI_DESC : MFHI_DESC_BASE<"mfhi", HIRegsDSP, NoItinerary>;
+class MFLO_DESC : MFHI_DESC_BASE<"mflo", LORegsDSP, NoItinerary>;
+class MTHI_DESC : MTHI_DESC_BASE<"mthi", HIRegsDSP, NoItinerary>;
+class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LORegsDSP, NoItinerary>;
// Dot product with accumulate/subtract
class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>;
@@ -734,13 +751,17 @@ class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl", MipsDPSU_H_QBL>;
class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr", MipsDPSU_H_QBR>;
-class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>;
+class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>,
+ Defs<[DSPOutFlag16_19]>;
-class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>;
+class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>,
+ Defs<[DSPOutFlag16_19]>;
-class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>;
+class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>,
+ Defs<[DSPOutFlag16_19]>;
-class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>;
+class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>,
+ Defs<[DSPOutFlag16_19]>;
class MULT_DSP_DESC : MULT_DESC_BASE<"mult", MipsMult, NoItinerary>;
class MULTU_DSP_DESC : MULT_DESC_BASE<"multu", MipsMultu, NoItinerary>;
@@ -752,15 +773,16 @@ class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>;
// Comparison
class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb",
int_mips_cmpu_eq_qb, NoItinerary,
- DSPRegs>, IsCommutable;
+ DSPRegs>,
+ IsCommutable, Defs<[DSPCCond]>;
class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb",
int_mips_cmpu_lt_qb, NoItinerary,
- DSPRegs>, IsCommutable;
+ DSPRegs>, Defs<[DSPCCond]>;
class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb",
int_mips_cmpu_le_qb, NoItinerary,
- DSPRegs>, IsCommutable;
+ DSPRegs>, Defs<[DSPCCond]>;
class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb",
int_mips_cmpgu_eq_qb,
@@ -769,222 +791,235 @@ class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb",
class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb",
int_mips_cmpgu_lt_qb,
- NoItinerary, CPURegs, DSPRegs>,
- IsCommutable;
+ NoItinerary, CPURegs, DSPRegs>;
class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb",
int_mips_cmpgu_le_qb,
- NoItinerary, CPURegs, DSPRegs>,
- IsCommutable;
+ NoItinerary, CPURegs, DSPRegs>;
class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph,
NoItinerary, DSPRegs>,
- IsCommutable;
+ IsCommutable, Defs<[DSPCCond]>;
class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph,
NoItinerary, DSPRegs>,
- IsCommutable;
+ Defs<[DSPCCond]>;
class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph,
NoItinerary, DSPRegs>,
- IsCommutable;
+ Defs<[DSPCCond]>;
// Misc
class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev,
- NoItinerary, CPURegs>, ClearDefs;
+ NoItinerary, CPURegs>;
class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph,
- NoItinerary, DSPRegs, DSPRegs>,
- ClearDefs;
+ NoItinerary, DSPRegs, DSPRegs>;
class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8,
- NoItinerary, DSPRegs>, ClearDefs;
+ NoItinerary, DSPRegs>;
class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10,
- NoItinerary, DSPRegs>, ClearDefs;
+ NoItinerary, DSPRegs>;
class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
- NoItinerary, DSPRegs, CPURegs>,
- ClearDefs;
+ NoItinerary, DSPRegs, CPURegs>;
class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph,
- NoItinerary, DSPRegs, CPURegs>,
- ClearDefs;
+ NoItinerary, DSPRegs, CPURegs>;
class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb,
NoItinerary, DSPRegs, DSPRegs>,
- ClearDefs, UseDSPCtrl;
+ Uses<[DSPCCond]>;
class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph,
NoItinerary, DSPRegs, DSPRegs>,
- ClearDefs, UseDSPCtrl;
+ Uses<[DSPCCond]>;
-class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>, ClearDefs;
+class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>;
-class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>, ClearDefs;
+class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>;
-class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>, ClearDefs;
+class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>;
class BPOSGE32_DESC : BPOSGE32_DESC_BASE<"bposge32", NoItinerary>;
// Extr
-class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>;
+class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>,
+ Uses<[DSPPos]>, Defs<[DSPEFI]>;
-class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>;
+class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>,
+ Uses<[DSPPos]>, Defs<[DSPEFI]>;
-class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>;
+class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>,
+ Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", MipsEXTPDP,
- NoItinerary>;
+ NoItinerary>,
+ Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
-class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>;
+class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", MipsEXTR_W,
- NoItinerary>;
+ NoItinerary>, Defs<[DSPOutFlag23]>;
class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", MipsEXTR_R_W,
- NoItinerary>;
+ NoItinerary>,
+ Defs<[DSPOutFlag23]>;
class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", MipsEXTR_R_W,
- NoItinerary>;
+ NoItinerary>,
+ Defs<[DSPOutFlag23]>;
class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W,
- NoItinerary>;
+ NoItinerary>,
+ Defs<[DSPOutFlag23]>;
class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W,
- NoItinerary>;
+ NoItinerary>,
+ Defs<[DSPOutFlag23]>;
class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H,
- NoItinerary>;
+ NoItinerary>,
+ Defs<[DSPOutFlag23]>;
class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H,
- NoItinerary>;
+ NoItinerary>,
+ Defs<[DSPOutFlag23]>;
class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>;
class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov", MipsSHILO>;
-class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>;
+class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>, Defs<[DSPPos]>;
class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>;
class WRDSP_DESC : WRDSP_DESC_BASE<"wrdsp", int_mips_wrdsp, NoItinerary>;
-class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>;
+class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>,
+ Uses<[DSPPos, DSPSCount]>;
//===----------------------------------------------------------------------===//
// MIPS DSP Rev 2
// Addition/subtraction
class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary,
- DSPRegs, DSPRegs>, IsCommutable;
+ DSPRegs, DSPRegs>, IsCommutable,
+ Defs<[DSPOutFlag20]>;
class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph,
NoItinerary, DSPRegs, DSPRegs>,
- IsCommutable;
+ IsCommutable, Defs<[DSPOutFlag20]>;
class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary,
- DSPRegs, DSPRegs>;
+ DSPRegs, DSPRegs>,
+ Defs<[DSPOutFlag20]>;
class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph,
- NoItinerary, DSPRegs, DSPRegs>;
+ NoItinerary, DSPRegs, DSPRegs>,
+ Defs<[DSPOutFlag20]>;
class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb,
- NoItinerary, DSPRegs>,
- ClearDefs, IsCommutable;
+ NoItinerary, DSPRegs>, IsCommutable;
class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb,
- NoItinerary, DSPRegs>,
- ClearDefs, IsCommutable;
+ NoItinerary, DSPRegs>, IsCommutable;
class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb,
- NoItinerary, DSPRegs>, ClearDefs;
+ NoItinerary, DSPRegs>;
class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb,
- NoItinerary, DSPRegs>, ClearDefs;
+ NoItinerary, DSPRegs>;
class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph,
- NoItinerary, DSPRegs>,
- ClearDefs, IsCommutable;
+ NoItinerary, DSPRegs>, IsCommutable;
class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph,
- NoItinerary, DSPRegs>,
- ClearDefs, IsCommutable;
+ NoItinerary, DSPRegs>, IsCommutable;
class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph,
- NoItinerary, DSPRegs>, ClearDefs;
+ NoItinerary, DSPRegs>;
class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph,
- NoItinerary, DSPRegs>, ClearDefs;
+ NoItinerary, DSPRegs>;
class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w,
- NoItinerary, CPURegs>,
- ClearDefs, IsCommutable;
+ NoItinerary, CPURegs>, IsCommutable;
class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w,
- NoItinerary, CPURegs>,
- ClearDefs, IsCommutable;
+ NoItinerary, CPURegs>, IsCommutable;
class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w,
- NoItinerary, CPURegs>, ClearDefs;
+ NoItinerary, CPURegs>;
class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w,
- NoItinerary, CPURegs>, ClearDefs;
+ NoItinerary, CPURegs>;
// Comparison
class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb",
int_mips_cmpgdu_eq_qb,
NoItinerary, CPURegs, DSPRegs>,
- IsCommutable;
+ IsCommutable, Defs<[DSPCCond]>;
class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb",
int_mips_cmpgdu_lt_qb,
NoItinerary, CPURegs, DSPRegs>,
- IsCommutable;
+ Defs<[DSPCCond]>;
class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb",
int_mips_cmpgdu_le_qb,
NoItinerary, CPURegs, DSPRegs>,
- IsCommutable;
+ Defs<[DSPCCond]>;
// Absolute
class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPRegs>,
+ Defs<[DSPOutFlag20]>;
// Multiplication
-class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", int_mips_mul_ph, NoItinerary,
- DSPRegs>, IsCommutable;
+class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", null_frag, NoItinerary,
+ DSPRegs>, IsCommutable,
+ Defs<[DSPOutFlag21]>;
class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph,
- NoItinerary, DSPRegs>, IsCommutable;
+ NoItinerary, DSPRegs>, IsCommutable,
+ Defs<[DSPOutFlag21]>;
class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w,
- NoItinerary, CPURegs>, IsCommutable;
+ NoItinerary, CPURegs>, IsCommutable,
+ Defs<[DSPOutFlag21]>;
class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w,
- NoItinerary, CPURegs>, IsCommutable;
+ NoItinerary, CPURegs>, IsCommutable,
+ Defs<[DSPOutFlag21]>;
class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph,
NoItinerary, DSPRegs, DSPRegs>,
- IsCommutable;
+ IsCommutable, Defs<[DSPOutFlag21]>;
// Dot product with accumulate/subtract
class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph", MipsDPA_W_PH>;
class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph", MipsDPS_W_PH>;
-class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>;
+class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>,
+ Defs<[DSPOutFlag16_19]>;
class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph",
- MipsDPAQX_SA_W_PH>;
+ MipsDPAQX_SA_W_PH>,
+ Defs<[DSPOutFlag16_19]>;
class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph", MipsDPAX_W_PH>;
class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph", MipsDPSX_W_PH>;
-class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>;
+class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>,
+ Defs<[DSPOutFlag16_19]>;
class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph",
- MipsDPSQX_SA_W_PH>;
+ MipsDPSQX_SA_W_PH>,
+ Defs<[DSPOutFlag16_19]>;
class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>;
@@ -996,45 +1031,45 @@ class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph",
class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w",
int_mips_precr_sra_ph_w,
NoItinerary, DSPRegs,
- CPURegs>, ClearDefs;
+ CPURegs>;
class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w",
int_mips_precr_sra_r_ph_w,
NoItinerary, DSPRegs,
- CPURegs>, ClearDefs;
+ CPURegs>;
// Shift
-class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", int_mips_shra_qb, immZExt3,
- NoItinerary, DSPRegs>, ClearDefs;
+class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3,
+ NoItinerary, DSPRegs>;
class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb,
- NoItinerary, DSPRegs>, ClearDefs;
+ NoItinerary, DSPRegs>;
class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb,
- immZExt3, NoItinerary, DSPRegs>,
- ClearDefs;
+ immZExt3, NoItinerary, DSPRegs>;
class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb,
- NoItinerary, DSPRegs>, ClearDefs;
+ NoItinerary, DSPRegs>;
-class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", int_mips_shrl_ph, immZExt4,
- NoItinerary, DSPRegs>, ClearDefs;
+class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4,
+ NoItinerary, DSPRegs>;
class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
- NoItinerary, DSPRegs>, ClearDefs;
+ NoItinerary, DSPRegs>;
// Misc
class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5,
- NoItinerary>, ClearDefs;
+ NoItinerary>;
class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2,
- NoItinerary>, ClearDefs;
+ NoItinerary>;
class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5,
- NoItinerary>, ClearDefs;
+ NoItinerary>;
// Pseudos.
-def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32, NoItinerary>;
+def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32,
+ NoItinerary>, Uses<[DSPPos]>;
// Instruction defs.
// MIPS DSP Rev 1
@@ -1094,6 +1129,10 @@ def MAQ_S_W_PHL : MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC;
def MAQ_S_W_PHR : MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC;
def MAQ_SA_W_PHL : MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC;
def MAQ_SA_W_PHR : MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC;
+def MFHI_DSP : MFHI_ENC, MFHI_DESC;
+def MFLO_DSP : MFLO_ENC, MFLO_DESC;
+def MTHI_DSP : MTHI_ENC, MTHI_DESC;
+def MTLO_DSP : MTLO_ENC, MTLO_DESC;
def DPAU_H_QBL : DPAU_H_QBL_ENC, DPAU_H_QBL_DESC;
def DPAU_H_QBR : DPAU_H_QBR_ENC, DPAU_H_QBR_DESC;
def DPSU_H_QBL : DPSU_H_QBL_ENC, DPSU_H_QBL_DESC;
@@ -1201,13 +1240,35 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC;
}
// Pseudos.
-/// Pseudo instructions for loading, storing and copying accumulator registers.
let isPseudo = 1 in {
+ // Pseudo instructions for loading and storing accumulator registers.
defm LOAD_AC_DSP : LoadM<"load_ac_dsp", ACRegsDSP>;
defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSP>;
+
+ // Pseudos for loading and storing ccond field of DSP control register.
+ defm LOAD_CCOND_DSP : LoadM<"load_ccond_dsp", DSPCC>;
+ defm STORE_CCOND_DSP : StoreM<"store_ccond_dsp", DSPCC>;
}
-def COPY_AC_DSP : PseudoSE<(outs ACRegsDSP:$dst), (ins ACRegsDSP:$src), []>;
+// Pseudo CMP and PICK instructions.
+class PseudoCMP<Instruction RealInst> :
+ PseudoDSP<(outs DSPCC:$cmp), (ins DSPRegs:$rs, DSPRegs:$rt), []>,
+ PseudoInstExpansion<(RealInst DSPRegs:$rs, DSPRegs:$rt)>, NeverHasSideEffects;
+
+class PseudoPICK<Instruction RealInst> :
+ PseudoDSP<(outs DSPRegs:$rd), (ins DSPCC:$cmp, DSPRegs:$rs, DSPRegs:$rt), []>,
+ PseudoInstExpansion<(RealInst DSPRegs:$rd, DSPRegs:$rs, DSPRegs:$rt)>,
+ NeverHasSideEffects;
+
+def PseudoCMP_EQ_PH : PseudoCMP<CMP_EQ_PH>;
+def PseudoCMP_LT_PH : PseudoCMP<CMP_LT_PH>;
+def PseudoCMP_LE_PH : PseudoCMP<CMP_LE_PH>;
+def PseudoCMPU_EQ_QB : PseudoCMP<CMPU_EQ_QB>;
+def PseudoCMPU_LT_QB : PseudoCMP<CMPU_LT_QB>;
+def PseudoCMPU_LE_QB : PseudoCMP<CMPU_LE_QB>;
+
+def PseudoPICK_PH : PseudoPICK<PICK_PH>;
+def PseudoPICK_QB : PseudoPICK<PICK_QB>;
// Patterns.
class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
@@ -1232,6 +1293,95 @@ def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a),
def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a),
(SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>;
+// Binary operations.
+class DSPBinPat<Instruction Inst, ValueType ValTy, SDPatternOperator Node,
+ Predicate Pred = HasDSP> :
+ DSPPat<(Node ValTy:$a, ValTy:$b), (Inst ValTy:$a, ValTy:$b), Pred>;
+
+def : DSPBinPat<ADDQ_PH, v2i16, int_mips_addq_ph>;
+def : DSPBinPat<ADDQ_PH, v2i16, add>;
+def : DSPBinPat<SUBQ_PH, v2i16, int_mips_subq_ph>;
+def : DSPBinPat<SUBQ_PH, v2i16, sub>;
+def : DSPBinPat<MUL_PH, v2i16, int_mips_mul_ph, HasDSPR2>;
+def : DSPBinPat<MUL_PH, v2i16, mul, HasDSPR2>;
+def : DSPBinPat<ADDU_QB, v4i8, int_mips_addu_qb>;
+def : DSPBinPat<ADDU_QB, v4i8, add>;
+def : DSPBinPat<SUBU_QB, v4i8, int_mips_subu_qb>;
+def : DSPBinPat<SUBU_QB, v4i8, sub>;
+def : DSPBinPat<ADDSC, i32, int_mips_addsc>;
+def : DSPBinPat<ADDSC, i32, addc>;
+def : DSPBinPat<ADDWC, i32, int_mips_addwc>;
+def : DSPBinPat<ADDWC, i32, adde>;
+
+// Shift immediate patterns.
+class DSPShiftPat<Instruction Inst, ValueType ValTy, SDPatternOperator Node,
+ SDPatternOperator Imm, Predicate Pred = HasDSP> :
+ DSPPat<(Node ValTy:$a, Imm:$shamt), (Inst ValTy:$a, Imm:$shamt), Pred>;
+
+def : DSPShiftPat<SHLL_PH, v2i16, MipsSHLL_DSP, imm>;
+def : DSPShiftPat<SHRA_PH, v2i16, MipsSHRA_DSP, imm>;
+def : DSPShiftPat<SHRL_PH, v2i16, MipsSHRL_DSP, imm, HasDSPR2>;
+def : DSPShiftPat<SHLL_PH, v2i16, int_mips_shll_ph, immZExt4>;
+def : DSPShiftPat<SHRA_PH, v2i16, int_mips_shra_ph, immZExt4>;
+def : DSPShiftPat<SHRL_PH, v2i16, int_mips_shrl_ph, immZExt4, HasDSPR2>;
+def : DSPShiftPat<SHLL_QB, v4i8, MipsSHLL_DSP, imm>;
+def : DSPShiftPat<SHRA_QB, v4i8, MipsSHRA_DSP, imm, HasDSPR2>;
+def : DSPShiftPat<SHRL_QB, v4i8, MipsSHRL_DSP, imm>;
+def : DSPShiftPat<SHLL_QB, v4i8, int_mips_shll_qb, immZExt3>;
+def : DSPShiftPat<SHRA_QB, v4i8, int_mips_shra_qb, immZExt3, HasDSPR2>;
+def : DSPShiftPat<SHRL_QB, v4i8, int_mips_shrl_qb, immZExt3>;
+
+// SETCC/SELECT_CC patterns.
+class DSPSetCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy,
+ CondCode CC> :
+ DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)),
+ (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)),
+ (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs)),
+ (ValTy ZERO)))>;
+
+class DSPSetCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy,
+ CondCode CC> :
+ DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)),
+ (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)),
+ (ValTy ZERO),
+ (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs))))>;
+
+class DSPSelectCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy,
+ CondCode CC> :
+ DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)),
+ (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $c, $d))>;
+
+class DSPSelectCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy,
+ CondCode CC> :
+ DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)),
+ (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $d, $c))>;
+
+def : DSPSetCCPat<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETEQ>;
+def : DSPSetCCPat<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETLT>;
+def : DSPSetCCPat<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETLE>;
+def : DSPSetCCPatInv<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETNE>;
+def : DSPSetCCPatInv<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETGE>;
+def : DSPSetCCPatInv<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETGT>;
+def : DSPSetCCPat<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETEQ>;
+def : DSPSetCCPat<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETULT>;
+def : DSPSetCCPat<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETULE>;
+def : DSPSetCCPatInv<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETNE>;
+def : DSPSetCCPatInv<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETUGE>;
+def : DSPSetCCPatInv<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETUGT>;
+
+def : DSPSelectCCPat<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETEQ>;
+def : DSPSelectCCPat<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETLT>;
+def : DSPSelectCCPat<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETLE>;
+def : DSPSelectCCPatInv<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETNE>;
+def : DSPSelectCCPatInv<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETGE>;
+def : DSPSelectCCPatInv<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETGT>;
+def : DSPSelectCCPat<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETEQ>;
+def : DSPSelectCCPat<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETULT>;
+def : DSPSelectCCPat<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETULE>;
+def : DSPSelectCCPatInv<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETNE>;
+def : DSPSelectCCPatInv<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETUGE>;
+def : DSPSelectCCPatInv<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETUGT>;
+
// Extr patterns.
class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> :
DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)),
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 77b08cb..968e536 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -17,7 +17,6 @@
#include "MipsSEISelDAGToDAG.h"
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
-#include "MipsAnalyzeImmediate.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
index e2219f2..4d76181 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -30,7 +30,6 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -198,6 +197,11 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP";
case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP";
case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP";
+ case MipsISD::SHLL_DSP: return "MipsISD::SHLL_DSP";
+ case MipsISD::SHRA_DSP: return "MipsISD::SHRA_DSP";
+ case MipsISD::SHRL_DSP: return "MipsISD::SHRL_DSP";
+ case MipsISD::SETCC_DSP: return "MipsISD::SETCC_DSP";
+ case MipsISD::SELECT_CC_DSP: return "MipsISD::SELECT_CC_DSP";
default: return NULL;
}
}
@@ -211,7 +215,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
// Mips does not have i1 type, so use i32 for
// setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// Load extended operations for i1 types must be promoted
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
@@ -346,9 +350,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
-
// Use the default for now
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
@@ -449,7 +450,7 @@ static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
+static Mips::CondCode condCodeToFCC(ISD::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Unknown fp condition code!");
case ISD::SETEQ:
@@ -508,7 +509,7 @@ static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
return DAG.getNode(MipsISD::FPCmp, DL, MVT::Glue, LHS, RHS,
- DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+ DAG.getConstant(condCodeToFCC(CC), MVT::i32));
}
// Creates and returns a CMovFPT/F node.
@@ -712,10 +713,7 @@ void
MipsTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
- SDValue Res = LowerOperation(SDValue(N, 0), DAG);
-
- for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
- Results.push_back(Res.getValue(I));
+ return LowerOperationWrapper(N, Results, DAG);
}
SDValue MipsTargetLowering::
@@ -739,15 +737,12 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG);
case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG);
- case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG);
case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG);
case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG);
case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true);
case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false);
case ISD::LOAD: return lowerLOAD(Op, DAG);
case ISD::STORE: return lowerSTORE(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::ADD: return lowerADD(Op, DAG);
}
return SDValue();
@@ -1827,15 +1822,6 @@ SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
Chain.getValue(1));
}
-// TODO: set SType according to the desired memory barrier behavior.
-SDValue
-MipsTargetLowering::lowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const {
- unsigned SType = 0;
- DebugLoc DL = Op.getDebugLoc();
- return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0),
- DAG.getConstant(SType, MVT::i32));
-}
-
SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op,
SelectionDAG &DAG) const {
// FIXME: Need pseudo-fence for 'singlethread' fences
@@ -1918,7 +1904,7 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
return DAG.getMergeValues(Ops, 2, DL);
}
-static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
+static SDValue createLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
SDValue Chain, SDValue Src, unsigned Offset) {
SDValue Ptr = LD->getBasePtr();
EVT VT = LD->getValueType(0), MemVT = LD->getMemoryVT();
@@ -1958,15 +1944,15 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// (set tmp, (ldl (add baseptr, 7), undef))
// (set dst, (ldr baseptr, tmp))
if ((VT == MVT::i64) && (ExtType == ISD::NON_EXTLOAD)) {
- SDValue LDL = CreateLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef,
+ SDValue LDL = createLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef,
IsLittle ? 7 : 0);
- return CreateLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL,
+ return createLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL,
IsLittle ? 0 : 7);
}
- SDValue LWL = CreateLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef,
+ SDValue LWL = createLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef,
IsLittle ? 3 : 0);
- SDValue LWR = CreateLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL,
+ SDValue LWR = createLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL,
IsLittle ? 0 : 3);
// Expand
@@ -1997,7 +1983,7 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, 2, DL);
}
-static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
+static SDValue createStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
SDValue Chain, unsigned Offset) {
SDValue Ptr = SD->getBasePtr(), Value = SD->getValue();
EVT MemVT = SD->getMemoryVT(), BasePtrVT = Ptr.getValueType();
@@ -2034,9 +2020,9 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// (swl val, (add baseptr, 3))
// (swr val, baseptr)
if ((VT == MVT::i32) || SD->isTruncatingStore()) {
- SDValue SWL = CreateStoreLR(MipsISD::SWL, DAG, SD, Chain,
+ SDValue SWL = createStoreLR(MipsISD::SWL, DAG, SD, Chain,
IsLittle ? 3 : 0);
- return CreateStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3);
+ return createStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3);
}
assert(VT == MVT::i64);
@@ -2046,172 +2032,8 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// to
// (sdl val, (add baseptr, 7))
// (sdr val, baseptr)
- SDValue SDL = CreateStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0);
- return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7);
-}
-
-static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) {
- SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
- DAG.getConstant(0, MVT::i32));
- SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
- DAG.getConstant(1, MVT::i32));
- return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi);
-}
-
-static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) {
- SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
- DAG.getConstant(Mips::sub_lo, MVT::i32));
- SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
- DAG.getConstant(Mips::sub_hi, MVT::i32));
- return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
-}
-
-// This function expands mips intrinsic nodes which have 64-bit input operands
-// or output values.
-//
-// out64 = intrinsic-node in64
-// =>
-// lo = copy (extract-element (in64, 0))
-// hi = copy (extract-element (in64, 1))
-// mips-specific-node
-// v0 = copy lo
-// v1 = copy hi
-// out64 = merge-values (v0, v1)
-//
-static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
- DebugLoc DL = Op.getDebugLoc();
- bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
- SmallVector<SDValue, 3> Ops;
- unsigned OpNo = 0;
-
- // See if Op has a chain input.
- if (HasChainIn)
- Ops.push_back(Op->getOperand(OpNo++));
-
- // The next operand is the intrinsic opcode.
- assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);
-
- // See if the next operand has type i64.
- SDValue Opnd = Op->getOperand(++OpNo), In64;
-
- if (Opnd.getValueType() == MVT::i64)
- In64 = initAccumulator(Opnd, DL, DAG);
- else
- Ops.push_back(Opnd);
-
- // Push the remaining operands.
- for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
- Ops.push_back(Op->getOperand(OpNo));
-
- // Add In64 to the end of the list.
- if (In64.getNode())
- Ops.push_back(In64);
-
- // Scan output.
- SmallVector<EVT, 2> ResTys;
-
- for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
- I != E; ++I)
- ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);
-
- // Create node.
- SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size());
- SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
-
- if (!HasChainIn)
- return Out;
-
- assert(Val->getValueType(1) == MVT::Other);
- SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
- return DAG.getMergeValues(Vals, 2, DL);
-}
-
-SDValue MipsTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
- SelectionDAG &DAG) const {
- switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
- default:
- return SDValue();
- case Intrinsic::mips_shilo:
- return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
- case Intrinsic::mips_dpau_h_qbl:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
- case Intrinsic::mips_dpau_h_qbr:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
- case Intrinsic::mips_dpsu_h_qbl:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
- case Intrinsic::mips_dpsu_h_qbr:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
- case Intrinsic::mips_dpa_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
- case Intrinsic::mips_dps_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
- case Intrinsic::mips_dpax_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
- case Intrinsic::mips_dpsx_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
- case Intrinsic::mips_mulsa_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
- case Intrinsic::mips_mult:
- return lowerDSPIntr(Op, DAG, MipsISD::Mult);
- case Intrinsic::mips_multu:
- return lowerDSPIntr(Op, DAG, MipsISD::Multu);
- case Intrinsic::mips_madd:
- return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
- case Intrinsic::mips_maddu:
- return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
- case Intrinsic::mips_msub:
- return lowerDSPIntr(Op, DAG, MipsISD::MSub);
- case Intrinsic::mips_msubu:
- return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
- }
-}
-
-SDValue MipsTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
- SelectionDAG &DAG) const {
- switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
- default:
- return SDValue();
- case Intrinsic::mips_extp:
- return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
- case Intrinsic::mips_extpdp:
- return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
- case Intrinsic::mips_extr_w:
- return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
- case Intrinsic::mips_extr_r_w:
- return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
- case Intrinsic::mips_extr_rs_w:
- return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
- case Intrinsic::mips_extr_s_h:
- return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
- case Intrinsic::mips_mthlip:
- return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
- case Intrinsic::mips_mulsaq_s_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
- case Intrinsic::mips_maq_s_w_phl:
- return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
- case Intrinsic::mips_maq_s_w_phr:
- return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
- case Intrinsic::mips_maq_sa_w_phl:
- return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
- case Intrinsic::mips_maq_sa_w_phr:
- return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
- case Intrinsic::mips_dpaq_s_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
- case Intrinsic::mips_dpsq_s_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
- case Intrinsic::mips_dpaq_sa_l_w:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
- case Intrinsic::mips_dpsq_sa_l_w:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
- case Intrinsic::mips_dpaqx_s_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
- case Intrinsic::mips_dpaqx_sa_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
- case Intrinsic::mips_dpsqx_s_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
- case Intrinsic::mips_dpsqx_sa_w_ph:
- return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
- }
+ SDValue SDL = createStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0);
+ return createStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7);
}
SDValue MipsTargetLowering::lowerADD(SDValue Op, SelectionDAG &DAG) const {
@@ -3009,8 +2831,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
return std::make_pair((unsigned)Mips::T9_64, &Mips::CPU64RegsRegClass);
case 'l': // register suitable for indirect jump
if (VT == MVT::i32)
- return std::make_pair((unsigned)Mips::LO, &Mips::HILORegClass);
- return std::make_pair((unsigned)Mips::LO64, &Mips::HILO64RegClass);
+ return std::make_pair((unsigned)Mips::LO, &Mips::LORegsRegClass);
+ return std::make_pair((unsigned)Mips::LO64, &Mips::LORegs64RegClass);
case 'x': // register suitable for indirect jump
// Fixme: Not triggering the use of both hi and low
// This will generate an error message
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
index cab71a6..5587e8f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -143,6 +143,15 @@ namespace llvm {
MSUB_DSP,
MSUBU_DSP,
+ // DSP shift nodes.
+ SHLL_DSP,
+ SHRA_DSP,
+ SHRL_DSP,
+
+ // DSP setcc and select_cc nodes.
+ SETCC_DSP,
+ SELECT_CC_DSP,
+
// Load/Store Left/Right nodes.
LWL = ISD::FIRST_TARGET_MEMORY_OPCODE,
LWR,
@@ -338,15 +347,12 @@ namespace llvm {
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
bool IsSRA) const;
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const;
/// isEligibleForTailCallOptimization - Check whether the call is eligible
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
index ee432c8..ea07372 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -36,6 +36,24 @@ def FrmFR : Format<4>;
def FrmFI : Format<5>;
def FrmOther : Format<6>; // Instruction w/ a custom format
+class MMRel;
+
+def Std2MicroMips : InstrMapping {
+ let FilterClass = "MMRel";
+  // Instructions with the same BaseOpcode form a row.
+  let RowFields = ["BaseOpcode"];
+  // Instructions with the same Arch field value form a column.
+  let ColFields = ["Arch"];
+  // The key column is the standard-encoding ("se") instructions.
+  let KeyCol = ["se"];
+  // Value columns are the "se" and "micromips" encodings.
+ let ValueCols = [["se"], ["micromips"]];
+}
+
+class StdArch {
+ string Arch = "se";
+}
+
// Generic Mips Format
class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
InstrItinClass itin, Format f>: Instruction
@@ -74,9 +92,11 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
// Mips32/64 Instruction Format
class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin, Format f>:
+ InstrItinClass itin, Format f, string opstr = ""> :
MipsInst<outs, ins, asmstr, pattern, itin, f> {
let Predicates = [HasStdEnc];
+ string BaseOpcode = opstr;
+ string Arch;
}
// Mips Pseudo Instructions Format
@@ -192,7 +212,7 @@ class MFC3OP_FM<bits<6> op, bits<5> mfmt>
let Inst{2-0} = sel;
}
-class ADD_FM<bits<6> op, bits<6> funct> {
+class ADD_FM<bits<6> op, bits<6> funct> : StdArch {
bits<5> rd;
bits<5> rs;
bits<5> rt;
@@ -207,7 +227,7 @@ class ADD_FM<bits<6> op, bits<6> funct> {
let Inst{5-0} = funct;
}
-class ADDI_FM<bits<6> op> {
+class ADDI_FM<bits<6> op> : StdArch {
bits<5> rs;
bits<5> rt;
bits<16> imm16;
@@ -220,7 +240,7 @@ class ADDI_FM<bits<6> op> {
let Inst{15-0} = imm16;
}
-class SRA_FM<bits<6> funct, bit rotate> {
+class SRA_FM<bits<6> funct, bit rotate> : StdArch {
bits<5> rd;
bits<5> rt;
bits<5> shamt;
@@ -236,7 +256,7 @@ class SRA_FM<bits<6> funct, bit rotate> {
let Inst{5-0} = funct;
}
-class SRLV_FM<bits<6> funct, bit rotate> {
+class SRLV_FM<bits<6> funct, bit rotate> : StdArch {
bits<5> rd;
bits<5> rt;
bits<5> rs;
@@ -288,7 +308,7 @@ class B_FM {
let Inst{15-0} = offset;
}
-class SLTI_FM<bits<6> op> {
+class SLTI_FM<bits<6> op> : StdArch {
bits<5> rt;
bits<5> rs;
bits<16> imm16;
@@ -413,7 +433,7 @@ class SYNC_FM {
let Inst{5-0} = 0xf;
}
-class MULT_FM<bits<6> op, bits<6> funct> {
+class MULT_FM<bits<6> op, bits<6> funct> : StdArch {
bits<5> rs;
bits<5> rt;
@@ -529,7 +549,7 @@ class MFC1_FM<bits<5> funct> {
let Inst{10-0} = 0;
}
-class LW_FM<bits<6> op> {
+class LW_FM<bits<6> op> : StdArch {
bits<5> rt;
bits<21> addr;
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
index 3a82e81..86ec729 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -179,6 +179,7 @@ def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">,
AssemblerPredicate<"FeatureMips32">;
def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">,
AssemblerPredicate<"!FeatureMips16">;
+def NotDSP : Predicate<"!Subtarget.hasDSP()">;
class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
let Predicates = [HasStdEnc];
@@ -374,11 +375,9 @@ class ArithLogicR<string opstr, RegisterOperand RO, bit isComm = 0,
SDPatternOperator OpNode = null_frag>:
InstSE<(outs RO:$rd), (ins RO:$rs, RO:$rt),
!strconcat(opstr, "\t$rd, $rs, $rt"),
- [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> {
+ [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR, opstr> {
let isCommutable = isComm;
let isReMaterializable = 1;
- string BaseOpcode;
- string Arch;
}
// Arithmetic and logical instructions with 2 register operands.
@@ -387,7 +386,8 @@ class ArithLogicI<string opstr, Operand Od, RegisterOperand RO,
SDPatternOperator OpNode = null_frag> :
InstSE<(outs RO:$rt), (ins RO:$rs, Od:$imm16),
!strconcat(opstr, "\t$rt, $rs, $imm16"),
- [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], IIAlu, FrmI> {
+ [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))],
+ IIAlu, FrmI, opstr> {
let isReMaterializable = 1;
}
@@ -404,7 +404,7 @@ class MArithR<string opstr, bit isComm = 0> :
class LogicNOR<string opstr, RegisterOperand RC>:
InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt),
!strconcat(opstr, "\t$rd, $rs, $rt"),
- [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR> {
+ [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR, opstr> {
let isCommutable = 1;
}
@@ -414,13 +414,13 @@ class shift_rotate_imm<string opstr, Operand ImmOpnd,
SDPatternOperator PF = null_frag> :
InstSE<(outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt),
!strconcat(opstr, "\t$rd, $rt, $shamt"),
- [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR>;
+ [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR, opstr>;
class shift_rotate_reg<string opstr, RegisterOperand RC,
SDPatternOperator OpNode = null_frag>:
InstSE<(outs RC:$rd), (ins CPURegsOpnd:$rs, RC:$rt),
!strconcat(opstr, "\t$rd, $rt, $rs"),
- [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR>;
+ [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR, opstr>;
// Load Upper Immediate
class LoadUpper<string opstr, RegisterClass RC, Operand Imm>:
@@ -440,18 +440,20 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
// Memory Load/Store
class Load<string opstr, SDPatternOperator OpNode, RegisterClass RC,
- Operand MemOpnd, ComplexPattern Addr> :
+ Operand MemOpnd, ComplexPattern Addr, string ofsuffix> :
InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI> {
+ [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI,
+ !strconcat(opstr, ofsuffix)> {
let DecoderMethod = "DecodeMem";
let canFoldAsLoad = 1;
let mayLoad = 1;
}
class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC,
- Operand MemOpnd, ComplexPattern Addr> :
+ Operand MemOpnd, ComplexPattern Addr, string ofsuffix> :
InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI> {
+ [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI,
+ !strconcat(opstr, ofsuffix)> {
let DecoderMethod = "DecodeMem";
let mayStore = 1;
}
@@ -459,8 +461,9 @@ class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC,
multiclass LoadM<string opstr, RegisterClass RC,
SDPatternOperator OpNode = null_frag,
ComplexPattern Addr = addr> {
- def NAME : Load<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
- def _P8 : Load<opstr, OpNode, RC, mem64, Addr>,
+ def NAME : Load<opstr, OpNode, RC, mem, Addr, "">,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Load<opstr, OpNode, RC, mem64, Addr, "_p8">,
Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
@@ -470,8 +473,9 @@ multiclass LoadM<string opstr, RegisterClass RC,
multiclass StoreM<string opstr, RegisterClass RC,
SDPatternOperator OpNode = null_frag,
ComplexPattern Addr = addr> {
- def NAME : Store<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
- def _P8 : Store<opstr, OpNode, RC, mem64, Addr>,
+ def NAME : Store<opstr, OpNode, RC, mem, Addr, "">,
+ Requires<[NotN64, HasStdEnc]>;
+ def _P8 : Store<opstr, OpNode, RC, mem64, Addr, "_p8">,
Requires<[IsN64, HasStdEnc]> {
let DecoderNamespace = "Mips64";
let isCodeGenOnly = 1;
@@ -542,14 +546,15 @@ class CBranchZero<string opstr, PatFrag cond_op, RegisterClass RC> :
class SetCC_R<string opstr, PatFrag cond_op, RegisterClass RC> :
InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt),
!strconcat(opstr, "\t$rd, $rs, $rt"),
- [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>;
+ [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))],
+ IIAlu, FrmR, opstr>;
class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type,
RegisterClass RC>:
InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16),
!strconcat(opstr, "\t$rt, $rs, $imm16"),
[(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))],
- IIAlu, FrmI>;
+ IIAlu, FrmI, opstr>;
// Jump
class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
@@ -636,7 +641,7 @@ class SYNC_FT :
class Mult<string opstr, InstrItinClass itin, RegisterOperand RO,
list<Register> DefRegs> :
InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rs, $rt"), [],
- itin, FrmR> {
+ itin, FrmR, opstr> {
let isCommutable = 1;
let Defs = DefRegs;
let neverHasSideEffects = 1;
@@ -832,14 +837,12 @@ let usesCustomInserter = 1 in {
defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32>;
}
-/// Pseudo instructions for loading, storing and copying accumulator registers.
+/// Pseudo instructions for loading and storing accumulator registers.
let isPseudo = 1 in {
defm LOAD_AC64 : LoadM<"load_ac64", ACRegs>;
defm STORE_AC64 : StoreM<"store_ac64", ACRegs>;
}
-def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>;
-
//===----------------------------------------------------------------------===//
// Instruction definition
//===----------------------------------------------------------------------===//
@@ -848,60 +851,70 @@ def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>;
//===----------------------------------------------------------------------===//
/// Arithmetic Instructions (ALU Immediate)
-def ADDiu : ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>,
+def ADDiu : MMRel, ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>,
ADDI_FM<0x9>, IsAsCheapAsAMove;
-def ADDi : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>;
-def SLTi : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>;
-def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>;
-def ANDi : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>,
+def ADDi : MMRel, ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>;
+def SLTi : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>,
+ SLTI_FM<0xa>;
+def SLTiu : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>,
+ SLTI_FM<0xb>;
+def ANDi : MMRel, ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>,
ADDI_FM<0xc>;
-def ORi : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>,
+def ORi : MMRel, ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>,
ADDI_FM<0xd>;
-def XORi : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>,
+def XORi : MMRel, ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>,
ADDI_FM<0xe>;
-def LUi : LoadUpper<"lui", CPURegs, uimm16>, LUI_FM;
+def LUi : MMRel, LoadUpper<"lui", CPURegs, uimm16>, LUI_FM;
/// Arithmetic Instructions (3-Operand, R-Type)
-def ADDu : ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, ADD_FM<0, 0x21>;
-def SUBu : ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, ADD_FM<0, 0x23>;
-def MUL : ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, ADD_FM<0x1c, 2>;
-def ADD : ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>;
-def SUB : ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>;
-def SLT : SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>;
-def SLTu : SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>;
-def AND : ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>;
-def OR : ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>;
-def XOR : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
-def NOR : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>;
+def ADDu : MMRel, ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>,
+ ADD_FM<0, 0x21>;
+def SUBu : MMRel, ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>,
+ ADD_FM<0, 0x23>;
+def MUL : MMRel, ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>,
+ ADD_FM<0x1c, 2>;
+def ADD : MMRel, ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>;
+def SUB : MMRel, ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>;
+def SLT : MMRel, SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>;
+def SLTu : MMRel, SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>;
+def AND : MMRel, ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>,
+ ADD_FM<0, 0x24>;
+def OR : MMRel, ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>,
+ ADD_FM<0, 0x25>;
+def XOR : MMRel, ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>,
+ ADD_FM<0, 0x26>;
+def NOR : MMRel, LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
-def SLL : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>,
+def SLL : MMRel, shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>,
SRA_FM<0, 0>;
-def SRL : shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>,
+def SRL : MMRel, shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>,
SRA_FM<2, 0>;
-def SRA : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>,
+def SRA : MMRel, shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>,
SRA_FM<3, 0>;
-def SLLV : shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>;
-def SRLV : shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>;
-def SRAV : shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>;
+def SLLV : MMRel, shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>;
+def SRLV : MMRel, shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>;
+def SRAV : MMRel, shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>;
// Rotate Instructions
let Predicates = [HasMips32r2, HasStdEnc] in {
- def ROTR : shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, immZExt5>,
+ def ROTR : MMRel, shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr,
+ immZExt5>,
SRA_FM<2, 1>;
- def ROTRV : shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, SRLV_FM<6, 1>;
+ def ROTRV : MMRel, shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>,
+ SRLV_FM<6, 1>;
}
/// Load and Store Instructions
/// aligned
-defm LB : LoadM<"lb", CPURegs, sextloadi8>, LW_FM<0x20>;
-defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, LW_FM<0x24>;
-defm LH : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, LW_FM<0x21>;
-defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, LW_FM<0x25>;
-defm LW : LoadM<"lw", CPURegs, load, addrDefault>, LW_FM<0x23>;
-defm SB : StoreM<"sb", CPURegs, truncstorei8>, LW_FM<0x28>;
-defm SH : StoreM<"sh", CPURegs, truncstorei16>, LW_FM<0x29>;
-defm SW : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>;
+defm LB : LoadM<"lb", CPURegs, sextloadi8>, MMRel, LW_FM<0x20>;
+defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, MMRel, LW_FM<0x24>;
+defm LH : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, MMRel, LW_FM<0x21>;
+defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, MMRel, LW_FM<0x25>;
+defm LW : LoadM<"lw", CPURegs, load, addrDefault>, MMRel, LW_FM<0x23>;
+defm SB : StoreM<"sb", CPURegs, truncstorei8>, MMRel, LW_FM<0x28>;
+defm SH : StoreM<"sh", CPURegs, truncstorei16>, MMRel, LW_FM<0x29>;
+defm SW : StoreM<"sw", CPURegs, store>, MMRel, LW_FM<0x2b>;
/// load/store left/right
defm LWL : LoadLeftRightM<"lwl", MipsLWL, CPURegs>, LW_FM<0x22>;
@@ -968,8 +981,10 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in {
}
/// Multiply and Divide Instructions.
-def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>;
-def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>;
+def MULT : MMRel, Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>,
+ MULT_FM<0, 0x18>;
+def MULTu : MMRel, Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>,
+ MULT_FM<0, 0x19>;
def PseudoMULT : MultDivPseudo<MULT, ACRegs, CPURegsOpnd, MipsMult, IIImul>;
def PseudoMULTu : MultDivPseudo<MULTu, ACRegs, CPURegsOpnd, MipsMultu, IIImul>;
def SDIV : Div<"div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>;
@@ -1066,10 +1081,10 @@ def : InstAlias<"negu $rt, $rs",
def : InstAlias<"slt $rs, $rt, $imm",
(SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm), 0>;
def : InstAlias<"xor $rs, $rt, $imm",
- (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
+ (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, uimm16:$imm), 1>,
Requires<[NotMips64]>;
def : InstAlias<"or $rs, $rt, $imm",
- (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
+ (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, uimm16:$imm), 1>,
Requires<[NotMips64]>;
def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
def : InstAlias<"mfc0 $rt, $rd",
@@ -1128,10 +1143,12 @@ def : MipsPat<(i32 imm:$imm),
// Carry MipsPatterns
def : MipsPat<(subc CPURegs:$lhs, CPURegs:$rhs),
(SUBu CPURegs:$lhs, CPURegs:$rhs)>;
-def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs),
- (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
-def : MipsPat<(addc CPURegs:$src, immSExt16:$imm),
- (ADDiu CPURegs:$src, imm:$imm)>;
+let Predicates = [HasStdEnc, NotDSP] in {
+ def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs),
+ (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
+ def : MipsPat<(addc CPURegs:$src, immSExt16:$imm),
+ (ADDiu CPURegs:$src, imm:$imm)>;
+}
// Call
def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)),
@@ -1326,3 +1343,6 @@ include "Mips16InstrInfo.td"
include "MipsDSPInstrFormats.td"
include "MipsDSPInstrInfo.td"
+// MicroMIPS
+include "MicroMipsInstrFormats.td"
+include "MicroMipsInstrInfo.td"
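// Editor's note: a minimal standalone sketch, not part of the patch. The
// opstr/ofsuffix strings threaded through Load/Store above give every
// standard-encoding instruction a stable key ("lw", "lw_p8", ...); the MMRel
// mixin uses that key to pair the instruction with its microMIPS encoding.
// The mapping below is hypothetical and only illustrates the name-keyed
// lookup idea.
#include <iostream>
#include <map>
#include <string>

static std::string instrKey(const std::string &Opstr, bool IsP8) {
  return Opstr + (IsP8 ? "_p8" : "");            // mirrors !strconcat(opstr, ofsuffix)
}

int main() {
  // Hypothetical standard-encoding -> microMIPS opcode names, keyed by opstr.
  std::map<std::string, std::string> StdToMicro = {
      {"addiu", "ADDIU_MM"}, {"lw", "LW_MM"}, {"sw", "SW_MM"}};
  std::cout << instrKey("lw", true) << '\n';               // "lw_p8"
  std::cout << StdToMicro[instrKey("lw", false)] << '\n';  // "LW_MM"
  return 0;
}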
diff --git a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
index 2efe534..bf5ad37 100644
--- a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
@@ -399,6 +399,8 @@ static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) {
}
bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
+ if (TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+ return false;
if ((TM.getRelocationModel() == Reloc::PIC_) &&
TM.getSubtarget<MipsSubtarget>().isABI_O32() &&
F.getInfo<MipsFunctionInfo>()->globalBaseRegSet())
diff --git a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
new file mode 100644
index 0000000..c6abf17
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -0,0 +1,34 @@
+//===----------------------------------------------------------------------===//
+// Instruction Selector Subtarget Control
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// This file defines a pass used to change the subtarget for the
+// Mips Instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsISelDAGToDAG.h"
+#include "MipsModuleISelDAGToDAG.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(errs() << "In MipsModuleDAGToDAGISel::runOnMachineFunction\n");
+ const_cast<MipsSubtarget&>(Subtarget).resetSubtarget(&MF);
+ return false;
+}
+
+char MipsModuleDAGToDAGISel::ID = 0;
+
+}
+
+
+llvm::FunctionPass *llvm::createMipsModuleISelDag(MipsTargetMachine &TM) {
+ return new MipsModuleDAGToDAGISel(TM);
+}
+
+
diff --git a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h
new file mode 100644
index 0000000..fda35ae
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.h
@@ -0,0 +1,66 @@
+//===---- MipsModuleISelDAGToDAG.h - Change Subtarget --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pass used to change the subtarget for the
+// Mips Instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMODULEISELDAGTODAG_H
+#define MIPSMODULEISELDAGTODAG_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsModuleDAGToDAGISel - MIPS specific code to select MIPS machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+class MipsModuleDAGToDAGISel : public MachineFunctionPass {
+public:
+
+ static char ID;
+
+ explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_)
+ : MachineFunctionPass(ID),
+ TM(TM_), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual SDNode *Select(SDNode *N) {
+ llvm_unreachable("unexpected");
+ }
+
+protected:
+ /// Keep a pointer to the MipsSubtarget around so that we can make the right
+ /// decision when generating code for different targets.
+ const TargetMachine &TM;
+ const MipsSubtarget &Subtarget;
+};
+
+/// createMipsModuleISelDag - This pass resets the subtarget for the
+/// Mips instruction selector on a per-function basis.
+FunctionPass *createMipsModuleISelDag(MipsTargetMachine &TM);
+}
+
+#endif
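// Editor's note: freestanding sketch, not LLVM code. MipsModuleDAGToDAGISel
// never selects nodes itself (its Select() is unreachable); it only
// reconfigures the subtarget before the Mips16 and MipsSE selectors run, and
// each of those bails out when it is not the active one. The classes below
// model that dispatch with plain booleans instead of MachineFunction state.
#include <iostream>
#include <string>

struct FunctionDesc { std::string Name; bool IsMips16; };

struct Mips16Selector {
  bool run(const FunctionDesc &F) { return F.IsMips16; }   // skips non-Mips16 code
};
struct MipsSESelector {
  bool run(const FunctionDesc &F) { return !F.IsMips16; }  // skips Mips16 code
};

int main() {
  FunctionDesc F{"leaf_fn", /*IsMips16=*/true};
  Mips16Selector S16;
  MipsSESelector SSE;
  std::cout << "mips16 selector ran: " << S16.run(F) << '\n'; // 1
  std::cout << "se selector ran:     " << SSE.run(F) << '\n'; // 0
  return 0;
}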
diff --git a/contrib/llvm/lib/Target/Mips/MipsOs16.cpp b/contrib/llvm/lib/Target/Mips/MipsOs16.cpp
new file mode 100644
index 0000000..1919077
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsOs16.cpp
@@ -0,0 +1,113 @@
+//===---- MipsOs16.cpp for Mips Option -Os16 --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an optimization phase for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-os16"
+#include "MipsOs16.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace {
+
+  // Figure out if we need floating point based on the function signature.
+ // We need to move variables in and/or out of floating point
+ // registers because of the ABI
+ //
+ bool needsFPFromSig(Function &F) {
+ Type* RetType = F.getReturnType();
+ switch (RetType->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ return true;
+ default:
+ ;
+ }
+ if (F.arg_size() >=1) {
+ Argument &Arg = F.getArgumentList().front();
+ switch (Arg.getType()->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ return true;
+ default:
+ ;
+ }
+ }
+ return false;
+ }
+
+ // Figure out if the function will need floating point operations
+ //
+ bool needsFP(Function &F) {
+ if (needsFPFromSig(F))
+ return true;
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ const Instruction &Inst = *I;
+ switch (Inst.getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FCmp:
+ return true;
+ default:
+ ;
+ }
+        if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+          DEBUG(dbgs() << "Working on call" << "\n");
+          // Indirect calls have no static callee; only check direct calls.
+          Function *F_ = CI->getCalledFunction();
+          if (F_ && needsFPFromSig(*F_))
+            return true;
+        }
+ }
+ return false;
+ }
+}
+namespace llvm {
+
+
+bool MipsOs16::runOnModule(Module &M) {
+ DEBUG(errs() << "Run on Module MipsOs16\n");
+ bool modified = false;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ DEBUG(dbgs() << "Working on " << F->getName() << "\n");
+ if (needsFP(*F)) {
+ DEBUG(dbgs() << " need to compile as nomips16 \n");
+ F->addFnAttr("nomips16");
+ }
+ else {
+ F->addFnAttr("mips16");
+ DEBUG(dbgs() << " no need to compile as nomips16 \n");
+ }
+ }
+ return modified;
+}
+
+char MipsOs16::ID = 0;
+
+}
+
+ModulePass *llvm::createMipsOs16(MipsTargetMachine &TM) {
+ return new MipsOs16;
+}
+
+
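// Editor's note: a standalone sketch of the -mips-os16 policy implemented in
// MipsOs16 above, using a simplified model instead of LLVM IR. Any function
// whose signature or body touches floating point keeps the standard encoding
// ("nomips16"); everything else is shrunk to MIPS16. Names below are
// illustrative only and do not appear in the patch.
#include <iostream>
#include <string>
#include <vector>

struct FuncModel {
  std::string Name;
  bool FloatInSignature;  // float/double return value or leading argument
  bool FloatInBody;       // any FP arithmetic, conversion or compare
};

static const char *os16Attribute(const FuncModel &F) {
  return (F.FloatInSignature || F.FloatInBody) ? "nomips16" : "mips16";
}

int main() {
  std::vector<FuncModel> Funcs = {
      {"checksum", false, false},  // integer only -> mips16
      {"scale",    true,  true}};  // uses FP      -> nomips16
  for (const auto &F : Funcs)
    std::cout << F.Name << ": " << os16Attribute(F) << '\n';
  return 0;
}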
diff --git a/contrib/llvm/lib/Target/Mips/MipsOs16.h b/contrib/llvm/lib/Target/Mips/MipsOs16.h
new file mode 100644
index 0000000..21beef8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsOs16.h
@@ -0,0 +1,49 @@
+//===---- MipsOs16.h for Mips Option -Os16 --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an optimization phase for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSOS16_H
+#define MIPSOS16_H
+
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "MipsTargetMachine.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
+
+
+
+using namespace llvm;
+
+namespace llvm {
+
+class MipsOs16 : public ModulePass {
+
+public:
+ static char ID;
+
+ MipsOs16() : ModulePass(ID) {
+
+ }
+
+ virtual const char *getPassName() const {
+ return "MIPS Os16 Optimization";
+ }
+
+ virtual bool runOnModule(Module &M);
+
+};
+
+ModulePass *createMipsOs16(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index 3250733..dead07b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -145,7 +145,11 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(Mips::HWR29_64);
// Reserve DSP control register.
- Reserved.set(Mips::DSPCtrl);
+ Reserved.set(Mips::DSPPos);
+ Reserved.set(Mips::DSPSCount);
+ Reserved.set(Mips::DSPCarry);
+ Reserved.set(Mips::DSPEFI);
+ Reserved.set(Mips::DSPOutFlag);
// Reserve RA if in mips16 mode.
if (Subtarget.inMips16Mode()) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
index 64458bc..229f167 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
@@ -16,6 +16,11 @@ def sub_fpodd : SubRegIndex;
def sub_32 : SubRegIndex;
def sub_lo : SubRegIndex;
def sub_hi : SubRegIndex;
+def sub_dsp16_19 : SubRegIndex;
+def sub_dsp20 : SubRegIndex;
+def sub_dsp21 : SubRegIndex;
+def sub_dsp22 : SubRegIndex;
+def sub_dsp23 : SubRegIndex;
}
class Unallocatable {
@@ -229,14 +234,14 @@ let Namespace = "Mips" in {
def D31_64 : AFPR64<31, "f31", [F31]>, DwarfRegNum<[63]>;
// Hi/Lo registers
- def HI : Register<"hi">, DwarfRegNum<[64]>;
- def HI1 : Register<"hi1">, DwarfRegNum<[176]>;
- def HI2 : Register<"hi2">, DwarfRegNum<[178]>;
- def HI3 : Register<"hi3">, DwarfRegNum<[180]>;
- def LO : Register<"lo">, DwarfRegNum<[65]>;
- def LO1 : Register<"lo1">, DwarfRegNum<[177]>;
- def LO2 : Register<"lo2">, DwarfRegNum<[179]>;
- def LO3 : Register<"lo3">, DwarfRegNum<[181]>;
+ def HI : Register<"ac0">, DwarfRegNum<[64]>;
+ def HI1 : Register<"ac1">, DwarfRegNum<[176]>;
+ def HI2 : Register<"ac2">, DwarfRegNum<[178]>;
+ def HI3 : Register<"ac3">, DwarfRegNum<[180]>;
+ def LO : Register<"ac0">, DwarfRegNum<[65]>;
+ def LO1 : Register<"ac1">, DwarfRegNum<[177]>;
+ def LO2 : Register<"ac2">, DwarfRegNum<[179]>;
+ def LO3 : Register<"ac3">, DwarfRegNum<[181]>;
let SubRegIndices = [sub_32] in {
def HI64 : RegisterWithSubRegs<"hi", [HI]>;
@@ -264,7 +269,23 @@ let Namespace = "Mips" in {
def AC0_64 : ACC<0, "ac0", [LO64, HI64]>;
- def DSPCtrl : Register<"dspctrl">;
+ // DSP-ASE control register fields.
+ def DSPPos : Register<"">;
+ def DSPSCount : Register<"">;
+ def DSPCarry : Register<"">;
+ def DSPEFI : Register<"">;
+ def DSPOutFlag16_19 : Register<"">;
+ def DSPOutFlag20 : Register<"">;
+ def DSPOutFlag21 : Register<"">;
+ def DSPOutFlag22 : Register<"">;
+ def DSPOutFlag23 : Register<"">;
+ def DSPCCond : Register<"">;
+
+ let SubRegIndices = [sub_dsp16_19, sub_dsp20, sub_dsp21, sub_dsp22,
+ sub_dsp23] in
+ def DSPOutFlag : RegisterWithSubRegs<"", [DSPOutFlag16_19, DSPOutFlag20,
+ DSPOutFlag21, DSPOutFlag22,
+ DSPOutFlag23]>;
}
//===----------------------------------------------------------------------===//
@@ -340,8 +361,12 @@ def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable;
// Hi/Lo Registers
-def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable;
-def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable;
+def LORegs : RegisterClass<"Mips", [i32], 32, (add LO)>;
+def HIRegs : RegisterClass<"Mips", [i32], 32, (add HI)>;
+def LORegsDSP : RegisterClass<"Mips", [i32], 32, (add LO, LO1, LO2, LO3)>;
+def HIRegsDSP : RegisterClass<"Mips", [i32], 32, (add HI, HI1, HI2, HI3)>;
+def LORegs64 : RegisterClass<"Mips", [i64], 64, (add LO64)>;
+def HIRegs64 : RegisterClass<"Mips", [i64], 64, (add HI64)>;
// Hardware registers
def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable;
@@ -360,6 +385,9 @@ def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
let Size = 64;
}
+def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>;
+
+// Register Operands.
def CPURegsAsmOperand : AsmOperandClass {
let Name = "CPURegsAsm";
let ParserMethod = "parseCPURegs";
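// Editor's note: illustrative sketch only, not LLVM code. The register-info
// change above retires the single DSPCtrl register and instead models each
// field of the DSP control register (pos, scount, carry, EFI, the ouflag bits
// and ccond) as its own register, with DSPOutFlag aggregating the per-bit
// ouflag sub-registers. The struct below mirrors that decomposition in plain
// C++; exact field widths and bit positions are intentionally left out.
#include <cstdint>

struct DSPOutFlagBits {
  bool Flag16_19;  // sub_dsp16_19
  bool Flag20;     // sub_dsp20
  bool Flag21;     // sub_dsp21
  bool Flag22;     // sub_dsp22
  bool Flag23;     // sub_dsp23
};

struct DSPControlModel {
  uint8_t Pos;             // DSPPos
  uint8_t SCount;          // DSPSCount
  bool Carry;              // DSPCarry
  bool EFI;                // DSPEFI
  DSPOutFlagBits OutFlag;  // DSPOutFlag and its sub-registers
  uint8_t CCond;           // DSPCCond
};

int main() {
  DSPControlModel Ctrl{};      // all fields start cleared
  Ctrl.OutFlag.Flag20 = true;  // e.g. an overflow reported through one ouflag bit
  return Ctrl.OutFlag.Flag20 ? 0 : 1;
}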
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index 68ec921..b295e91 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -32,17 +32,21 @@ using namespace llvm;
namespace {
typedef MachineBasicBlock::iterator Iter;
-/// Helper class to expand accumulator pseudos.
-class ExpandACCPseudo {
+/// Helper class to expand pseudos.
+class ExpandPseudo {
public:
- ExpandACCPseudo(MachineFunction &MF);
+ ExpandPseudo(MachineFunction &MF);
bool expand();
private:
bool expandInstr(MachineBasicBlock &MBB, Iter I);
- void expandLoad(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
- void expandStore(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
- void expandCopy(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+ void expandLoadCCond(MachineBasicBlock &MBB, Iter I);
+ void expandStoreCCond(MachineBasicBlock &MBB, Iter I);
+ void expandLoadACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+ void expandStoreACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+ bool expandCopy(MachineBasicBlock &MBB, Iter I);
+ bool expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst,
+ unsigned Src, unsigned RegSize);
MachineFunction &MF;
const MipsSEInstrInfo &TII;
@@ -51,12 +55,12 @@ private:
};
}
-ExpandACCPseudo::ExpandACCPseudo(MachineFunction &MF_)
+ExpandPseudo::ExpandPseudo(MachineFunction &MF_)
: MF(MF_),
TII(*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo())),
RegInfo(TII.getRegisterInfo()), MRI(MF.getRegInfo()) {}
-bool ExpandACCPseudo::expand() {
+bool ExpandPseudo::expand() {
bool Expanded = false;
for (MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end();
@@ -67,34 +71,39 @@ bool ExpandACCPseudo::expand() {
return Expanded;
}
-bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
+bool ExpandPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
switch(I->getOpcode()) {
+ case Mips::LOAD_CCOND_DSP:
+ case Mips::LOAD_CCOND_DSP_P8:
+ expandLoadCCond(MBB, I);
+ break;
+ case Mips::STORE_CCOND_DSP:
+ case Mips::STORE_CCOND_DSP_P8:
+ expandStoreCCond(MBB, I);
+ break;
case Mips::LOAD_AC64:
case Mips::LOAD_AC64_P8:
case Mips::LOAD_AC_DSP:
case Mips::LOAD_AC_DSP_P8:
- expandLoad(MBB, I, 4);
+ expandLoadACC(MBB, I, 4);
break;
case Mips::LOAD_AC128:
case Mips::LOAD_AC128_P8:
- expandLoad(MBB, I, 8);
+ expandLoadACC(MBB, I, 8);
break;
case Mips::STORE_AC64:
case Mips::STORE_AC64_P8:
case Mips::STORE_AC_DSP:
case Mips::STORE_AC_DSP_P8:
- expandStore(MBB, I, 4);
+ expandStoreACC(MBB, I, 4);
break;
case Mips::STORE_AC128:
case Mips::STORE_AC128_P8:
- expandStore(MBB, I, 8);
+ expandStoreACC(MBB, I, 8);
break;
- case Mips::COPY_AC64:
- case Mips::COPY_AC_DSP:
- expandCopy(MBB, I, 4);
- break;
- case Mips::COPY_AC128:
- expandCopy(MBB, I, 8);
+ case TargetOpcode::COPY:
+ if (!expandCopy(MBB, I))
+ return false;
break;
default:
return false;
@@ -104,7 +113,37 @@ bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
return true;
}
-void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I,
+void ExpandPseudo::expandLoadCCond(MachineBasicBlock &MBB, Iter I) {
+ // load $vr, FI
+ // copy ccond, $vr
+
+ assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(4);
+ unsigned VR = MRI.createVirtualRegister(RC);
+ unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+
+ TII.loadRegFromStack(MBB, I, VR, FI, RC, &RegInfo, 0);
+ BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), Dst)
+ .addReg(VR, RegState::Kill);
+}
+
+void ExpandPseudo::expandStoreCCond(MachineBasicBlock &MBB, Iter I) {
+ // copy $vr, ccond
+ // store $vr, FI
+
+ assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+ const TargetRegisterClass *RC = RegInfo.intRegClass(4);
+ unsigned VR = MRI.createVirtualRegister(RC);
+ unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+
+ BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), VR)
+ .addReg(Src, getKillRegState(I->getOperand(0).isKill()));
+ TII.storeRegToStack(MBB, I, VR, true, FI, RC, &RegInfo, 0);
+}
+
+void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I,
unsigned RegSize) {
// load $vr0, FI
// copy lo, $vr0
@@ -128,7 +167,7 @@ void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I,
BuildMI(MBB, I, DL, Desc, Hi).addReg(VR1, RegState::Kill);
}
-void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I,
+void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I,
unsigned RegSize) {
// copy $vr0, lo
// store $vr0, FI
@@ -152,8 +191,20 @@ void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I,
TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize);
}
-void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I,
- unsigned RegSize) {
+bool ExpandPseudo::expandCopy(MachineBasicBlock &MBB, Iter I) {
+ unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
+
+ if (Mips::ACRegsDSPRegClass.contains(Dst, Src))
+ return expandCopyACC(MBB, I, Dst, Src, 4);
+
+ if (Mips::ACRegs128RegClass.contains(Dst, Src))
+ return expandCopyACC(MBB, I, Dst, Src, 8);
+
+ return false;
+}
+
+bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst,
+ unsigned Src, unsigned RegSize) {
// copy $vr0, src_lo
// copy dst_lo, $vr0
// copy $vr1, src_hi
@@ -162,7 +213,6 @@ void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I,
const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
unsigned VR0 = MRI.createVirtualRegister(RC);
unsigned VR1 = MRI.createVirtualRegister(RC);
- unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
unsigned SrcKill = getKillRegState(I->getOperand(1).isKill());
unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo);
unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi);
@@ -176,6 +226,7 @@ void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I,
BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(SrcHi, SrcKill);
BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi)
.addReg(VR1, RegState::Kill);
+ return true;
}
unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const {
@@ -438,7 +489,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Expand pseudo instructions which load, store or copy accumulators.
// Add an emergency spill slot if a pseudo was expanded.
- if (ExpandACCPseudo(MF).expand()) {
+ if (ExpandPseudo(MF).expand()) {
// The spill slot should be half the size of the accumulator. If target is
  // mips64, it should be 64-bit, otherwise it should be 32-bit.
const TargetRegisterClass *RC = STI.hasMips64() ?
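// Editor's note: a freestanding sketch (not LLVM code) of the expansion
// performed by ExpandPseudo above for the DSP condition-code spill pseudos:
// a reload becomes "load a scratch GPR from the frame index, then COPY it
// into ccond", and a spill is the mirror image. Register and mnemonic names
// in the strings are illustrative.
#include <iostream>
#include <string>
#include <vector>

static std::vector<std::string> expandLoadCCond(int FrameIndex) {
  return {"lw   $scratch, FI#" + std::to_string(FrameIndex),
          "copy $ccond, $scratch"};
}

static std::vector<std::string> expandStoreCCond(int FrameIndex) {
  return {"copy $scratch, $ccond",
          "sw   $scratch, FI#" + std::to_string(FrameIndex)};
}

int main() {
  for (const auto &I : expandLoadCCond(2))
    std::cout << I << '\n';
  for (const auto &I : expandStoreCCond(2))
    std::cout << I << '\n';
  return 0;
}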
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index d6d2207..8a6523a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -35,6 +35,36 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ if (Subtarget.inMips16Mode())
+ return false;
+ return MipsDAGToDAGISel::runOnMachineFunction(MF);
+}
+
+void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
+ MachineFunction &MF) {
+ MachineInstrBuilder MIB(MF, &MI);
+ unsigned Mask = MI.getOperand(1).getImm();
+ unsigned Flag = IsDef ? RegState::ImplicitDefine : RegState::Implicit;
+
+ if (Mask & 1)
+ MIB.addReg(Mips::DSPPos, Flag);
+
+ if (Mask & 2)
+ MIB.addReg(Mips::DSPSCount, Flag);
+
+ if (Mask & 4)
+ MIB.addReg(Mips::DSPCarry, Flag);
+
+ if (Mask & 8)
+ MIB.addReg(Mips::DSPOutFlag, Flag);
+
+ if (Mask & 16)
+ MIB.addReg(Mips::DSPCCond, Flag);
+
+ if (Mask & 32)
+ MIB.addReg(Mips::DSPEFI, Flag);
+}
bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
const MachineInstr& MI) {
@@ -173,29 +203,14 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
++MFI)
- for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
- replaceUsesWithZeroReg(MRI, *I);
-}
-
-/// Select multiply instructions.
-std::pair<SDNode*, SDNode*>
-MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
- bool HasLo, bool HasHi) {
- SDNode *Lo = 0, *Hi = 0;
- SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
- N->getOperand(1));
- SDValue InFlag = SDValue(Mul, 0);
-
- if (HasLo) {
- unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
- Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
- InFlag = SDValue(Lo, 1);
- }
- if (HasHi) {
- unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
- Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
- }
- return std::make_pair(Lo, Hi);
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
+ if (I->getOpcode() == Mips::RDDSP)
+ addDSPCtrlRegOperands(false, *I, MF);
+ else if (I->getOpcode() == Mips::WRDSP)
+ addDSPCtrlRegOperands(true, *I, MF);
+ else
+ replaceUsesWithZeroReg(MRI, *I);
+ }
}
SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
@@ -211,7 +226,7 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
- SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2);
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops);
SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT,
SDValue(Carry, 0), RHS);
return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
@@ -307,9 +322,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
// Instruction Selection not handled by the auto-generated
// tablegen selection should be handled here.
///
- EVT NodeTy = Node->getValueType(0);
SDNode *Result;
- unsigned MultOpc;
switch(Opcode) {
default: break;
@@ -321,51 +334,13 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
}
case ISD::ADDE: {
+ if (Subtarget.hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
+ break;
SDValue InFlag = Node->getOperand(2);
Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
return std::make_pair(true, Result);
}
- /// Mul with two results
- case ISD::SMUL_LOHI:
- case ISD::UMUL_LOHI: {
- if (NodeTy == MVT::i32)
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
- else
- MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
-
- std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
- true, true);
-
- if (!SDValue(Node, 0).use_empty())
- ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
-
- if (!SDValue(Node, 1).use_empty())
- ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
-
- return std::make_pair(true, (SDNode*)NULL);
- }
-
- /// Special Muls
- case ISD::MUL: {
- // Mips32 has a 32-bit three operand mul instruction.
- if (Subtarget.hasMips32() && NodeTy == MVT::i32)
- break;
- MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT;
- Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first;
- return std::make_pair(true, Result);
- }
- case ISD::MULHS:
- case ISD::MULHU: {
- if (NodeTy == MVT::i32)
- MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
- else
- MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
-
- Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
- return std::make_pair(true, Result);
- }
-
case ISD::ConstantFP: {
ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
@@ -460,7 +435,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx,
Node->getOperand(1), HiIdx };
SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
- MVT::Untyped, Ops, 5);
+ MVT::Untyped, Ops);
return std::make_pair(true, Res);
}
}
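// Editor's note: standalone illustration (not LLVM code) of
// addDSPCtrlRegOperands above. The immediate mask carried by RDDSP/WRDSP
// selects which fields of the DSP control register the instruction reads or
// writes, and the pass attaches the matching field registers as implicit
// operands. The bit assignments below follow the switch in the patch.
#include <iostream>
#include <string>
#include <vector>

static std::vector<std::string> dspFieldsForMask(unsigned Mask) {
  static const char *Fields[] = {"DSPPos",     "DSPSCount", "DSPCarry",
                                 "DSPOutFlag", "DSPCCond",  "DSPEFI"};
  std::vector<std::string> Result;
  for (unsigned Bit = 0; Bit < 6; ++Bit)
    if (Mask & (1u << Bit))
      Result.push_back(Fields[Bit]);
  return Result;
}

int main() {
  // 1 << 4 selects only ccond, the mask copyPhysReg uses for DSPCC copies.
  for (const auto &Name : dspFieldsForMask(1u << 4))
    std::cout << Name << '\n';
  return 0;
}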
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 6137ab0..a235e96 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -24,6 +24,12 @@ public:
explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
private:
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
+ MachineFunction &MF);
+
bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 4f21921..8544bb8 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -15,6 +15,7 @@
#include "MipsTargetMachine.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -27,6 +28,9 @@ EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
: MipsTargetLowering(TM) {
// Set up the register classes
+
+ clearRegisterClasses();
+
addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
if (HasMips64)
@@ -42,12 +46,23 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
setOperationAction(Opc, VecTys[i], Expand);
+ setOperationAction(ISD::ADD, VecTys[i], Legal);
+ setOperationAction(ISD::SUB, VecTys[i], Legal);
setOperationAction(ISD::LOAD, VecTys[i], Legal);
setOperationAction(ISD::STORE, VecTys[i], Legal);
setOperationAction(ISD::BITCAST, VecTys[i], Legal);
}
+
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::VSELECT);
}
+ if (Subtarget->hasDSPR2())
+ setOperationAction(ISD::MUL, MVT::v2i16, Legal);
+
if (!TM.Options.UseSoftFloat) {
addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
@@ -65,14 +80,19 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::MULHS, MVT::i32, Custom);
setOperationAction(ISD::MULHU, MVT::i32, Custom);
- if (HasMips64)
+ if (HasMips64) {
+ setOperationAction(ISD::MULHS, MVT::i64, Custom);
+ setOperationAction(ISD::MULHU, MVT::i64, Custom);
setOperationAction(ISD::MUL, MVT::i64, Custom);
+ }
+
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
@@ -113,7 +133,10 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
- case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, DAG);
+ case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
+ DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
}
return MipsTargetLowering::LowerOperation(Op, DAG);
@@ -297,18 +320,136 @@ static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
+ SelectionDAG &DAG,
+ const MipsSubtarget *Subtarget) {
+ // See if this is a vector splat immediate node.
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned EltSize = Ty.getVectorElementType().getSizeInBits();
+ BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+
+ if (!BV ||
+ !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+ EltSize, !Subtarget->isLittle()) ||
+ (SplatBitSize != EltSize) ||
+ (SplatValue.getZExtValue() >= EltSize))
+ return SDValue();
+
+ return DAG.getNode(Opc, N->getDebugLoc(), Ty, N->getOperand(0),
+ DAG.getConstant(SplatValue.getZExtValue(), MVT::i32));
+}
+
+static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ EVT Ty = N->getValueType(0);
+
+ if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
+ return SDValue();
+
+ return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
+}
+
+static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ EVT Ty = N->getValueType(0);
+
+ if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2()))
+ return SDValue();
+
+ return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
+}
+
+
+static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ EVT Ty = N->getValueType(0);
+
+ if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8))
+ return SDValue();
+
+ return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
+}
+
+static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
+ bool IsV216 = (Ty == MVT::v2i16);
+
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETNE: return true;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETGT:
+ case ISD::SETGE: return IsV216;
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE: return !IsV216;
+ default: return false;
+ }
+}
+
+static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT Ty = N->getValueType(0);
+
+ if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
+ return SDValue();
+
+ if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
+ return SDValue();
+
+ return DAG.getNode(MipsISD::SETCC_DSP, N->getDebugLoc(), Ty, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
+}
+
+static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT Ty = N->getValueType(0);
+
+ if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
+ return SDValue();
+
+ SDValue SetCC = N->getOperand(0);
+
+ if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
+ return SDValue();
+
+ return DAG.getNode(MipsISD::SELECT_CC_DSP, N->getDebugLoc(), Ty,
+ SetCC.getOperand(0), SetCC.getOperand(1), N->getOperand(1),
+ N->getOperand(2), SetCC.getOperand(2));
+}
+
SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
+ SDValue Val;
switch (N->getOpcode()) {
case ISD::ADDE:
return performADDECombine(N, DAG, DCI, Subtarget);
case ISD::SUBE:
return performSUBECombine(N, DAG, DCI, Subtarget);
- default:
- return MipsTargetLowering::PerformDAGCombine(N, DCI);
+ case ISD::SHL:
+ return performSHLCombine(N, DAG, DCI, Subtarget);
+ case ISD::SRA:
+ return performSRACombine(N, DAG, DCI, Subtarget);
+ case ISD::SRL:
+ return performSRLCombine(N, DAG, DCI, Subtarget);
+ case ISD::VSELECT:
+ return performVSELECTCombine(N, DAG);
+ case ISD::SETCC: {
+ Val = performSETCCCombine(N, DAG);
+ break;
}
+ }
+
+ if (Val.getNode())
+ return Val;
+
+ return MipsTargetLowering::PerformDAGCombine(N, DCI);
}
MachineBasicBlock *
@@ -378,6 +519,171 @@ SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
return DAG.getMergeValues(Vals, 2, DL);
}
+
+static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) {
+ SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+ DAG.getConstant(0, MVT::i32));
+ SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi);
+}
+
+static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) {
+ SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
+ DAG.getConstant(Mips::sub_lo, MVT::i32));
+ SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
+ DAG.getConstant(Mips::sub_hi, MVT::i32));
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+}
+
+// This function expands mips intrinsic nodes which have 64-bit input operands
+// or output values.
+//
+// out64 = intrinsic-node in64
+// =>
+// lo = copy (extract-element (in64, 0))
+// hi = copy (extract-element (in64, 1))
+// mips-specific-node
+// v0 = copy lo
+// v1 = copy hi
+// out64 = merge-values (v0, v1)
+//
+static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
+ DebugLoc DL = Op.getDebugLoc();
+ bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
+ SmallVector<SDValue, 3> Ops;
+ unsigned OpNo = 0;
+
+ // See if Op has a chain input.
+ if (HasChainIn)
+ Ops.push_back(Op->getOperand(OpNo++));
+
+ // The next operand is the intrinsic opcode.
+ assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);
+
+ // See if the next operand has type i64.
+ SDValue Opnd = Op->getOperand(++OpNo), In64;
+
+ if (Opnd.getValueType() == MVT::i64)
+ In64 = initAccumulator(Opnd, DL, DAG);
+ else
+ Ops.push_back(Opnd);
+
+ // Push the remaining operands.
+ for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
+ Ops.push_back(Op->getOperand(OpNo));
+
+ // Add In64 to the end of the list.
+ if (In64.getNode())
+ Ops.push_back(In64);
+
+ // Scan output.
+ SmallVector<EVT, 2> ResTys;
+
+ for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
+ I != E; ++I)
+ ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);
+
+ // Create node.
+ SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size());
+ SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
+
+ if (!HasChainIn)
+ return Out;
+
+ assert(Val->getValueType(1) == MVT::Other);
+ SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
+ return DAG.getMergeValues(Vals, 2, DL);
+}
+
+SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
+ default:
+ return SDValue();
+ case Intrinsic::mips_shilo:
+ return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
+ case Intrinsic::mips_dpau_h_qbl:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
+ case Intrinsic::mips_dpau_h_qbr:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
+ case Intrinsic::mips_dpsu_h_qbl:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
+ case Intrinsic::mips_dpsu_h_qbr:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
+ case Intrinsic::mips_dpa_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
+ case Intrinsic::mips_dps_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
+ case Intrinsic::mips_dpax_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
+ case Intrinsic::mips_dpsx_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
+ case Intrinsic::mips_mulsa_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
+ case Intrinsic::mips_mult:
+ return lowerDSPIntr(Op, DAG, MipsISD::Mult);
+ case Intrinsic::mips_multu:
+ return lowerDSPIntr(Op, DAG, MipsISD::Multu);
+ case Intrinsic::mips_madd:
+ return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
+ case Intrinsic::mips_maddu:
+ return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
+ case Intrinsic::mips_msub:
+ return lowerDSPIntr(Op, DAG, MipsISD::MSub);
+ case Intrinsic::mips_msubu:
+ return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
+ }
+}
+
+SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
+ default:
+ return SDValue();
+ case Intrinsic::mips_extp:
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
+ case Intrinsic::mips_extpdp:
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
+ case Intrinsic::mips_extr_w:
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
+ case Intrinsic::mips_extr_r_w:
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
+ case Intrinsic::mips_extr_rs_w:
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
+ case Intrinsic::mips_extr_s_h:
+ return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
+ case Intrinsic::mips_mthlip:
+ return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
+ case Intrinsic::mips_mulsaq_s_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
+ case Intrinsic::mips_maq_s_w_phl:
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
+ case Intrinsic::mips_maq_s_w_phr:
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
+ case Intrinsic::mips_maq_sa_w_phl:
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
+ case Intrinsic::mips_maq_sa_w_phr:
+ return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
+ case Intrinsic::mips_dpaq_s_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
+ case Intrinsic::mips_dpsq_s_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
+ case Intrinsic::mips_dpaq_sa_l_w:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
+ case Intrinsic::mips_dpsq_sa_l_w:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
+ case Intrinsic::mips_dpaqx_s_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
+ case Intrinsic::mips_dpaqx_sa_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
+ case Intrinsic::mips_dpsqx_s_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
+ case Intrinsic::mips_dpsqx_sa_w_ph:
+ return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
+ }
+}
+
MachineBasicBlock * MipsSETargetLowering::
emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
// $bb:
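// Editor's note: a standalone restatement (not LLVM code) of
// isLegalDSPCondCode above: equality compares stay legal for both DSP vector
// types, signed orderings only for v2i16 and unsigned orderings only for
// v4i8, matching the compare instructions the DSP ASE provides for each
// element type. The enum below is a stand-in for ISD::CondCode.
#include <iostream>

enum class Cond { EQ, NE, LT, LE, GT, GE, ULT, ULE, UGT, UGE };

static bool isLegalDSPCondCode(bool IsV2I16, Cond CC) {
  switch (CC) {
  case Cond::EQ:
  case Cond::NE:
    return true;
  case Cond::LT:
  case Cond::LE:
  case Cond::GT:
  case Cond::GE:
    return IsV2I16;   // signed orderings: v2i16 only
  default:
    return !IsV2I16;  // unsigned orderings: v4i8 only
  }
}

int main() {
  std::cout << isLegalDSPCondCode(true, Cond::LT) << '\n';   // 1: v2i16 signed
  std::cout << isLegalDSPCondCode(false, Cond::LT) << '\n';  // 0: v4i8 signed
  return 0;
}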
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h
index 186f6a3..ec8a5c7 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h
@@ -31,6 +31,11 @@ namespace llvm {
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+ virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const {
+ return false;
+ }
+
virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
if (VT == MVT::Untyped)
return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass :
@@ -54,6 +59,9 @@ namespace llvm {
SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+
MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
MachineBasicBlock *BB) const;
};
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index ca0315e..a0768e5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -95,20 +95,39 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = Mips::CFC1;
else if (Mips::FGR32RegClass.contains(SrcReg))
Opc = Mips::MFC1;
- else if (SrcReg == Mips::HI)
+ else if (Mips::HIRegsRegClass.contains(SrcReg))
Opc = Mips::MFHI, SrcReg = 0;
- else if (SrcReg == Mips::LO)
+ else if (Mips::LORegsRegClass.contains(SrcReg))
Opc = Mips::MFLO, SrcReg = 0;
+ else if (Mips::HIRegsDSPRegClass.contains(SrcReg))
+ Opc = Mips::MFHI_DSP;
+ else if (Mips::LORegsDSPRegClass.contains(SrcReg))
+ Opc = Mips::MFLO_DSP;
+ else if (Mips::DSPCCRegClass.contains(SrcReg)) {
+ BuildMI(MBB, I, DL, get(Mips::RDDSP), DestReg).addImm(1 << 4)
+ .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
+ return;
+ }
}
else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg.
if (Mips::CCRRegClass.contains(DestReg))
Opc = Mips::CTC1;
else if (Mips::FGR32RegClass.contains(DestReg))
Opc = Mips::MTC1;
- else if (DestReg == Mips::HI)
+ else if (Mips::HIRegsRegClass.contains(DestReg))
Opc = Mips::MTHI, DestReg = 0;
- else if (DestReg == Mips::LO)
+ else if (Mips::LORegsRegClass.contains(DestReg))
Opc = Mips::MTLO, DestReg = 0;
+ else if (Mips::HIRegsDSPRegClass.contains(DestReg))
+ Opc = Mips::MTHI_DSP;
+ else if (Mips::LORegsDSPRegClass.contains(DestReg))
+ Opc = Mips::MTLO_DSP;
+ else if (Mips::DSPCCRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(Mips::WRDSP))
+ .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1 << 4)
+ .addReg(DestReg, RegState::ImplicitDefine);
+ return;
+ }
}
else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
Opc = Mips::FMOV_S;
@@ -121,27 +140,21 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
if (Mips::CPU64RegsRegClass.contains(SrcReg))
Opc = Mips::OR64, ZeroReg = Mips::ZERO_64;
- else if (SrcReg == Mips::HI64)
+ else if (Mips::HIRegs64RegClass.contains(SrcReg))
Opc = Mips::MFHI64, SrcReg = 0;
- else if (SrcReg == Mips::LO64)
+ else if (Mips::LORegs64RegClass.contains(SrcReg))
Opc = Mips::MFLO64, SrcReg = 0;
else if (Mips::FGR64RegClass.contains(SrcReg))
Opc = Mips::DMFC1;
}
else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
- if (DestReg == Mips::HI64)
+ if (Mips::HIRegs64RegClass.contains(DestReg))
Opc = Mips::MTHI64, DestReg = 0;
- else if (DestReg == Mips::LO64)
+ else if (Mips::LORegs64RegClass.contains(DestReg))
Opc = Mips::MTLO64, DestReg = 0;
else if (Mips::FGR64RegClass.contains(DestReg))
Opc = Mips::DMTC1;
}
- else if (Mips::ACRegsRegClass.contains(DestReg, SrcReg))
- Opc = Mips::COPY_AC64;
- else if (Mips::ACRegsDSPRegClass.contains(DestReg, SrcReg))
- Opc = Mips::COPY_AC_DSP;
- else if (Mips::ACRegs128RegClass.contains(DestReg, SrcReg))
- Opc = Mips::COPY_AC128;
assert(Opc && "Cannot copy registers");
@@ -178,6 +191,8 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP;
else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128;
+ else if (Mips::DSPCCRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::STORE_CCOND_DSP_P8 : Mips::STORE_CCOND_DSP;
else if (Mips::FGR32RegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
@@ -209,6 +224,8 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP;
else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128;
+ else if (Mips::DSPCCRegClass.hasSubClassEq(RC))
+ Opc = IsN64 ? Mips::LOAD_CCOND_DSP_P8 : Mips::LOAD_CCOND_DSP;
else if (Mips::FGR32RegClass.hasSubClassEq(RC))
Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
index e11e5d1..14a2b27 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -11,29 +11,56 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mips-subtarget"
+
+#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
#include "Mips.h"
#include "MipsRegisterInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "MipsGenSubtargetInfo.inc"
+
using namespace llvm;
+// FIXME: Maybe this should be on by default when Mips16 is specified
+//
+static cl::opt<bool> Mixed16_32(
+ "mips-mixed-16-32",
+ cl::init(false),
+ cl::desc("Allow for a mixture of Mips16 "
+ "and Mips32 code in a single source file"),
+ cl::Hidden);
+
+static cl::opt<bool> Mips_Os16(
+ "mips-os16",
+ cl::init(false),
+  cl::desc("Compile all functions that don't use "
+ "floating point as Mips 16"),
+ cl::Hidden);
+
void MipsSubtarget::anchor() { }
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little,
- Reloc::Model _RM) :
+ Reloc::Model _RM, MipsTargetMachine *_TM) :
MipsGenSubtargetInfo(TT, CPU, FS),
MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false),
HasBitCount(false), HasFPIdx(false),
InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
- RM(_RM)
+ AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
+ RM(_RM), OverrideMode(NoOverride), TM(_TM)
{
std::string CPUName = CPU;
if (CPUName.empty())
@@ -42,6 +69,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
+ PreviousInMips16Mode = InMips16Mode;
+
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
@@ -72,3 +101,48 @@ MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
&Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass);
return OptLevel >= CodeGenOpt::Aggressive;
}
+
+// FIXME: This logic for resetting the subtarget along with
+// the helper classes can probably be simplified but there are a lot of
+// cases so we will defer rewriting this to later.
+//
+void MipsSubtarget::resetSubtarget(MachineFunction *MF) {
+ bool ChangeToMips16 = false, ChangeToNoMips16 = false;
+ DEBUG(dbgs() << "resetSubtargetFeatures" << "\n");
+ AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+ ChangeToMips16 = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ "mips16");
+ ChangeToNoMips16 = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ "nomips16");
+ assert (!(ChangeToMips16 & ChangeToNoMips16) &&
+ "mips16 and nomips16 specified on the same function");
+ if (ChangeToMips16) {
+ if (PreviousInMips16Mode)
+ return;
+ OverrideMode = Mips16Override;
+ PreviousInMips16Mode = true;
+ TM->setHelperClassesMips16();
+ return;
+ } else if (ChangeToNoMips16) {
+ if (!PreviousInMips16Mode)
+ return;
+ OverrideMode = NoMips16Override;
+ PreviousInMips16Mode = false;
+ TM->setHelperClassesMipsSE();
+ return;
+ } else {
+ if (OverrideMode == NoOverride)
+ return;
+ OverrideMode = NoOverride;
+ DEBUG(dbgs() << "back to default" << "\n");
+ if (inMips16Mode() && !PreviousInMips16Mode) {
+ TM->setHelperClassesMips16();
+ PreviousInMips16Mode = true;
+ } else if (!inMips16Mode() && PreviousInMips16Mode) {
+ TM->setHelperClassesMipsSE();
+ PreviousInMips16Mode = false;
+ }
+ return;
+ }
+}
+
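// Editor's note: freestanding sketch (not LLVM code) of the per-function mode
// switching added in MipsSubtarget::resetSubtarget above. A "mips16" or
// "nomips16" function attribute overrides the command-line default, and the
// helper classes are only swapped when the effective mode actually changes.
#include <iostream>

enum class Mode { Default16, Default32 };

struct SubtargetModel {
  Mode Default;
  bool PreviousWas16;

  // Returns the mode used for this function and reports whether a swap of the
  // helper classes would have been needed.
  bool selectFor(bool HasMips16Attr, bool HasNoMips16Attr, bool &Swapped) {
    bool Want16 = HasMips16Attr    ? true
                  : HasNoMips16Attr ? false
                                    : (Default == Mode::Default16);
    Swapped = (Want16 != PreviousWas16);
    PreviousWas16 = Want16;
    return Want16;
  }
};

int main() {
  SubtargetModel ST{Mode::Default32, /*PreviousWas16=*/false};
  bool Swapped;
  std::cout << ST.selectFor(true, false, Swapped) << ' ' << Swapped << '\n';  // 1 1
  std::cout << ST.selectFor(true, false, Swapped) << ' ' << Swapped << '\n';  // 1 0
  std::cout << ST.selectFor(false, true, Swapped) << ' ' << Swapped << '\n';  // 0 1
  return 0;
}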
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
index 7a2e47c..f2f0e15 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -16,7 +16,9 @@
#include "MCTargetDesc/MipsReginfo.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -25,6 +27,8 @@
namespace llvm {
class StringRef;
+class MipsTargetMachine;
+
class MipsSubtarget : public MipsGenSubtargetInfo {
virtual void anchor();
@@ -89,12 +93,23 @@ protected:
// InMips16 -- can process Mips16 instructions
bool InMips16Mode;
+ // PreviousInMips16 -- the function we just processed was in Mips 16 Mode
+ bool PreviousInMips16Mode;
+
// InMicroMips -- can process MicroMips instructions
bool InMicroMipsMode;
// HasDSP, HasDSPR2 -- supports DSP ASE.
bool HasDSP, HasDSPR2;
+ // Allow mixed Mips16 and Mips32 in one source file
+ bool AllowMixed16_32;
+
+ // Optimize for space by compiling all functions as Mips 16 unless
+ // it needs floating point. Functions needing floating point are
+ // compiled as Mips32
+ bool Os16;
+
InstrItineraryData InstrItins;
// The instance to the register info section object
@@ -103,6 +118,12 @@ protected:
// Relocation Model
Reloc::Model RM;
+ // We can override the determination of whether we are in mips16 mode,
+ // e.g. from the command line.
+ enum {NoOverride, Mips16Override, NoMips16Override} OverrideMode;
+
+ MipsTargetMachine *TM;
+
public:
virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
@@ -118,7 +139,8 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
MipsSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool little, Reloc::Model RM);
+ const std::string &FS, bool little, Reloc::Model RM,
+ MipsTargetMachine *TM);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
@@ -137,7 +159,20 @@ public:
bool isSingleFloat() const { return IsSingleFloat; }
bool isNotSingleFloat() const { return !IsSingleFloat; }
bool hasVFPU() const { return HasVFPU; }
- bool inMips16Mode() const { return InMips16Mode; }
+ bool inMips16Mode() const {
+ switch (OverrideMode) {
+ case NoOverride:
+ return InMips16Mode;
+ case Mips16Override:
+ return true;
+ case NoMips16Override:
+ return false;
+ }
+ llvm_unreachable("Unexpected mode");
+ }
+ bool inMips16ModeDefault() {
+ return InMips16Mode;
+ }
bool inMicroMipsMode() const { return InMicroMipsMode; }
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
@@ -153,11 +188,20 @@ public:
bool hasBitCount() const { return HasBitCount; }
bool hasFPIdx() const { return HasFPIdx; }
+ bool allowMixed16_32() const { return AllowMixed16_32; }
+
+ bool os16() const { return Os16; }
+
// Grab MipsRegInfo object
const MipsReginfo &getMReginfo() const { return MRI; }
// Grab relocation model
Reloc::Model getRelocationModel() const {return RM;}
+
+ /// \brief Reset the subtarget for the Mips target.
+ void resetSubtarget(MachineFunction *MF);
+
+
};
} // End llvm namespace
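A hedged sketch of how the new resetSubtarget() hook is intended to be driven: a pass that runs once per MachineFunction can reset the subtarget, and through it the target machine's helper classes, before instruction selection for that function. The code below is illustrative only and is not the actual MipsModuleISelDAGToDAG implementation; in particular it assumes non-const access to the target machine and subtarget:

    #include "MipsSubtarget.h"
    #include "MipsTargetMachine.h"
    #include "llvm/CodeGen/MachineFunction.h"

    // Illustrative only: reset the Mips subtarget at a per-function boundary.
    static void resetForFunction(llvm::MachineFunction &MF) {
      llvm::MipsTargetMachine &TM = static_cast<llvm::MipsTargetMachine &>(
          const_cast<llvm::TargetMachine &>(MF.getTarget()));
      // resetSubtarget() swaps the Mips16/MipsSE helper classes as a side effect.
      const_cast<llvm::MipsSubtarget *>(TM.getSubtargetImpl())->resetSubtarget(&MF);
    }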
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 3336358..ee28e2a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -15,11 +15,26 @@
#include "Mips.h"
#include "MipsFrameLowering.h"
#include "MipsInstrInfo.h"
+#include "MipsModuleISelDAGToDAG.h"
+#include "MipsOs16.h"
+#include "MipsSEFrameLowering.h"
+#include "MipsSEInstrInfo.h"
+#include "MipsSEISelLowering.h"
+#include "MipsSEISelDAGToDAG.h"
+#include "Mips16FrameLowering.h"
+#include "Mips16InstrInfo.h"
+#include "Mips16ISelDAGToDAG.h"
+#include "Mips16ISelLowering.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
+
+
extern "C" void LLVMInitializeMipsTarget() {
// Register the target.
RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget);
@@ -42,7 +57,7 @@ MipsTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool isLittle)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, isLittle, RM),
+ Subtarget(TT, CPU, FS, isLittle, RM, this),
DL(isLittle ?
(Subtarget.isABI_N64() ?
"e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-"
@@ -54,9 +69,46 @@ MipsTargetMachine(const Target &T, StringRef TT,
"E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")),
InstrInfo(MipsInstrInfo::create(*this)),
FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
- TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() {
+ TLInfo(MipsTargetLowering::create(*this)),
+ TSInfo(*this), JITInfo() {
+}
+
+
+void MipsTargetMachine::setHelperClassesMips16() {
+ InstrInfoSE.swap(InstrInfo);
+ FrameLoweringSE.swap(FrameLowering);
+ TLInfoSE.swap(TLInfo);
+ if (!InstrInfo16) {
+ InstrInfo.reset(MipsInstrInfo::create(*this));
+ FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget));
+ TLInfo.reset(MipsTargetLowering::create(*this));
+ } else {
+ InstrInfo16.swap(InstrInfo);
+ FrameLowering16.swap(FrameLowering);
+ TLInfo16.swap(TLInfo);
+ }
+ assert(TLInfo && "null target lowering 16");
+ assert(InstrInfo && "null instr info 16");
+ assert(FrameLowering && "null frame lowering 16");
}
+void MipsTargetMachine::setHelperClassesMipsSE() {
+ InstrInfo16.swap(InstrInfo);
+ FrameLowering16.swap(FrameLowering);
+ TLInfo16.swap(TLInfo);
+ if (!InstrInfoSE) {
+ InstrInfo.reset(MipsInstrInfo::create(*this));
+ FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget));
+ TLInfo.reset(MipsTargetLowering::create(*this));
+ } else {
+ InstrInfoSE.swap(InstrInfo);
+ FrameLoweringSE.swap(FrameLowering);
+ TLInfoSE.swap(TLInfo);
+ }
+ assert(TLInfo && "null target lowering in SE");
+ assert(InstrInfo && "null instr info SE");
+ assert(FrameLowering && "null frame lowering SE");
+}
void MipsebTargetMachine::anchor() { }
MipsebTargetMachine::
@@ -90,6 +142,7 @@ public:
return *getMipsTargetMachine().getSubtargetImpl();
}
+ virtual void addIRPasses();
virtual bool addInstSelector();
virtual bool addPreEmitPass();
};
@@ -99,24 +152,50 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
return new MipsPassConfig(this, PM);
}
+void MipsPassConfig::addIRPasses() {
+ TargetPassConfig::addIRPasses();
+ if (getMipsSubtarget().os16())
+ addPass(createMipsOs16(getMipsTargetMachine()));
+}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
bool MipsPassConfig::addInstSelector() {
- addPass(createMipsISelDag(getMipsTargetMachine()));
+ if (getMipsSubtarget().allowMixed16_32()) {
+ addPass(createMipsModuleISelDag(getMipsTargetMachine()));
+ addPass(createMips16ISelDag(getMipsTargetMachine()));
+ addPass(createMipsSEISelDag(getMipsTargetMachine()));
+ } else {
+ addPass(createMipsISelDag(getMipsTargetMachine()));
+ }
return false;
}
+void MipsTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ if (Subtarget.allowMixed16_32()) {
+ DEBUG(errs() << "No ");
+ //FIXME: The Basic Target Transform Info
+ // pass needs to become a function pass instead of
+ // being an immutable pass; then this method as it exists now
+ // would be unnecessary.
+ PM.add(createNoTargetTransformInfoPass());
+ } else
+ LLVMTargetMachine::addAnalysisPasses(PM);
+ DEBUG(errs() << "Target Transform Info Pass Added\n");
+}
+
// Implemented by targets that want to run passes immediately before
// machine code is emitted. return true if -print-machineinstrs should
// print out the code after the passes.
bool MipsPassConfig::addPreEmitPass() {
MipsTargetMachine &TM = getMipsTargetMachine();
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
addPass(createMipsDelaySlotFillerPass(TM));
- // NOTE: long branch has not been implemented for mips16.
- if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding())
+ if (Subtarget.hasStandardEncoding() ||
+ Subtarget.allowMixed16_32())
addPass(createMipsLongBranchPass(TM));
- if (TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+ if (Subtarget.inMips16Mode() ||
+ Subtarget.allowMixed16_32())
addPass(createMipsConstantIslandPass(TM));
return true;
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
index 7e5f192..ee55708 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.h
@@ -21,6 +21,8 @@
#include "MipsSelectionDAGInfo.h"
#include "MipsSubtarget.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -35,6 +37,12 @@ class MipsTargetMachine : public LLVMTargetMachine {
OwningPtr<const MipsInstrInfo> InstrInfo;
OwningPtr<const MipsFrameLowering> FrameLowering;
OwningPtr<const MipsTargetLowering> TLInfo;
+ OwningPtr<const MipsInstrInfo> InstrInfo16;
+ OwningPtr<const MipsFrameLowering> FrameLowering16;
+ OwningPtr<const MipsTargetLowering> TLInfo16;
+ OwningPtr<const MipsInstrInfo> InstrInfoSE;
+ OwningPtr<const MipsFrameLowering> FrameLoweringSE;
+ OwningPtr<const MipsTargetLowering> TLInfoSE;
MipsSelectionDAGInfo TSInfo;
MipsJITInfo JITInfo;
@@ -47,6 +55,8 @@ public:
virtual ~MipsTargetMachine() {}
+ virtual void addAnalysisPasses(PassManagerBase &PM);
+
virtual const MipsInstrInfo *getInstrInfo() const
{ return InstrInfo.get(); }
virtual const TargetFrameLowering *getFrameLowering() const
@@ -73,6 +83,13 @@ public:
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
+
+ // Set helper classes
+ void setHelperClassesMips16();
+
+ void setHelperClassesMipsSE();
+
+
};
/// MipsebTargetMachine - Mips32/64 big endian target machine.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
index 6a53a44..072c65d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -16,6 +16,7 @@
#define LLVM_TARGET_NVPTX_H
#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/ErrorHandling.h"
@@ -62,6 +63,9 @@ createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
+ModulePass *createGenericToNVVMPass();
+ModulePass *createNVVMReflectPass();
+ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);
bool isImageOrSamplerVal(const Value *, const Module *);
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index ce5d78a..229e4e5 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -68,11 +68,12 @@ InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
namespace {
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
/// depends.
-void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+void DiscoverDependentGlobals(const Value *V,
+ DenseSet<const GlobalVariable *> &Globals) {
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
Globals.insert(GV);
else {
- if (User *U = dyn_cast<User>(V)) {
+ if (const User *U = dyn_cast<User>(V)) {
for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
DiscoverDependentGlobals(U->getOperand(i), Globals);
}
@@ -84,8 +85,9 @@ void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
/// instances to be emitted, but only after any dependents have been added
/// first.
void VisitGlobalVariableForEmission(
- GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order,
- DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) {
+ const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
+ DenseSet<const GlobalVariable *> &Visited,
+ DenseSet<const GlobalVariable *> &Visiting) {
// Have we already visited this one?
if (Visited.count(GV))
return;
@@ -98,12 +100,12 @@ void VisitGlobalVariableForEmission(
Visiting.insert(GV);
// Make sure we visit all dependents first
- DenseSet<GlobalVariable *> Others;
+ DenseSet<const GlobalVariable *> Others;
for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
DiscoverDependentGlobals(GV->getOperand(i), Others);
- for (DenseSet<GlobalVariable *>::iterator I = Others.begin(),
- E = Others.end();
+ for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
+ E = Others.end();
I != E; ++I)
VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
@@ -405,6 +407,11 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
SmallString<128> Str;
raw_svector_ostream O(Str);
+ if (!GlobalsEmitted) {
+ emitGlobals(*MF->getFunction()->getParent());
+ GlobalsEmitted = true;
+ }
+
// Set up
MRI = &MF->getRegInfo();
F = MF->getFunction();
@@ -695,7 +702,7 @@ void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
else
O << ".func ";
printReturnValStr(F, O);
- O << *CurrentFnSym << "\n";
+ O << *Mang->getSymbol(F) << "\n";
emitFunctionParamList(F, O);
O << ";\n";
}
@@ -795,7 +802,7 @@ static bool useFuncSeen(const Constant *C,
return false;
}
-void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
llvm::DenseMap<const Function *, bool> seenMap;
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
const Function *F = FI;
@@ -805,7 +812,6 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
continue;
if (F->getIntrinsicID())
continue;
- CurrentFnSym = Mang->getSymbol(F);
emitDeclaration(F, O);
continue;
}
@@ -817,14 +823,12 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
// The use is in the initialization of a global variable
// that is a function pointer, so print a declaration
// for the original function
- CurrentFnSym = Mang->getSymbol(F);
emitDeclaration(F, O);
break;
}
// Emit a declaration of this function if the function that
// uses this constant expr has already been seen.
if (useFuncSeen(C, seenMap)) {
- CurrentFnSym = Mang->getSymbol(F);
emitDeclaration(F, O);
break;
}
@@ -844,7 +848,6 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
// appearing in the module before the callee. so print out
// a declaration for the callee.
if (seenMap.find(caller) != seenMap.end()) {
- CurrentFnSym = Mang->getSymbol(F);
emitDeclaration(F, O);
break;
}
@@ -921,6 +924,12 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
recordAndEmitFilenames(M);
+ GlobalsEmitted = false;
+
+ return false; // success
+}
+
+void NVPTXAsmPrinter::emitGlobals(const Module &M) {
SmallString<128> Str2;
raw_svector_ostream OS2(Str2);
@@ -931,13 +940,13 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
// global variable in order, and ensure that we emit it *after* its dependent
// globals. We use a little extra memory maintaining both a set and a list to
// have fast searches while maintaining a strict ordering.
- SmallVector<GlobalVariable *, 8> Globals;
- DenseSet<GlobalVariable *> GVVisited;
- DenseSet<GlobalVariable *> GVVisiting;
+ SmallVector<const GlobalVariable *, 8> Globals;
+ DenseSet<const GlobalVariable *> GVVisited;
+ DenseSet<const GlobalVariable *> GVVisiting;
// Visit each global variable, in order
- for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
- ++I)
+ for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
assert(GVVisited.size() == M.getGlobalList().size() &&
@@ -951,7 +960,6 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
OS2 << '\n';
OutStreamer.EmitRawText(OS2.str());
- return false; // success
}
void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
@@ -989,6 +997,14 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
}
bool NVPTXAsmPrinter::doFinalization(Module &M) {
+
+ // If we did not emit any functions, then the global declarations have not
+ // yet been emitted.
+ if (!GlobalsEmitted) {
+ emitGlobals(M);
+ GlobalsEmitted = true;
+ }
+
// XXX Temporarily remove global variables so that doFinalization() will not
// emit them again (global variables are emitted at beginning).
@@ -1063,7 +1079,8 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
}
}
-void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
+void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
+ raw_ostream &O,
bool processDemoted) {
// Skip meta data
@@ -1107,10 +1124,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
if (llvm::isSampler(*GVar)) {
O << ".global .samplerref " << llvm::getSamplerName(*GVar);
- Constant *Initializer = NULL;
+ const Constant *Initializer = NULL;
if (GVar->hasInitializer())
Initializer = GVar->getInitializer();
- ConstantInt *CI = NULL;
+ const ConstantInt *CI = NULL;
if (Initializer)
CI = dyn_cast<ConstantInt>(Initializer);
if (CI) {
@@ -1183,7 +1200,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
if (localDecls.find(demotedFunc) != localDecls.end())
localDecls[demotedFunc].push_back(GVar);
else {
- std::vector<GlobalVariable *> temp;
+ std::vector<const GlobalVariable *> temp;
temp.push_back(GVar);
localDecls[demotedFunc] = temp;
}
@@ -1199,7 +1216,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
O << " .";
- O << getPTXFundamentalTypeStr(ETy, false);
+ // Special case: ABI requires that we use .u8 for predicates
+ if (ETy->isIntegerTy(1))
+ O << "u8";
+ else
+ O << getPTXFundamentalTypeStr(ETy, false);
O << " ";
O << *Mang->getSymbol(GVar);
@@ -1209,7 +1230,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
GVar->hasInitializer()) {
- Constant *Initializer = GVar->getInitializer();
+ const Constant *Initializer = GVar->getInitializer();
if (!Initializer->isNullValue()) {
O << " = ";
printScalarConstant(Initializer, O);
@@ -1233,7 +1254,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
GVar->hasInitializer()) {
- Constant *Initializer = GVar->getInitializer();
+ const Constant *Initializer = GVar->getInitializer();
if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
AggBuffer aggBuffer(ElementSize, O, *this);
bufferAggregateConstant(Initializer, &aggBuffer);
@@ -1283,7 +1304,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
if (localDecls.find(f) == localDecls.end())
return;
- std::vector<GlobalVariable *> &gvars = localDecls[f];
+ std::vector<const GlobalVariable *> &gvars = localDecls[f];
for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
O << "\t// demoted variable\n\t";
@@ -1448,7 +1469,7 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
int paramIndex, raw_ostream &O) {
if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
(nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
- O << *CurrentFnSym << "_param_" << paramIndex;
+ O << *Mang->getSymbol(I->getParent()) << "_param_" << paramIndex;
else {
std::string argName = I->getName();
const char *p = argName.c_str();
@@ -1507,11 +1528,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (llvm::isImage(*I)) {
std::string sname = I->getName();
if (llvm::isImageWriteOnly(*I))
- O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
+ O << "\t.param .surfref " << *Mang->getSymbol(F) << "_param_"
+ << paramIndex;
else // Default image is read_only
- O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
+ O << "\t.param .texref " << *Mang->getSymbol(F) << "_param_"
+ << paramIndex;
} else // Should be llvm::isSampler(*I)
- O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
+ O << "\t.param .samplerref " << *Mang->getSymbol(F) << "_param_"
<< paramIndex;
continue;
}
@@ -1564,7 +1587,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
// non-pointer scalar to kernel func
- O << "\t.param ." << getPTXFundamentalTypeStr(Ty) << " ";
+ O << "\t.param .";
+ // Special case: predicate operands become .u8 types
+ if (Ty->isIntegerTy(1))
+ O << "u8";
+ else
+ O << getPTXFundamentalTypeStr(Ty);
+ O << " ";
printParamName(I, paramIndex, O);
continue;
}
@@ -1751,12 +1780,12 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
O << utohexstr(API.getZExtValue());
}
-void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
O << CI->getValue();
return;
}
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
printFPConstant(CFP, O);
return;
}
@@ -1764,13 +1793,13 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
O << "0";
return;
}
- if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+ if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
O << *Mang->getSymbol(GVar);
return;
}
- if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- Value *v = Cexpr->stripPointerCasts();
- if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+ if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ const Value *v = Cexpr->stripPointerCasts();
+ if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
O << *Mang->getSymbol(GVar);
return;
} else {
@@ -1781,7 +1810,7 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
llvm_unreachable("Not scalar type found in printScalarConstant()");
}
-void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
+void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
const DataLayout *TD = TM.getDataLayout();
@@ -1809,13 +1838,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
ptr = (unsigned char *)&int16;
aggBuffer->addBytes(ptr, 2, Bytes);
} else if (ETy == Type::getInt32Ty(CPV->getContext())) {
- if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+ if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
int int32 = (int)(constInt->getZExtValue());
ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
break;
- } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+ } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstantExpression(Cexpr, TD))) {
int int32 = (int)(constInt->getZExtValue());
ptr = (unsigned char *)&int32;
@@ -1831,13 +1860,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
llvm_unreachable("unsupported integer const type");
} else if (ETy == Type::getInt64Ty(CPV->getContext())) {
- if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+ if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
long long int64 = (long long)(constInt->getZExtValue());
ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
break;
- } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+ } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstantExpression(Cexpr, TD))) {
long long int64 = (long long)(constInt->getZExtValue());
ptr = (unsigned char *)&int64;
@@ -1858,7 +1887,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
case Type::FloatTyID:
case Type::DoubleTyID: {
- ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
+ const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
const Type *Ty = CFP->getType();
if (Ty == Type::getFloatTy(CPV->getContext())) {
float float32 = (float) CFP->getValueAPF().convertToFloat();
@@ -1874,10 +1903,10 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
break;
}
case Type::PointerTyID: {
- if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+ if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
aggBuffer->addSymbol(GVar);
- } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- Value *v = Cexpr->stripPointerCasts();
+ } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ const Value *v = Cexpr->stripPointerCasts();
aggBuffer->addSymbol(v);
}
unsigned int s = TD->getTypeAllocSize(CPV->getType());
@@ -1906,7 +1935,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
}
}
-void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
+void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
AggBuffer *aggBuffer) {
const DataLayout *TD = TM.getDataLayout();
int Bytes;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 6dc9fc0..7faa6b2 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -91,7 +91,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
unsigned char *buffer; // the buffer
unsigned numSymbols; // number of symbol addresses
SmallVector<unsigned, 4> symbolPosInBuffer;
- SmallVector<Value *, 4> Symbols;
+ SmallVector<const Value *, 4> Symbols;
private:
unsigned curpos;
@@ -128,7 +128,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
}
return curpos;
}
- void addSymbol(Value *GVar) {
+ void addSymbol(const Value *GVar) {
symbolPosInBuffer.push_back(curpos);
Symbols.push_back(GVar);
numSymbols++;
@@ -153,11 +153,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
if (pos)
O << ", ";
if (pos == nextSymbolPos) {
- Value *v = Symbols[nSym];
- if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+ const Value *v = Symbols[nSym];
+ if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
MCSymbol *Name = AP.Mang->getSymbol(GVar);
O << *Name;
- } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
+ } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
O << *nvptx::LowerConstant(Cexpr, AP);
} else
llvm_unreachable("symbol type unknown");
@@ -205,10 +205,12 @@ private:
void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
// definition autogenerated.
void printInstruction(const MachineInstr *MI, raw_ostream &O);
- void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false);
+ void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
+ bool = false);
void printParamName(int paramIndex, raw_ostream &O);
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
+ void emitGlobals(const Module &M);
void emitHeader(Module &M, raw_ostream &O);
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
@@ -234,6 +236,8 @@ protected:
private:
std::string CurrentBankselLabelInBasicBlock;
+ bool GlobalsEmitted;
+
// This is specific per MachineFunction.
const MachineRegisterInfo *MRI;
// The contents are specific for each
@@ -247,7 +251,7 @@ private:
std::map<const Type *, std::string> TypeNameMap;
// List of variables demoted to a function scope.
- std::map<const Function *, std::vector<GlobalVariable *> > localDecls;
+ std::map<const Function *, std::vector<const GlobalVariable *> > localDecls;
// To record filename to ID mapping
std::map<std::string, unsigned> filenameMap;
@@ -256,15 +260,15 @@ private:
void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
- void printScalarConstant(Constant *CPV, raw_ostream &O);
+ void printScalarConstant(const Constant *CPV, raw_ostream &O);
void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
- void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
- void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
+ void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer);
+ void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer);
void printOperandProper(const MachineOperand &MO);
void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
- void emitDeclarations(Module &, raw_ostream &O);
+ void emitDeclarations(const Module &, raw_ostream &O);
void emitDeclaration(const Function *, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
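The GlobalsEmitted flag turns global emission into an emit-once operation: globals are written either when the first function entry label is printed or, for modules that contain no functions, during doFinalization(). A minimal standalone sketch of that guard pattern (the type and method names here are illustrative, not the printer's real interface):

    struct LazyGlobalEmitter {
      bool GlobalsEmitted;
      LazyGlobalEmitter() : GlobalsEmitted(false) {}
      void emitGlobalsOnce() {
        if (GlobalsEmitted)
          return;
        // ... write all module-level globals, exactly once ...
        GlobalsEmitted = true;
      }
      void onFunctionEntry() { emitGlobalsOnce(); } // mirrors EmitFunctionEntryLabel()
      void onFinalize()      { emitGlobalsOnce(); } // mirrors doFinalization()
    };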
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
new file mode 100644
index 0000000..1077c46
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -0,0 +1,436 @@
+//===-- GenericToNVVM.cpp - Convert generic module to NVVM module - C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Convert generic global variables into either .global or .const access based
+// on the variable's "constant" qualifier.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+
+#include "llvm/PassManager.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/IRBuilder.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeGenericToNVVMPass(PassRegistry &);
+}
+
+namespace {
+class GenericToNVVM : public ModulePass {
+public:
+ static char ID;
+
+ GenericToNVVM() : ModulePass(ID) {}
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ }
+
+private:
+ Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV,
+ IRBuilder<> &Builder);
+ Value *remapConstant(Module *M, Function *F, Constant *C,
+ IRBuilder<> &Builder);
+ Value *remapConstantVectorOrConstantAggregate(Module *M, Function *F,
+ Constant *C,
+ IRBuilder<> &Builder);
+ Value *remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
+ IRBuilder<> &Builder);
+ void remapNamedMDNode(Module *M, NamedMDNode *N);
+ MDNode *remapMDNode(Module *M, MDNode *N);
+
+ typedef ValueMap<GlobalVariable *, GlobalVariable *> GVMapTy;
+ typedef ValueMap<Constant *, Value *> ConstantToValueMapTy;
+ GVMapTy GVMap;
+ ConstantToValueMapTy ConstantToValueMap;
+};
+}
+
+char GenericToNVVM::ID = 0;
+
+ModulePass *llvm::createGenericToNVVMPass() { return new GenericToNVVM(); }
+
+INITIALIZE_PASS(
+ GenericToNVVM, "generic-to-nvvm",
+ "Ensure that the global variables are in the global address space", false,
+ false)
+
+bool GenericToNVVM::runOnModule(Module &M) {
+ // Create a clone of each global variable that has the default address space.
+ // The clone is created with the global address space specifier, and the pair
+ // of original global variable and its clone is placed in the GVMap for later
+ // use.
+
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E;) {
+ GlobalVariable *GV = I++;
+ if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
+ !llvm::isTexture(*GV) && !llvm::isSurface(*GV) &&
+ !GV->getName().startswith("llvm.")) {
+ GlobalVariable *NewGV = new GlobalVariable(
+ M, GV->getType()->getElementType(), GV->isConstant(),
+ GV->getLinkage(), GV->hasInitializer() ? GV->getInitializer() : NULL,
+ "", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);
+ NewGV->copyAttributesFrom(GV);
+ GVMap[GV] = NewGV;
+ }
+ }
+
+ // Return immediately, if every global variable has a specific address space
+ // specifier.
+ if (GVMap.empty()) {
+ return false;
+ }
+
+ // Walk through the instructions in function definitions, and replace any use
+ // of original global variables in GVMap with a use of the corresponding
+ // copies in GVMap. If necessary, promote constants to instructions.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ if (I->isDeclaration()) {
+ continue;
+ }
+ IRBuilder<> Builder(I->getEntryBlock().getFirstNonPHIOrDbg());
+ for (Function::iterator BBI = I->begin(), BBE = I->end(); BBI != BBE;
+ ++BBI) {
+ for (BasicBlock::iterator II = BBI->begin(), IE = BBI->end(); II != IE;
+ ++II) {
+ for (unsigned i = 0, e = II->getNumOperands(); i < e; ++i) {
+ Value *Operand = II->getOperand(i);
+ if (isa<Constant>(Operand)) {
+ II->setOperand(
+ i, remapConstant(&M, I, cast<Constant>(Operand), Builder));
+ }
+ }
+ }
+ }
+ ConstantToValueMap.clear();
+ }
+
+ // Walk through the metadata section and update the debug information
+ // associated with the global variables in the default address space.
+ for (Module::named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end();
+ I != E; I++) {
+ remapNamedMDNode(&M, I);
+ }
+
+ // Walk through the global variable initializers, and replace any use of
+ // original global variables in GVMap with a use of the corresponding copies
+ // in GVMap. The copies need to be bitcast to the original global variable
+ // types, as we cannot use cvta in global variable initializers.
+ for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) {
+ GlobalVariable *GV = I->first;
+ GlobalVariable *NewGV = I->second;
+ ++I;
+ Constant *BitCastNewGV = ConstantExpr::getBitCast(NewGV, GV->getType());
+ // At this point, the remaining uses of GV should be found only in global
+ // variable initializers, as other uses have already been removed
+ // while walking through the instructions in function definitions.
+ for (Value::use_iterator UI = GV->use_begin(), UE = GV->use_end();
+ UI != UE;) {
+ Use &U = (UI++).getUse();
+ U.set(BitCastNewGV);
+ }
+ std::string Name = GV->getName();
+ GV->removeDeadConstantUsers();
+ GV->eraseFromParent();
+ NewGV->setName(Name);
+ }
+ GVMap.clear();
+
+ return true;
+}
+
+Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F,
+ GlobalVariable *GV,
+ IRBuilder<> &Builder) {
+ PointerType *GVType = GV->getType();
+ Value *CVTA = NULL;
+
+ // See if the address space conversion requires the operand to be bitcast
+ // to i8 addrspace(n)* first.
+ EVT ExtendedGVType = EVT::getEVT(GVType->getElementType(), true);
+ if (!ExtendedGVType.isInteger() && !ExtendedGVType.isFloatingPoint()) {
+ // A bitcast to i8 addrspace(n)* on the operand is needed.
+ LLVMContext &Context = M->getContext();
+ unsigned int AddrSpace = GVType->getAddressSpace();
+ Type *DestTy = PointerType::get(Type::getInt8Ty(Context), AddrSpace);
+ CVTA = Builder.CreateBitCast(GV, DestTy, "cvta");
+ // Insert the address space conversion.
+ Type *ResultType =
+ PointerType::get(Type::getInt8Ty(Context), llvm::ADDRESS_SPACE_GENERIC);
+ SmallVector<Type *, 2> ParamTypes;
+ ParamTypes.push_back(ResultType);
+ ParamTypes.push_back(DestTy);
+ Function *CVTAFunction = Intrinsic::getDeclaration(
+ M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes);
+ CVTA = Builder.CreateCall(CVTAFunction, CVTA, "cvta");
+ // Another bitcast from i8 * to <the element type of GVType> * is
+ // required.
+ DestTy =
+ PointerType::get(GVType->getElementType(), llvm::ADDRESS_SPACE_GENERIC);
+ CVTA = Builder.CreateBitCast(CVTA, DestTy, "cvta");
+ } else {
+ // A simple CVTA is enough.
+ SmallVector<Type *, 2> ParamTypes;
+ ParamTypes.push_back(PointerType::get(GVType->getElementType(),
+ llvm::ADDRESS_SPACE_GENERIC));
+ ParamTypes.push_back(GVType);
+ Function *CVTAFunction = Intrinsic::getDeclaration(
+ M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes);
+ CVTA = Builder.CreateCall(CVTAFunction, GV, "cvta");
+ }
+
+ return CVTA;
+}
+
+Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C,
+ IRBuilder<> &Builder) {
+ // If the constant C has been converted already in the given function F, just
+ // return the converted value.
+ ConstantToValueMapTy::iterator CTII = ConstantToValueMap.find(C);
+ if (CTII != ConstantToValueMap.end()) {
+ return CTII->second;
+ }
+
+ Value *NewValue = C;
+ if (isa<GlobalVariable>(C)) {
+ // If the constant C is a global variable and is found in GVMap, generate a
+ // set of instructions that convert the clone of C with the global
+ // address space specifier to a generic pointer.
+ // The constant C cannot be used here, as it will be erased from the
+ // module eventually. And the clone of C with the global address space
+ // specifier cannot be used here either, as it will affect the types of
+ // other instructions in the function. Hence, this address space conversion
+ // is required.
+ GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(C));
+ if (I != GVMap.end()) {
+ NewValue = getOrInsertCVTA(M, F, I->second, Builder);
+ }
+ } else if (isa<ConstantVector>(C) || isa<ConstantArray>(C) ||
+ isa<ConstantStruct>(C)) {
+ // If any element in the constant vector or aggregate C is or uses a global
+ // variable in GVMap, the constant C needs to be reconstructed, using a set
+ // of instructions.
+ NewValue = remapConstantVectorOrConstantAggregate(M, F, C, Builder);
+ } else if (isa<ConstantExpr>(C)) {
+ // If any operand in the constant expression C is or uses a global variable
+ // in GVMap, the constant expression C needs to be reconstructed, using a
+ // set of instructions.
+ NewValue = remapConstantExpr(M, F, cast<ConstantExpr>(C), Builder);
+ }
+
+ ConstantToValueMap[C] = NewValue;
+ return NewValue;
+}
+
+Value *GenericToNVVM::remapConstantVectorOrConstantAggregate(
+ Module *M, Function *F, Constant *C, IRBuilder<> &Builder) {
+ bool OperandChanged = false;
+ SmallVector<Value *, 4> NewOperands;
+ unsigned NumOperands = C->getNumOperands();
+
+ // Check if any element is or uses a global variable in GVMap, and thus
+ // converted to another value.
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *Operand = C->getOperand(i);
+ Value *NewOperand = remapConstant(M, F, cast<Constant>(Operand), Builder);
+ OperandChanged |= Operand != NewOperand;
+ NewOperands.push_back(NewOperand);
+ }
+
+ // If none of the elements has been modified, return C as it is.
+ if (!OperandChanged) {
+ return C;
+ }
+
+ // If any of the elements has been modified, construct the equivalent
+ // vector or aggregate value with a set of instructions and the converted
+ // elements.
+ Value *NewValue = UndefValue::get(C->getType());
+ if (isa<ConstantVector>(C)) {
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *Idx = ConstantInt::get(Type::getInt32Ty(M->getContext()), i);
+ NewValue = Builder.CreateInsertElement(NewValue, NewOperands[i], Idx);
+ }
+ } else {
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ NewValue =
+ Builder.CreateInsertValue(NewValue, NewOperands[i], makeArrayRef(i));
+ }
+ }
+
+ return NewValue;
+}
+
+Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
+ IRBuilder<> &Builder) {
+ bool OperandChanged = false;
+ SmallVector<Value *, 4> NewOperands;
+ unsigned NumOperands = C->getNumOperands();
+
+ // Check if any operand is or uses a global variable in GVMap, and thus
+ // converted to another value.
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *Operand = C->getOperand(i);
+ Value *NewOperand = remapConstant(M, F, cast<Constant>(Operand), Builder);
+ OperandChanged |= Operand != NewOperand;
+ NewOperands.push_back(NewOperand);
+ }
+
+ // If none of the operands has been modified, return C as it is.
+ if (!OperandChanged) {
+ return C;
+ }
+
+ // If any of the operands has been modified, construct the instruction with
+ // the converted operands.
+ unsigned Opcode = C->getOpcode();
+ switch (Opcode) {
+ case Instruction::ICmp:
+ // CompareConstantExpr (icmp)
+ return Builder.CreateICmp(CmpInst::Predicate(C->getPredicate()),
+ NewOperands[0], NewOperands[1]);
+ case Instruction::FCmp:
+ // CompareConstantExpr (fcmp)
+ assert(false && "Address space conversion should have no effect "
+ "on float point CompareConstantExpr (fcmp)!");
+ return C;
+ case Instruction::ExtractElement:
+ // ExtractElementConstantExpr
+ return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]);
+ case Instruction::InsertElement:
+ // InsertElementConstantExpr
+ return Builder.CreateInsertElement(NewOperands[0], NewOperands[1],
+ NewOperands[2]);
+ case Instruction::ShuffleVector:
+ // ShuffleVector
+ return Builder.CreateShuffleVector(NewOperands[0], NewOperands[1],
+ NewOperands[2]);
+ case Instruction::ExtractValue:
+ // ExtractValueConstantExpr
+ return Builder.CreateExtractValue(NewOperands[0], C->getIndices());
+ case Instruction::InsertValue:
+ // InsertValueConstantExpr
+ return Builder.CreateInsertValue(NewOperands[0], NewOperands[1],
+ C->getIndices());
+ case Instruction::GetElementPtr:
+ // GetElementPtrConstantExpr
+ return cast<GEPOperator>(C)->isInBounds()
+ ? Builder.CreateGEP(
+ NewOperands[0],
+ makeArrayRef(&NewOperands[1], NumOperands - 1))
+ : Builder.CreateInBoundsGEP(
+ NewOperands[0],
+ makeArrayRef(&NewOperands[1], NumOperands - 1));
+ case Instruction::Select:
+ // SelectConstantExpr
+ return Builder.CreateSelect(NewOperands[0], NewOperands[1], NewOperands[2]);
+ default:
+ // BinaryConstantExpr
+ if (Instruction::isBinaryOp(Opcode)) {
+ return Builder.CreateBinOp(Instruction::BinaryOps(C->getOpcode()),
+ NewOperands[0], NewOperands[1]);
+ }
+ // UnaryConstantExpr
+ if (Instruction::isCast(Opcode)) {
+ return Builder.CreateCast(Instruction::CastOps(C->getOpcode()),
+ NewOperands[0], C->getType());
+ }
+ assert(false && "GenericToNVVM encountered an unsupported ConstantExpr");
+ return C;
+ }
+}
+
+void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
+
+ bool OperandChanged = false;
+ SmallVector<MDNode *, 16> NewOperands;
+ unsigned NumOperands = N->getNumOperands();
+
+ // Check if any operand is or contains a global variable in GVMap, and thus
+ // converted to another value.
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ MDNode *Operand = N->getOperand(i);
+ MDNode *NewOperand = remapMDNode(M, Operand);
+ OperandChanged |= Operand != NewOperand;
+ NewOperands.push_back(NewOperand);
+ }
+
+ // If none of the operands has been modified, return immediately.
+ if (!OperandChanged) {
+ return;
+ }
+
+ // Replace the old operands with the new operands.
+ N->dropAllReferences();
+ for (SmallVector<MDNode *, 16>::iterator I = NewOperands.begin(),
+ E = NewOperands.end();
+ I != E; ++I) {
+ N->addOperand(*I);
+ }
+}
+
+MDNode *GenericToNVVM::remapMDNode(Module *M, MDNode *N) {
+
+ bool OperandChanged = false;
+ SmallVector<Value *, 8> NewOperands;
+ unsigned NumOperands = N->getNumOperands();
+
+ // Check if any operand is or contains a global variable in GVMap, and thus
+ // converted to another value.
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Value *Operand = N->getOperand(i);
+ Value *NewOperand = Operand;
+ if (Operand) {
+ if (isa<GlobalVariable>(Operand)) {
+ GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(Operand));
+ if (I != GVMap.end()) {
+ NewOperand = I->second;
+ if (++i < NumOperands) {
+ NewOperands.push_back(NewOperand);
+ // Address space of the global variable follows the global variable
+ // in the global variable debug info (see createGlobalVariable in
+ // lib/Analysis/DIBuilder.cpp).
+ NewOperand =
+ ConstantInt::get(Type::getInt32Ty(M->getContext()),
+ I->second->getType()->getAddressSpace());
+ }
+ }
+ } else if (isa<MDNode>(Operand)) {
+ NewOperand = remapMDNode(M, cast<MDNode>(Operand));
+ }
+ }
+ OperandChanged |= Operand != NewOperand;
+ NewOperands.push_back(NewOperand);
+ }
+
+ // If none of the operands has been modified, return N as it is.
+ if (!OperandChanged) {
+ return N;
+ }
+
+ // If any of the operands has been modified, create a new MDNode with the new
+ // operands.
+ return MDNode::get(M->getContext(), makeArrayRef(NewOperands));
+}
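Outside the NVPTX codegen pipeline (where addIRPasses() schedules it, see the NVPTXTargetMachine.cpp hunk below), the new module pass can also be run explicitly through the legacy PassManager. A minimal sketch, assuming the LLVM 3.3-era llvm/PassManager.h interface; the helper function name is made up:

    #include "NVPTX.h"                 // declares llvm::createGenericToNVVMPass()
    #include "llvm/IR/Module.h"
    #include "llvm/PassManager.h"

    void promoteGenericGlobals(llvm::Module &M) {
      llvm::PassManager PM;
      PM.add(llvm::createGenericToNVVMPass()); // clone default-AS globals into the global AS
      PM.run(M);                               // rewrite uses, then erase the originals
    }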
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index e862988..d4378c2 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -42,6 +42,11 @@ static cl::opt<int> UsePrecDivF32(
" IEEE Compliant F32 div.rnd if avaiable."),
cl::init(2));
+static cl::opt<bool>
+UsePrecSqrtF32("nvptx-prec-sqrtf32",
+ cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
+ cl::init(true));
+
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
@@ -74,6 +79,8 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
// Decide how to translate f32 div
do_DIVF32_PREC = UsePrecDivF32;
+ // Decide how to translate f32 sqrt
+ do_SQRTF32_PREC = UsePrecSqrtF32;
// sm less than sm_20 does not support div.rnd. Use div.full.
if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
do_DIVF32_PREC = 1;
@@ -241,7 +248,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(fromType),
getI32Imm(fromTypeWidth), Addr, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (Subtarget.is64Bit()
? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
: SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
@@ -270,7 +277,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(fromType),
getI32Imm(fromTypeWidth), Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (Subtarget.is64Bit()
? SelectADDRri64(N1.getNode(), N1, Base, Offset)
: SelectADDRri(N1.getNode(), N1, Base, Offset)) {
@@ -324,7 +331,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(fromType),
getI32Imm(fromTypeWidth), Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
} else {
if (Subtarget.is64Bit()) {
switch (TargetVT) {
@@ -376,7 +383,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(fromType),
getI32Imm(fromTypeWidth), N1, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
+ NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
}
if (NVPTXLD != NULL) {
@@ -501,7 +508,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
getI32Imm(VecType), getI32Imm(FromType),
getI32Imm(FromTypeWidth), Addr, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else if (Subtarget.is64Bit()
? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
@@ -555,7 +562,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
getI32Imm(VecType), getI32Imm(FromType),
getI32Imm(FromTypeWidth), Base, Offset, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else if (Subtarget.is64Bit()
? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
@@ -659,7 +666,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
getI32Imm(VecType), getI32Imm(FromType),
getI32Imm(FromTypeWidth), Base, Offset, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
@@ -760,7 +767,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
getI32Imm(VecType), getI32Imm(FromType),
getI32Imm(FromTypeWidth), Op1, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
}
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
@@ -962,7 +969,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
}
SDValue Ops[] = { Op1, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
@@ -1055,7 +1062,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(toType),
getI32Imm(toTypeWidth), Addr, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
} else if (Subtarget.is64Bit()
? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
: SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
@@ -1084,7 +1091,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(toType),
getI32Imm(toTypeWidth), Base, Offset, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
} else if (Subtarget.is64Bit()
? SelectADDRri64(N2.getNode(), N2, Base, Offset)
: SelectADDRri(N2.getNode(), N2, Base, Offset)) {
@@ -1138,7 +1145,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(toType),
getI32Imm(toTypeWidth), Base, Offset, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
} else {
if (Subtarget.is64Bit()) {
switch (SourceVT) {
@@ -1190,7 +1197,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(toType),
getI32Imm(toTypeWidth), N2, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
+ NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
}
if (NVPTXST != NULL) {
@@ -1569,7 +1576,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
StOps.push_back(Chain);
- ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());
+ ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 70e8e46..ed16d44 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -41,6 +41,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
// Otherwise, use div.full
int do_DIVF32_PREC;
+ // If true, generate sqrt.rn, else generate sqrt.approx. If FTZ
+ // is true, then generate the corresponding FTZ version.
+ bool do_SQRTF32_PREC;
+
// If true, add .ftz to f32 instructions.
// This is only meaningful for sm_20 and later, as the default
// is not ftz.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index f43abe2..da6dd39 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -75,6 +75,9 @@ def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
+def do_SQRTF32_APPROX : Predicate<"do_SQRTF32_PREC==0">;
+def do_SQRTF32_RN : Predicate<"do_SQRTF32_PREC==1">;
+
def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
def true : Predicate<"1">;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 49e2568..24037ca 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -512,6 +512,16 @@ def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
Float64Regs, int_nvvm_sqrt_rp_d>;
+// nvvm_sqrt intrinsic
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+ (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+ (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+ (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+ (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
+
//
// Rsqrt
//
@@ -1510,38 +1520,12 @@ multiclass G_TO_NG<string Str, Intrinsic Intrin> {
defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
+defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
-
-def cvta_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
- "mov.u32 \t$result, $src;",
- [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen Int32Regs:$src))]>;
-def cvta_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
- "mov.u64 \t$result, $src;",
- [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen Int64Regs:$src))]>;
-
-
-
-// @TODO: Revisit this. There is a type
-// contradiction between iPTRAny and iPTR for the def.
-/*def cvta_const_addr : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
- "mov.u32 \t$result, $src;",
- [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen
- (Wrapper tglobaladdr:$src)))]>;
-def cvta_const_addr_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
- "mov.u64 \t$result, $src;",
- [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen
- (Wrapper tglobaladdr:$src)))]>;*/
-
-
-def cvta_to_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
- "mov.u32 \t$result, $src;",
- [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_constant Int32Regs:$src))]>;
-def cvta_to_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
- "mov.u64 \t$result, $src;",
- [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_constant Int64Regs:$src))]>;
+defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
// nvvm.ptr.gen.to.param
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
index e166be5..e57ace9 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
@@ -32,7 +32,8 @@ public:
/// Override this as NVPTX has its own way of printing switching
/// to a section.
virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
- raw_ostream &OS) const {}
+ raw_ostream &OS,
+ const MCExpr *Subsection) const {}
/// Base address of PTX sections is zero.
virtual bool isBaseAddressKnownZero() const { return true; }
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 67ca6b5..1ae2a7c 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -49,6 +49,7 @@ using namespace llvm;
namespace llvm {
void initializeNVVMReflectPass(PassRegistry&);
+void initializeGenericToNVVMPass(PassRegistry&);
}
extern "C" void LLVMInitializeNVPTXTarget() {
@@ -62,6 +63,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// FIXME: This pass is really intended to be invoked during IR optimization,
// but it's very NVPTX-specific.
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
+ initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
}
NVPTXTargetMachine::NVPTXTargetMachine(
@@ -100,6 +102,7 @@ public:
return getTM<NVPTXTargetMachine>();
}
+ virtual void addIRPasses();
virtual bool addInstSelector();
virtual bool addPreRegAlloc();
};
@@ -110,6 +113,11 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
return PassConfig;
}
+void NVPTXPassConfig::addIRPasses() {
+ TargetPassConfig::addIRPasses();
+ addPass(createGenericToNVVMPass());
+}
+
bool NVPTXPassConfig::addInstSelector() {
addPass(createLowerAggrCopies());
addPass(createSplitBBatBarPass());
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 0ad62ce..3cc324b 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -14,6 +14,7 @@
//
//===----------------------------------------------------------------------===//
+#include "NVPTX.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
@@ -40,7 +41,7 @@ using namespace llvm;
namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
namespace {
-class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass {
+class NVVMReflect : public ModulePass {
private:
StringMap<int> VarMap;
typedef DenseMap<std::string, int>::iterator VarMapIter;
@@ -48,9 +49,18 @@ private:
public:
static char ID;
- NVVMReflect() : ModulePass(ID) {
+ NVVMReflect() : ModulePass(ID), ReflectFunction(0) {
+ initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
VarMap.clear();
- ReflectFunction = 0;
+ }
+
+ NVVMReflect(const StringMap<int> &Mapping)
+ : ModulePass(ID), ReflectFunction(0) {
+ initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
+ for (StringMap<int>::const_iterator I = Mapping.begin(), E = Mapping.end();
+ I != E; ++I) {
+ VarMap[(*I).getKey()] = (*I).getValue();
+ }
}
void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); }
@@ -60,6 +70,14 @@ public:
};
}
+ModulePass *llvm::createNVVMReflectPass() {
+ return new NVVMReflect();
+}
+
+ModulePass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping) {
+ return new NVVMReflect(Mapping);
+}
+
static cl::opt<bool>
NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true),
cl::desc("NVVM reflection, enabled by default"));
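Note (not part of the patch): the hunk above adds an NVVMReflect constructor that takes a StringMap of reflection values, plus createNVVMReflectPass overloads so other code can build the pass directly. A minimal usage sketch follows; it assumes the legacy PassManager of this LLVM version, that the factory is visible via the target-internal NVPTX.h header, and uses "__CUDA_FTZ" only as an illustrative variable name.

#include "NVPTX.h"                 // declares createNVVMReflectPass (target-internal header)
#include "llvm/ADT/StringMap.h"
#include "llvm/PassManager.h"      // legacy pass manager of this LLVM version

void addNVVMReflect(llvm::PassManager &PM) {
  llvm::StringMap<int> ReflectVals;
  ReflectVals["__CUDA_FTZ"] = 1;   // assumed reflection variable, illustration only
  PM.add(llvm::createNVVMReflectPass(ReflectVals));
}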
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
new file mode 100644
index 0000000..f2cb8b8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -0,0 +1,739 @@
+//===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+static unsigned RRegs[32] = {
+ PPC::R0, PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+static unsigned RRegsNoR0[32] = {
+ PPC::ZERO,
+ PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+static unsigned XRegs[32] = {
+ PPC::X0, PPC::X1, PPC::X2, PPC::X3,
+ PPC::X4, PPC::X5, PPC::X6, PPC::X7,
+ PPC::X8, PPC::X9, PPC::X10, PPC::X11,
+ PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+static unsigned XRegsNoX0[32] = {
+ PPC::ZERO8,
+ PPC::X1, PPC::X2, PPC::X3,
+ PPC::X4, PPC::X5, PPC::X6, PPC::X7,
+ PPC::X8, PPC::X9, PPC::X10, PPC::X11,
+ PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+static unsigned FRegs[32] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31
+};
+static unsigned VRegs[32] = {
+ PPC::V0, PPC::V1, PPC::V2, PPC::V3,
+ PPC::V4, PPC::V5, PPC::V6, PPC::V7,
+ PPC::V8, PPC::V9, PPC::V10, PPC::V11,
+ PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+static unsigned CRBITRegs[32] = {
+ PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
+ PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+ PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
+ PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
+ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
+};
+static unsigned CRRegs[8] = {
+ PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
+ PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
+};
+
+struct PPCOperand;
+
+class PPCAsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+ bool IsPPC64;
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ bool isPPC64() const { return IsPPC64; }
+
+ bool MatchRegisterName(const AsmToken &Tok,
+ unsigned &RegNo, int64_t &IntVal);
+
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+ bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+ bool ParseDirectiveTC(unsigned Size, SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm);
+
+ void ProcessInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "PPCGenAsmMatcher.inc"
+
+ /// }
+
+
+public:
+ PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ // Check for 64-bit vs. 32-bit pointer mode.
+ Triple TheTriple(STI.getTargetTriple());
+ IsPPC64 = TheTriple.getArch() == Triple::ppc64;
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ virtual bool ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ virtual bool ParseDirective(AsmToken DirectiveID);
+};
+
+/// PPCOperand - Instances of this class represent a parsed PowerPC machine
+/// instruction.
+struct PPCOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Immediate,
+ Expression
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+ bool IsPPC64;
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct ImmOp {
+ int64_t Val;
+ };
+
+ struct ExprOp {
+ const MCExpr *Val;
+ };
+
+ union {
+ struct TokOp Tok;
+ struct ImmOp Imm;
+ struct ExprOp Expr;
+ };
+
+ PPCOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ PPCOperand(const PPCOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ IsPPC64 = o.IsPPC64;
+ switch (Kind) {
+ case Token:
+ Tok = o.Tok;
+ break;
+ case Immediate:
+ Imm = o.Imm;
+ break;
+ case Expression:
+ Expr = o.Expr;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ /// isPPC64 - True if this operand is for an instruction in 64-bit mode.
+ bool isPPC64() const { return IsPPC64; }
+
+ int64_t getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getExpr() const {
+ assert(Kind == Expression && "Invalid access!");
+ return Expr.Val;
+ }
+
+ unsigned getReg() const {
+ assert(isRegNumber() && "Invalid access!");
+ return (unsigned) Imm.Val;
+ }
+
+ unsigned getCCReg() const {
+ assert(isCCRegNumber() && "Invalid access!");
+ return (unsigned) Imm.Val;
+ }
+
+ unsigned getCRBitMask() const {
+ assert(isCRBitMask() && "Invalid access!");
+ return 7 - CountTrailingZeros_32(Imm.Val);
+ }
+
+ bool isToken() const { return Kind == Token; }
+ bool isImm() const { return Kind == Immediate || Kind == Expression; }
+ bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
+ bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
+ bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
+ bool isU16Imm() const { return Kind == Expression ||
+ (Kind == Immediate && isUInt<16>(getImm())); }
+ bool isS16Imm() const { return Kind == Expression ||
+ (Kind == Immediate && isInt<16>(getImm())); }
+ bool isS16ImmX4() const { return Kind == Expression ||
+ (Kind == Immediate && isInt<16>(getImm()) &&
+ (getImm() & 3) == 0); }
+ bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
+ bool isCCRegNumber() const { return Kind == Immediate &&
+ isUInt<3>(getImm()); }
+ bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) &&
+ isPowerOf2_32(getImm()); }
+ bool isMem() const { return false; }
+ bool isReg() const { return false; }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ llvm_unreachable("addRegOperands");
+ }
+
+ void addRegGPRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(RRegs[getReg()]));
+ }
+
+ void addRegGPRCNoR0Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(RRegsNoR0[getReg()]));
+ }
+
+ void addRegG8RCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(XRegs[getReg()]));
+ }
+
+ void addRegG8RCNoX0Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(XRegsNoX0[getReg()]));
+ }
+
+ void addRegGxRCOperands(MCInst &Inst, unsigned N) const {
+ if (isPPC64())
+ addRegG8RCOperands(Inst, N);
+ else
+ addRegGPRCOperands(Inst, N);
+ }
+
+ void addRegGxRCNoR0Operands(MCInst &Inst, unsigned N) const {
+ if (isPPC64())
+ addRegG8RCNoX0Operands(Inst, N);
+ else
+ addRegGPRCNoR0Operands(Inst, N);
+ }
+
+ void addRegF4RCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()]));
+ }
+
+ void addRegF8RCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()]));
+ }
+
+ void addRegVRRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()]));
+ }
+
+ void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getReg()]));
+ }
+
+ void addRegCRRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(CRRegs[getCCReg()]));
+ }
+
+ void addCRBitMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(CRRegs[getCRBitMask()]));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ if (Kind == Immediate)
+ Inst.addOperand(MCOperand::CreateImm(getImm()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ }
+
+ void addDispRIOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ if (Kind == Immediate)
+ Inst.addOperand(MCOperand::CreateImm(getImm()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ }
+
+ void addDispRIXOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ if (Kind == Immediate)
+ Inst.addOperand(MCOperand::CreateImm(getImm() / 4));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ virtual void print(raw_ostream &OS) const;
+
+ static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) {
+ PPCOperand *Op = new PPCOperand(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static PPCOperand *CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
+ PPCOperand *Op = new PPCOperand(Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static PPCOperand *CreateExpr(const MCExpr *Val,
+ SMLoc S, SMLoc E, bool IsPPC64) {
+ PPCOperand *Op = new PPCOperand(Expression);
+ Op->Expr.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+};
+
+} // end anonymous namespace.
+
+void PPCOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ case Immediate:
+ OS << getImm();
+ break;
+ case Expression:
+ getExpr()->print(OS);
+ break;
+ }
+}
+
+
+void PPCAsmParser::
+ProcessInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ switch (Inst.getOpcode()) {
+ case PPC::SLWI: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(PPC::RLWINM);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(N));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SRWI: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(PPC::RLWINM);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(32 - N));
+ TmpInst.addOperand(MCOperand::CreateImm(N));
+ TmpInst.addOperand(MCOperand::CreateImm(31));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SLDI: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(PPC::RLDICR);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(N));
+ TmpInst.addOperand(MCOperand::CreateImm(63 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SRDI: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(PPC::RLDICL);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(64 - N));
+ TmpInst.addOperand(MCOperand::CreateImm(N));
+ Inst = TmpInst;
+ break;
+ }
+ }
+}
+
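Note (illustration, not LLVM code): ProcessInstruction above rewrites the extended shift mnemonics into their base rotate-and-mask instructions, e.g. slwi rD,rS,n becomes rlwinm rD,rS,n,0,31-n and srwi rD,rS,n becomes rlwinm rD,rS,32-n,n,31. The standalone check below models rlwinm with PowerPC's big-endian bit numbering and verifies both equivalences.

#include <cassert>
#include <cstdint>

// rlwinm: rotate left word immediate, then AND with the mask selecting bits
// mb..me.  Bit 0 is the most significant bit, as in the PowerPC ISA.
static uint32_t rlwinm(uint32_t rs, unsigned sh, unsigned mb, unsigned me) {
  uint32_t rotated = (rs << sh) | (sh ? rs >> (32 - sh) : 0);
  uint32_t mask = 0;
  for (unsigned i = mb; i <= me; ++i)
    mask |= 0x80000000u >> i;
  return rotated & mask;
}

int main() {
  // slwi rD,rS,n == rlwinm rD,rS,n,0,31-n (rotate left, clear the low n bits)
  for (unsigned n = 1; n < 32; ++n)
    assert(rlwinm(0xDEADBEEFu, n, 0, 31 - n) == 0xDEADBEEFu << n);
  // srwi rD,rS,n == rlwinm rD,rS,32-n,n,31 (rotate right, clear the high n bits)
  for (unsigned n = 1; n < 32; ++n)
    assert(rlwinm(0xDEADBEEFu, 32 - n, n, 31) == 0xDEADBEEFu >> n);
  return 0;
}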
+bool PPCAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+
+ switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
+ default: break;
+ case Match_Success:
+ // Post-process instructions (typically extended mnemonics)
+ ProcessInstruction(Inst, Operands);
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction use requires an option to be enabled");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((PPCOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+}
+
+bool PPCAsmParser::
+MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
+ if (Tok.is(AsmToken::Identifier)) {
+ StringRef Name = Tok.getString();
+
+ if (Name.equals_lower("lr")) {
+ RegNo = isPPC64()? PPC::LR8 : PPC::LR;
+ IntVal = 8;
+ return false;
+ } else if (Name.equals_lower("ctr")) {
+ RegNo = isPPC64()? PPC::CTR8 : PPC::CTR;
+ IntVal = 9;
+ return false;
+ } else if (Name.substr(0, 1).equals_lower("r") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = isPPC64()? XRegs[IntVal] : RRegs[IntVal];
+ return false;
+ } else if (Name.substr(0, 1).equals_lower("f") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = FRegs[IntVal];
+ return false;
+ } else if (Name.substr(0, 1).equals_lower("v") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = VRegs[IntVal];
+ return false;
+ } else if (Name.substr(0, 2).equals_lower("cr") &&
+ !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
+ RegNo = CRRegs[IntVal];
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool PPCAsmParser::
+ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ const AsmToken &Tok = Parser.getTok();
+ StartLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
+ RegNo = 0;
+ int64_t IntVal;
+
+ if (!MatchRegisterName(Tok, RegNo, IntVal)) {
+ Parser.Lex(); // Eat identifier token.
+ return false;
+ }
+
+ return Error(StartLoc, "invalid register name");
+}
+
+bool PPCAsmParser::
+ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ const MCExpr *EVal;
+ PPCOperand *Op;
+
+ // Attempt to parse the next token as an immediate
+ switch (getLexer().getKind()) {
+ // Special handling for register names. These are interpreted
+ // as immediates corresponding to the register number.
+ case AsmToken::Percent:
+ Parser.Lex(); // Eat the '%'.
+ unsigned RegNo;
+ int64_t IntVal;
+ if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
+ Parser.Lex(); // Eat the identifier token.
+ Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
+ Operands.push_back(Op);
+ return false;
+ }
+ return Error(S, "invalid register name");
+
+ // All other expressions
+ case AsmToken::LParen:
+ case AsmToken::Plus:
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ case AsmToken::Identifier:
+ case AsmToken::Dot:
+ case AsmToken::Dollar:
+ if (!getParser().parseExpression(EVal))
+ break;
+ /* fall through */
+ default:
+ return Error(S, "unknown operand");
+ }
+
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(EVal))
+ Op = PPCOperand::CreateImm(CE->getValue(), S, E, isPPC64());
+ else
+ Op = PPCOperand::CreateExpr(EVal, S, E, isPPC64());
+
+ // Push the parsed operand into the list of operands
+ Operands.push_back(Op);
+
+ // Check for D-form memory operands
+ if (getLexer().is(AsmToken::LParen)) {
+ Parser.Lex(); // Eat the '('.
+ S = Parser.getTok().getLoc();
+
+ int64_t IntVal;
+ switch (getLexer().getKind()) {
+ case AsmToken::Percent:
+ Parser.Lex(); // Eat the '%'.
+ unsigned RegNo;
+ if (MatchRegisterName(Parser.getTok(), RegNo, IntVal))
+ return Error(S, "invalid register name");
+ Parser.Lex(); // Eat the identifier token.
+ break;
+
+ case AsmToken::Integer:
+ if (getParser().parseAbsoluteExpression(IntVal) ||
+ IntVal < 0 || IntVal > 31)
+ return Error(S, "invalid register number");
+ break;
+
+ default:
+ return Error(S, "invalid memory operand");
+ }
+
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "missing ')'");
+ E = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the ')'.
+
+ Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
+ Operands.push_back(Op);
+ }
+
+ return false;
+}
+
+/// Parse an instruction mnemonic followed by its operands.
+bool PPCAsmParser::
+ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // The first operand is the token for the instruction name.
+ // If the instruction ends in a '.', we need to create a separate
+ // token for it, to match what TableGen is doing.
+ size_t Dot = Name.find('.');
+ StringRef Mnemonic = Name.slice(0, Dot);
+ Operands.push_back(PPCOperand::CreateToken(Mnemonic, NameLoc, isPPC64()));
+ if (Dot != StringRef::npos) {
+ SMLoc DotLoc = SMLoc::getFromPointer(NameLoc.getPointer() + Dot);
+ StringRef DotStr = Name.slice(Dot, StringRef::npos);
+ Operands.push_back(PPCOperand::CreateToken(DotStr, DotLoc, isPPC64()));
+ }
+
+ // If there are no more operands then finish
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return false;
+
+ // Parse the first operand
+ if (ParseOperand(Operands))
+ return true;
+
+ while (getLexer().isNot(AsmToken::EndOfStatement) &&
+ getLexer().is(AsmToken::Comma)) {
+ // Consume the comma token
+ getLexer().Lex();
+
+ // Parse the next operand
+ if (ParseOperand(Operands))
+ return true;
+ }
+
+ return false;
+}
+
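Note (illustration, not LLVM code): ParseInstruction splits a trailing '.' (the record-form marker) into its own token so the generated matcher sees mnemonic and dot separately. A standalone sketch of that split, using std::string in place of StringRef; "stwu." is just an example mnemonic.

#include <cassert>
#include <string>

int main() {
  std::string Name = "stwu.";                        // record-form mnemonic
  std::string::size_type Dot = Name.find('.');
  std::string Mnemonic = Name.substr(0, Dot);        // "stwu"
  std::string DotStr =
      Dot == std::string::npos ? std::string() : Name.substr(Dot);  // "."
  assert(Mnemonic == "stwu" && DotStr == ".");
  (void)Mnemonic; (void)DotStr;
  return 0;
}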
+/// ParseDirective parses the PPC-specific directives.
+bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (IDVal == ".word")
+ return ParseDirectiveWord(4, DirectiveID.getLoc());
+ if (IDVal == ".tc")
+ return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc());
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ getParser().getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+/// ParseDirectiveTC
+/// ::= .tc [ symbol (, expression)* ]
+bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
+ // Skip TC symbol, which is only used with XCOFF.
+ while (getLexer().isNot(AsmToken::EndOfStatement)
+ && getLexer().isNot(AsmToken::Comma))
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ // Align to word size.
+ getParser().getStreamer().EmitValueToAlignment(Size);
+
+ // Emit expressions.
+ return ParseDirectiveWord(Size, L);
+}
+
+/// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmParser() {
+ RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target);
+ RegisterMCAsmParser<PPCAsmParser> B(ThePPC64Target);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "PPCGenAsmMatcher.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index bacc108..93fca00 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -151,8 +151,8 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
return printOperand(MI, OpNo, O);
// Branches can take an immediate operand. This is used by the branch
- // selection pass to print $+8, an eight byte displacement from the PC.
- O << "$+";
+ // selection pass to print .+8, an eight byte displacement from the PC.
+ O << ".+";
printAbsAddrOperand(MI, OpNo, O);
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 84e4175..7a84723 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -77,6 +77,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case PPC::fixup_ppc_br24:
Type = ELF::R_PPC_REL24;
break;
+ case PPC::fixup_ppc_brcond14:
+ Type = ELF::R_PPC_REL14;
+ break;
case FK_Data_4:
case FK_PCRel_4:
Type = ELF::R_PPC_REL32;
@@ -104,7 +107,8 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_DTPREL16_HA:
Type = ELF::R_PPC64_DTPREL16_HA;
break;
- case MCSymbolRefExpr::VK_None:
+ case MCSymbolRefExpr::VK_PPC_GAS_HA16:
+ case MCSymbolRefExpr::VK_PPC_DARWIN_HA16:
Type = ELF::R_PPC_ADDR16_HA;
break;
case MCSymbolRefExpr::VK_PPC_TOC16_HA:
@@ -131,6 +135,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
Type = ELF::R_PPC64_DTPREL16_LO;
break;
case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_ADDR16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GAS_LO16:
+ case MCSymbolRefExpr::VK_PPC_DARWIN_LO16:
Type = ELF::R_PPC_ADDR16_LO;
break;
case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
@@ -153,6 +161,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC64_ADDR16_DS;
break;
+ case MCSymbolRefExpr::VK_PPC_GAS_LO16:
+ case MCSymbolRefExpr::VK_PPC_DARWIN_LO16:
+ Type = ELF::R_PPC64_ADDR16_LO_DS;
+ break;
case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
Type = ELF::R_PPC64_TOC16_DS;
break;
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index d84eb9c..853e505 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -29,3 +29,18 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
}
llvm_unreachable("Unknown PPC branch opcode!");
}
+
+PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) {
+ switch (Opcode) {
+ case PPC::PRED_EQ: return PPC::PRED_EQ;
+ case PPC::PRED_NE: return PPC::PRED_NE;
+ case PPC::PRED_LT: return PPC::PRED_GT;
+ case PPC::PRED_GE: return PPC::PRED_LE;
+ case PPC::PRED_GT: return PPC::PRED_LT;
+ case PPC::PRED_LE: return PPC::PRED_GE;
+ case PPC::PRED_NU: return PPC::PRED_NU;
+ case PPC::PRED_UN: return PPC::PRED_UN;
+ }
+ llvm_unreachable("Unknown PPC branch opcode!");
+}
+
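Note (illustration only): getSwappedPredicate gives the predicate to test after the compare operands are exchanged; LT/GT and GE/LE trade places while EQ, NE, UN and NU map to themselves. A trivial standalone check of the underlying identity:

#include <cassert>

int main() {
  int a = 3, b = 7;
  assert((a <  b) == (b >  a));  // PRED_LT  <-> PRED_GT
  assert((a >= b) == (b <= a));  // PRED_GE  <-> PRED_LE
  assert((a == b) == (b == a));  // PRED_EQ maps to itself (likewise NE, UN, NU)
  return 0;
}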
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index ad2b018..444758c 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -37,6 +37,10 @@ namespace PPC {
/// Invert the specified predicate. != -> ==, < -> >=.
Predicate InvertPredicate(Predicate Opcode);
+
+ /// Assume the condition register is set by MI(a, b); return the predicate to
+ /// use if we modify the instruction so that the condition register is set by MI(b, a).
+ Predicate getSwappedPredicate(Predicate Opcode);
}
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
index 446b685..b4be51a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -31,6 +31,7 @@ namespace llvm {
class MCInst;
FunctionPass *createPPCCTRLoops();
+ FunctionPass *createPPCEarlyReturnPass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
@@ -40,7 +41,7 @@ namespace llvm {
/// \brief Creates an PPC-specific Target Transformation Info pass.
ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
-
+
namespace PPCII {
/// Target Operand Flag enum.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index 3892162..eb73c67 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -95,6 +95,43 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
// VSX p7 vector-scalar instruction set
//===----------------------------------------------------------------------===//
+// Classes used for relation maps.
+//===----------------------------------------------------------------------===//
+// RecFormRel - Filter class used to relate non-record-form instructions with
+// their record-form variants.
+class RecFormRel;
+
+//===----------------------------------------------------------------------===//
+// Relation Map Definitions.
+//===----------------------------------------------------------------------===//
+
+def getRecordFormOpcode : InstrMapping {
+ let FilterClass = "RecFormRel";
+ // Instructions with the same BaseName and Interpretation64Bit values
+ // form a row.
+ let RowFields = ["BaseName", "Interpretation64Bit"];
+ // Instructions with the same RC value form a column.
+ let ColFields = ["RC"];
+ // The key column holds the non-record-form instructions.
+ let KeyCol = ["0"];
+ // Value columns are RC=1
+ let ValueCols = [["1"]];
+}
+
+def getNonRecordFormOpcode : InstrMapping {
+ let FilterClass = "RecFormRel";
+ // Instructions with the same BaseName and Interpretation64Bit values
+ // form a row.
+ let RowFields = ["BaseName", "Interpretation64Bit"];
+ // Instructions with the same RC value form a column.
+ let ColFields = ["RC"];
+ // The key column holds the record-form instructions.
+ let KeyCol = ["1"];
+ // Value columns are RC=0
+ let ValueCols = [["0"]];
+}
+
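Note (not part of the patch): the two InstrMapping defs above let TableGen generate opcode relation tables keyed on a (BaseName, Interpretation64Bit) row and the RC column. The standalone C++ sketch below only models that row/column lookup; the generated lookup functions and the ADD4/ADD4o names are assumptions used purely for illustration.

#include <map>
#include <string>
#include <utility>

// Row key: (BaseName, Interpretation64Bit).  Column key: RC (0 or 1).
using RelTable =
    std::map<std::pair<std::string, bool>, std::map<int, std::string> >;

// Look up the record-form (RC=1) variant of an instruction, or "" if none.
static std::string getRecordForm(const RelTable &T, const std::string &Base,
                                 bool Is64Bit) {
  RelTable::const_iterator Row = T.find(std::make_pair(Base, Is64Bit));
  if (Row == T.end())
    return "";
  std::map<int, std::string>::const_iterator Col = Row->second.find(1);
  return Col == Row->second.end() ? "" : Col->second;
}

int main() {
  RelTable T;
  // Hypothetical row: ADD4 (RC=0) and ADD4o (RC=1) share BaseName "ADD4".
  T[std::make_pair(std::string("ADD4"), false)][0] = "ADD4";
  T[std::make_pair(std::string("ADD4"), false)][1] = "ADD4o";
  return getRecordForm(T, "ADD4", false) == "ADD4o" ? 0 : 1;
}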
+//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -216,7 +253,6 @@ def : ProcessorModel<"ppc64", G5Model,
FeatureFRSQRTE, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
-
//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//
@@ -232,9 +268,14 @@ def PPCAsmWriter : AsmWriter {
bit isMCAsmWriter = 1;
}
+def PPCAsmParser : AsmParser {
+ let ShouldEmitMatchRegisterName = 0;
+}
+
def PPC : Target {
// Information about the instructions.
let InstructionSet = PPCInstrInfo;
let AssemblyWriters = [PPCAsmWriter];
+ let AssemblyParsers = [PPCAsmParser];
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 96a9f0a..3c7cc4e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -721,7 +721,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
return AsmPrinter::EmitFunctionEntryLabel();
// Emit an official procedure descriptor.
- const MCSection *Current = OutStreamer.getCurrentSection();
+ MCSectionSubPair Current = OutStreamer.getCurrentSection();
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
SectionKind::getReadOnly());
@@ -741,7 +741,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
8/*size*/);
// Emit a null environment pointer.
OutStreamer.EmitIntValue(0, 8 /* size */);
- OutStreamer.SwitchSection(Current);
+ OutStreamer.SwitchSection(Current.first, Current.second);
MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
".L." + Twine(CurrentFnSym->getName()));
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index bd1c378..3e608ca 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -112,15 +112,21 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
unsigned MBBStartOffset = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
- if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) {
+ MachineBasicBlock *Dest = 0;
+ if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())
+ Dest = I->getOperand(2).getMBB();
+ else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ ||
+ I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) &&
+ !I->getOperand(0).isImm())
+ Dest = I->getOperand(0).getMBB();
+
+ if (!Dest) {
MBBStartOffset += TII->GetInstSizeInBytes(I);
continue;
}
// Determine the offset from the current branch to the destination
// block.
- MachineBasicBlock *Dest = I->getOperand(2).getMBB();
-
int BranchSize;
if (Dest->getNumber() <= MBB.getNumber()) {
// If this is a backwards branch, the delta is the offset from the
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 3244b90..c845909 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -223,9 +223,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// If we are a leaf function, and use up to 224 bytes of stack space,
// don't have a frame pointer, calls, or dynamic alloca then we do not need
- // to adjust the stack pointer (we fit in the Red Zone). For 64-bit
- // SVR4, we also require a stack frame if we need to spill the CR,
- // since this spill area is addressed relative to the stack pointer.
+ // to adjust the stack pointer (we fit in the Red Zone).
// The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
// stackless code if all local vars are reg-allocated.
bool DisableRedZone = MF.getFunction()->getAttributes().
@@ -237,9 +235,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
- !(Subtarget.isPPC64() && // No 64-bit SVR4 CRsave.
- Subtarget.isSVR4ABI()
- && spillsCR(MF)) &&
(!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
// No need for frame
if (UpdateMF)
@@ -373,6 +368,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// Check if the link register (LR) must be saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
+ const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs();
// Do we have a frame pointer for this function?
bool HasFP = hasFP(MF);
@@ -394,6 +390,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
+ if (!MustSaveCRs.empty()) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), PPC::X12);
+ for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+ MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
+ }
+
if (HasFP)
BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
.addReg(PPC::X31)
@@ -405,6 +408,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::X0)
.addImm(LROffset / 4)
.addReg(PPC::X1);
+
+ if (!MustSaveCRs.empty())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
+ .addReg(PPC::X12, getKillRegState(true))
+ .addImm(8)
+ .addReg(PPC::X1);
} else {
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
@@ -417,6 +426,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addImm(FPOffset)
.addReg(PPC::R1);
+ assert(MustSaveCRs.empty() &&
+ "Prologue CR saving supported only in 64-bit mode");
+
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
.addReg(PPC::R0)
@@ -580,7 +592,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// spilled CRs.
if (Subtarget.isSVR4ABI()
&& (PPC::CR2 <= Reg && Reg <= PPC::CR4)
- && !spillsCR(MF))
+ && MustSaveCRs.empty())
continue;
// For 64-bit SVR4 when we have spilled CRs, the spill location
@@ -636,6 +648,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
+ const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs();
// Do we have a frame pointer for this function?
bool HasFP = hasFP(MF);
@@ -736,10 +749,19 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
.addImm(LROffset/4).addReg(PPC::X1);
+ if (!MustSaveCRs.empty())
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), PPC::X12)
+ .addImm(8).addReg(PPC::X1);
+
if (HasFP)
BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
.addImm(FPOffset/4).addReg(PPC::X1);
+ if (!MustSaveCRs.empty())
+ for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTCRF8), MustSaveCRs[i])
+ .addReg(PPC::X12, getKillRegState(i == e-1));
+
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
} else {
@@ -747,6 +769,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
.addImm(LROffset).addReg(PPC::R1);
+ assert(MustSaveCRs.empty() &&
+ "Epilogue CR restoring supported only in 64-bit mode");
+
if (HasFP)
BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
.addImm(FPOffset).addReg(PPC::R1);
@@ -1122,44 +1147,42 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
*static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
DebugLoc DL;
bool CRSpilled = false;
+ MachineInstrBuilder CRMIB;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
// CR2 through CR4 are the nonvolatile CR fields.
bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
- if (CRSpilled && IsCRField)
- continue;
-
// Add the callee-saved register as live-in; it's killed at the spill.
MBB.addLiveIn(Reg);
+ if (CRSpilled && IsCRField) {
+ CRMIB.addReg(Reg, RegState::ImplicitKill);
+ continue;
+ }
+
// Insert the spill to the stack frame.
if (IsCRField) {
- CRSpilled = true;
- // The first time we see a CR field, store the whole CR into the
- // save slot via GPR12 (available in the prolog for 32- and 64-bit).
+ PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
if (Subtarget.isPPC64()) {
- // 64-bit: SP+8
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR8), PPC::X12));
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW8))
- .addReg(PPC::X12,
- getKillRegState(true))
- .addImm(8)
- .addReg(PPC::X1));
+ // The actual spill will happen at the start of the prologue.
+ FuncInfo->addMustSaveCR(Reg);
} else {
+ CRSpilled = true;
+ FuncInfo->setSpillsCR();
+
// 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
// the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12));
+ CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
+ .addReg(Reg, RegState::ImplicitKill);
+
+ MBB.insert(MI, CRMIB);
MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
.addReg(PPC::R12,
getKillRegState(true)),
CSI[i].getFrameIdx()));
}
-
- // Record that we spill the CR in this function.
- PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
- FuncInfo->setSpillsCR();
} else {
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.storeRegToStackSlot(MBB, MI, Reg, true,
@@ -1170,7 +1193,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
}
static void
-restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
+restoreCRs(bool isPPC64, bool is31,
+ bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
@@ -1180,14 +1204,10 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
DebugLoc DL;
unsigned RestoreOp, MoveReg;
- if (isPPC64) {
- // 64-bit: SP+8
- MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ8), PPC::X12)
- .addImm(8)
- .addReg(PPC::X1));
- RestoreOp = PPC::MTCRF8;
- MoveReg = PPC::X12;
- } else {
+ if (isPPC64)
+ // This is handled during epilogue generation.
+ return;
+ else {
// 32-bit: FP-relative
MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
PPC::R12),
@@ -1297,7 +1317,9 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// least one CR register, restore all spilled CRs together.
if ((CR2Spilled || CR3Spilled || CR4Spilled)
&& !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
- restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled,
+ bool is31 = needsFP(*MF);
+ restoreCRs(Subtarget.isPPC64(), is31,
+ CR2Spilled, CR3Spilled, CR4Spilled,
MBB, I, CSI, CSIIndex);
CR2Spilled = CR3Spilled = CR4Spilled = false;
}
@@ -1320,9 +1342,11 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
}
 // If we haven't yet restored the CRs, do so now.
- if (CR2Spilled || CR3Spilled || CR4Spilled)
- restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled,
+ if (CR2Spilled || CR3Spilled || CR4Spilled) {
+ bool is31 = needsFP(*MF);
+ restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
MBB, I, CSI, CSIIndex);
+ }
return true;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 95efc11..aed0fbb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -457,7 +457,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
SH &= 31;
SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
getI32Imm(ME) };
- return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
}
}
return 0;
@@ -780,7 +780,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
}
case ISD::SETGT: {
SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
- Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
+ Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
0);
return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
getI32Imm(1));
@@ -873,7 +873,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// Get the specified bit.
SDValue Tmp =
- SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
if (Inv) {
assert(OtherCondIdx == -1 && "Can't have split plus negation");
return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
@@ -885,7 +885,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// Get the other bit of the comparison.
Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
SDValue OtherCond =
- SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
}
@@ -1079,7 +1079,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { Offset, Base, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
PPCLowering.getPointerTy(),
- MVT::Other, Ops, 3);
+ MVT::Other, Ops);
} else {
unsigned Opcode;
bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
@@ -1114,7 +1114,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { Base, Offset, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
PPCLowering.getPointerTy(),
- MVT::Other, Ops, 3);
+ MVT::Other, Ops);
}
}
@@ -1163,7 +1163,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
N->getOperand(0).getOperand(1),
getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
- return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
}
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 16fc8a0..3fcafdc 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -71,6 +71,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
PPCRegInfo = TM.getRegisterInfo();
+ PPCII = TM.getInstrInfo();
setPow2DivIsCheap();
@@ -513,7 +514,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
setBooleanContents(ZeroOrOneBooleanContent);
- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+ // Altivec instructions set fields to all zeros or all ones.
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
if (isPPC64) {
setStackPointerRegisterToSaveRestore(PPC::X1);
@@ -4672,10 +4674,14 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
!Op.getOperand(2).getValueType().isFloatingPoint())
return Op;
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ // We might be able to do better than this under some circumstances, but in
+ // general, fsel-based lowering of select is a finite-math-only optimization.
+ // For more information, see section F.3 of the 2.06 ISA specification.
+ if (!DAG.getTarget().Options.NoInfsFPMath ||
+ !DAG.getTarget().Options.NoNaNsFPMath)
+ return Op;
- // Cannot handle SETEQ/SETNE.
- if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
EVT ResVT = Op.getValueType();
EVT CmpVT = Op.getOperand(0).getValueType();
@@ -4685,9 +4691,20 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// If the RHS of the comparison is a 0.0, we don't need to do the
// subtraction at all.
+ SDValue Sel1;
if (isFloatingPointZero(RHS))
switch (CC) {
default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETNE:
+ std::swap(TV, FV);
+ case ISD::SETEQ:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+ if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
@@ -4710,30 +4727,41 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp;
switch (CC) {
default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETNE:
+ std::swap(TV, FV);
+ case ISD::SETEQ:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
- return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOGE:
case ISD::SETGE:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
- return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
case ISD::SETUGT:
case ISD::SETGT:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
- return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOLE:
case ISD::SETLE:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
- return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
}
return Op;
}
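Note (illustration only): the new SETEQ/SETNE handling above builds an equality select out of two fsel operations, fsel(-(a-b), fsel(a-b, TV, FV), FV), which yields TV exactly when a-b is both >= 0 and <= 0, i.e. when a == b. The standalone check below verifies that identity with a scalar model of fsel; it ignores the f32 extension and the NaN cases the real code guards against with the no-NaNs/no-infs options.

#include <cassert>

// Scalar model of fsel: fsel(x, A, B) = (x >= 0 ? A : B).
static double fsel(double x, double a, double b) { return x >= 0.0 ? a : b; }

// select(a == b, tv, fv) lowered as in the patch (finite inputs only).
static double seleq(double a, double b, double tv, double fv) {
  double d = a - b;
  return fsel(-d, fsel(d, tv, fv), fv);
}

int main() {
  assert(seleq(2.0, 2.0, 1.0, 0.0) == 1.0);  // a == b picks TV
  assert(seleq(2.0, 3.0, 1.0, 0.0) == 0.0);  // a <  b picks FV
  assert(seleq(3.0, 2.0, 1.0, 0.0) == 0.0);  // a >  b picks FV
  return 0;
}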
@@ -6239,29 +6267,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8)) {
- unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ?
- PPC::ISEL8 : PPC::ISEL;
- unsigned SelectPred = MI->getOperand(4).getImm();
- DebugLoc dl = MI->getDebugLoc();
+ SmallVector<MachineOperand, 2> Cond;
+ Cond.push_back(MI->getOperand(4));
+ Cond.push_back(MI->getOperand(1));
- unsigned SubIdx;
- bool SwapOps;
- switch (SelectPred) {
- default: llvm_unreachable("invalid predicate for isel");
- case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
- case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
- case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
- case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
- case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
- case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
- case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
- case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
- }
-
- BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(SwapOps? 3 : 2).getReg())
- .addReg(MI->getOperand(SwapOps? 2 : 3).getReg())
- .addReg(MI->getOperand(1).getReg(), 0, SubIdx);
+ DebugLoc dl = MI->getDebugLoc();
+ PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond,
+ MI->getOperand(2).getReg(), MI->getOperand(3).getReg());
} else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7157b70..423e983 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -16,6 +16,7 @@
#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
#include "PPC.h"
+#include "PPCInstrInfo.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -327,6 +328,7 @@ namespace llvm {
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &PPCSubTarget;
const PPCRegisterInfo *PPCRegInfo;
+ const PPCInstrInfo *PPCII;
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index fa5b65f..bff4c23 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -17,17 +17,21 @@
//
def s16imm64 : Operand<i64> {
let PrintMethod = "printS16ImmOperand";
+ let ParserMatchClass = PPCS16ImmAsmOperand;
}
def u16imm64 : Operand<i64> {
let PrintMethod = "printU16ImmOperand";
+ let ParserMatchClass = PPCU16ImmAsmOperand;
}
def symbolHi64 : Operand<i64> {
let PrintMethod = "printSymbolHi";
let EncoderMethod = "getHA16Encoding";
+ let ParserMatchClass = PPCS16ImmAsmOperand;
}
def symbolLo64 : Operand<i64> {
let PrintMethod = "printSymbolLo";
let EncoderMethod = "getLO16Encoding";
+ let ParserMatchClass = PPCS16ImmAsmOperand;
}
def tocentry : Operand<iPTR> {
let MIOperandInfo = (ops i64imm:$imm);
@@ -66,10 +70,17 @@ def HI48_64 : SDNodeXForm<imm, [{
// Calls.
//
+let Interpretation64Bit = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
- let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in {
def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
Requires<[In64BitMode]>;
+
+ let isCodeGenOnly = 1 in
+ def BCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}ctr ${cond:reg}", BrB, []>,
+ Requires<[In64BitMode]>;
+ }
}
let Defs = [LR8] in
@@ -83,8 +94,17 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
"bdnz $dst">;
}
+
+ let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in {
+ def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
+ "bdzlr", BrB, []>;
+ def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
+ "bdnzlr", BrB, []>;
+ }
}
+
+
let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
@@ -116,9 +136,14 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
"bctrl", BrB, [(PPCbctrl)]>,
Requires<[In64BitMode]>;
+
+ let isCodeGenOnly = 1 in
+ def BCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}ctrl ${cond:reg}", BrB, []>,
+ Requires<[In64BitMode]>;
}
}
-
+} // Interpretation64Bit
// Calls
def : Pat<(PPCcall (i64 tglobaladdr:$dst)),
@@ -135,45 +160,46 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64",
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64",
[(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_SUB_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64",
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64",
[(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_OR_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64",
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64",
[(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_XOR_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64",
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64",
[(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_AND_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64",
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64",
[(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_LOAD_NAND_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64",
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
[(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
def ATOMIC_CMP_SWAP_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64",
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
[(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
def ATOMIC_SWAP_I64 : Pseudo<
- (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64",
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64",
[(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
}
}
// Instructions to support atomic operations
-def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr),
+def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
"ldarx $rD, $ptr", LdStLDARX,
[(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
let Defs = [CR0] in
-def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
+def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
"stdcx. $rS, $dst", LdStSTDCX,
[(PPCstcx i64:$rS, xoaddr:$dst)]>,
isDOT;
+let Interpretation64Bit = 1 in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi8 :Pseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
@@ -212,6 +238,7 @@ def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
[]>;
}
+} // Interpretation64Bit
def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
@@ -224,21 +251,25 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
// 64-bit CR instructions
-def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
+let Interpretation64Bit = 1 in {
+let neverHasSideEffects = 1 in {
+def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins g8rc:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
let isCodeGenOnly = 1 in
-def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
+def MFCR8pseud: XFXForm_3<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
"#MFCR8pseud", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
-
-def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
+} // neverHasSideEffects = 1
+
+let neverHasSideEffects = 1 in
+def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
"mfcr $rT", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
- def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP64",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
Requires<[In64BitMode]>;
@@ -253,18 +284,18 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
// 64-bit SPR manipulation instrs.
let Uses = [CTR8] in {
-def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins),
+def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$rT), (ins),
"mfctr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in {
-def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
+def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Pattern = [(set i64:$rT, readcyclecounter)] in
-def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
+def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
"mfspr $rT, 268", SprMFTB>,
PPC970_DGroup_First, PPC970_Unit_FXU;
// Note that encoding mftb using mfspr is now the preferred form,
@@ -273,252 +304,265 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
// the POWER3.
let Defs = [X1], Uses = [X1] in
-def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8",
+def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
[(set i64:$result,
(PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
let Defs = [LR8] in {
-def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS),
+def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS),
"mtlr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Uses = [LR8] in {
-def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins),
+def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins),
"mflr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
+} // Interpretation64Bit
//===----------------------------------------------------------------------===//
// Fixed point instructions.
//
let PPC970_Unit = 1 in { // FXU Operations.
+let Interpretation64Bit = 1 in {
+let neverHasSideEffects = 1 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
-def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
+def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins symbolLo64:$imm),
"li $rD, $imm", IntSimple,
[(set i64:$rD, immSExt16:$imm)]>;
-def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
+def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins symbolHi64:$imm),
"lis $rD, $imm", IntSimple,
[(set i64:$rD, imm16ShiftedSExt:$imm)]>;
}
// Logical ops.
-def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "nand $rA, $rS, $rB", IntSimple,
- [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
-def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "and $rA, $rS, $rB", IntSimple,
- [(set i64:$rA, (and i64:$rS, i64:$rB))]>;
-def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "andc $rA, $rS, $rB", IntSimple,
- [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
-def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "or $rA, $rS, $rB", IntSimple,
- [(set i64:$rA, (or i64:$rS, i64:$rB))]>;
-def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "nor $rA, $rS, $rB", IntSimple,
- [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
-def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "orc $rA, $rS, $rB", IntSimple,
- [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
-def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "eqv $rA, $rS, $rB", IntSimple,
- [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
-def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
- "xor $rA, $rS, $rB", IntSimple,
- [(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
+defm NAND8: XForm_6r<31, 476, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "nand", "$rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
+defm AND8 : XForm_6r<31, 28, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "and", "$rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (and i64:$rS, i64:$rB))]>;
+defm ANDC8: XForm_6r<31, 60, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "andc", "$rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
+defm OR8 : XForm_6r<31, 444, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "or", "$rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (or i64:$rS, i64:$rB))]>;
+defm NOR8 : XForm_6r<31, 124, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "nor", "$rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
+defm ORC8 : XForm_6r<31, 412, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "orc", "$rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
+defm EQV8 : XForm_6r<31, 284, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "eqv", "$rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
+defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "xor", "$rA, $rS, $rB", IntSimple,
+ [(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
// Logical ops with immediate.
-def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+let Defs = [CR0] in {
+def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
"andi. $dst, $src1, $src2", IntGeneral,
[(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
isDOT;
-def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
"andis. $dst, $src1, $src2", IntGeneral,
[(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
-def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+}
+def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
"ori $dst, $src1, $src2", IntSimple,
[(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>;
-def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
"oris $dst, $src1, $src2", IntSimple,
[(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>;
-def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
"xori $dst, $src1, $src2", IntSimple,
[(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>;
-def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
"xoris $dst, $src1, $src2", IntSimple,
[(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
-def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "add $rT, $rA, $rB", IntSimple,
- [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
+defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "add", "$rT, $rA, $rB", IntSimple,
+ [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
// initial-exec thread-local storage model.
let isCodeGenOnly = 1 in
-def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB),
+def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB),
"add $rT, $rA, $rB@tls", IntSimple,
[(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
-let Defs = [CARRY] in {
-def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "addc $rT, $rA, $rB", IntGeneral,
- [(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
- PPC970_DGroup_Cracked;
-def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "addc", "$rT, $rA, $rB", IntGeneral,
+ [(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
+ PPC970_DGroup_Cracked;
+let Defs = [CARRY] in
+def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
"addic $rD, $rA, $imm", IntGeneral,
[(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>;
-}
-def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm),
+def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolLo64:$imm),
"addi $rD, $rA, $imm", IntSimple,
[(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>;
-def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm),
+def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolHi64:$imm),
"addis $rD, $rA, $imm", IntSimple,
[(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
let Defs = [CARRY] in {
-def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
[(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>;
-def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "subfc $rT, $rA, $rB", IntGeneral,
- [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
- PPC970_DGroup_Cracked;
-}
-def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "subf $rT, $rA, $rB", IntGeneral,
- [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
-def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
- "neg $rT, $rA", IntSimple,
- [(set i64:$rT, (ineg i64:$rA))]>;
-let Uses = [CARRY], Defs = [CARRY] in {
-def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "adde $rT, $rA, $rB", IntGeneral,
- [(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
-def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
- "addme $rT, $rA", IntGeneral,
- [(set i64:$rT, (adde i64:$rA, -1))]>;
-def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
- "addze $rT, $rA", IntGeneral,
- [(set i64:$rT, (adde i64:$rA, 0))]>;
-def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "subfe $rT, $rA, $rB", IntGeneral,
- [(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
-def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
- "subfme $rT, $rA", IntGeneral,
- [(set i64:$rT, (sube -1, i64:$rA))]>;
-def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
- "subfze $rT, $rA", IntGeneral,
- [(set i64:$rT, (sube 0, i64:$rA))]>;
-}
-
-
-def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "mulhd $rT, $rA, $rB", IntMulHW,
- [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
-def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "mulhdu $rT, $rA, $rB", IntMulHWU,
- [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
-
-def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
- "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
-def CMPLD : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
- "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
-def CMPDI : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm),
- "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
-def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
- "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
-
-def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
- "sld $rA, $rS, $rB", IntRotateD,
- [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
-def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
- "srd $rA, $rS, $rB", IntRotateD,
- [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
-let Defs = [CARRY] in {
-def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
- "srad $rA, $rS, $rB", IntRotateD,
- [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
+defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "subfc", "$rT, $rA, $rB", IntGeneral,
+ [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
+ PPC970_DGroup_Cracked;
+}
+defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "subf", "$rT, $rA, $rB", IntGeneral,
+ [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
+defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "neg", "$rT, $rA", IntSimple,
+ [(set i64:$rT, (ineg i64:$rA))]>;
+let Uses = [CARRY] in {
+defm ADDE8 : XOForm_1rc<31, 138, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "adde", "$rT, $rA, $rB", IntGeneral,
+ [(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
+defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "addme", "$rT, $rA", IntGeneral,
+ [(set i64:$rT, (adde i64:$rA, -1))]>;
+defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "addze", "$rT, $rA", IntGeneral,
+ [(set i64:$rT, (adde i64:$rA, 0))]>;
+defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "subfe", "$rT, $rA, $rB", IntGeneral,
+ [(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
+defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "subfme", "$rT, $rA", IntGeneral,
+ [(set i64:$rT, (sube -1, i64:$rA))]>;
+defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "subfze", "$rT, $rA", IntGeneral,
+ [(set i64:$rT, (sube 0, i64:$rA))]>;
}
-
-def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
- "extsb $rA, $rS", IntSimple,
- [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
-def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
- "extsh $rA, $rS", IntSimple,
- [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
-
-def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
- "extsw $rA, $rS", IntSimple,
- [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
-def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
- "extsw $rA, $rS", IntSimple,
- [(set i64:$rA, (sext i32:$rS))]>, isPPC64;
-let Defs = [CARRY] in {
-def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
- "sradi $rA, $rS, $SH", IntRotateDI,
- [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
+
+defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "mulhd", "$rT, $rA, $rB", IntMulHW,
+ [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
+defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "mulhdu", "$rT, $rA, $rB", IntMulHWU,
+ [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
+}
+} // Interpretation64Bit
+
+let isCompare = 1, neverHasSideEffects = 1 in {
+ def CMPD : XForm_16_ext<31, 0, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
+ "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
+ def CMPLD : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
+ "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
+ def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm:$imm),
+ "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
+ def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm:$src2),
+ "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
}
-def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
- "cntlzd $rA, $rS", IntGeneral,
- [(set i64:$rA, (ctlz i64:$rS))]>;
-def POPCNTD : XForm_11<31, 506, (outs G8RC:$rA), (ins G8RC:$rS),
- "popcntd $rA, $rS", IntGeneral,
- [(set i64:$rA, (ctpop i64:$rS))]>;
+
+let neverHasSideEffects = 1 in {
+defm SLD : XForm_6r<31, 27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
+ "sld", "$rA, $rS, $rB", IntRotateD,
+ [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
+defm SRD : XForm_6r<31, 539, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
+ "srd", "$rA, $rS, $rB", IntRotateD,
+ [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
+defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
+ "srad", "$rA, $rS, $rB", IntRotateD,
+ [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
+
+let Interpretation64Bit = 1 in {
+defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS),
+ "extsb", "$rA, $rS", IntSimple,
+ [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
+defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
+ "extsh", "$rA, $rS", IntSimple,
+ [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
+} // Interpretation64Bit
+
+defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
+ "extsw", "$rA, $rS", IntSimple,
+ [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
+let Interpretation64Bit = 1 in
+defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS),
+ "extsw", "$rA, $rS", IntSimple,
+ [(set i64:$rA, (sext i32:$rS))]>, isPPC64;
+
+defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+ "sradi", "$rA, $rS, $SH", IntRotateDI,
+ [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
+defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
+ "cntlzd", "$rA, $rS", IntGeneral,
+ [(set i64:$rA, (ctlz i64:$rS))]>;
+defm POPCNTD : XForm_11r<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
+ "popcntd", "$rA, $rS", IntGeneral,
+ [(set i64:$rA, (ctpop i64:$rS))]>;
// popcntw also does a population count on the high 32 bits (storing the
// results in the high 32-bits of the output). We'll ignore that here (which is
// safe because we never separately use the high part of the 64-bit registers).
-def POPCNTW : XForm_11<31, 378, (outs GPRC:$rA), (ins GPRC:$rS),
- "popcntw $rA, $rS", IntGeneral,
- [(set i32:$rA, (ctpop i32:$rS))]>;
-
-def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "divd $rT, $rA, $rB", IntDivD,
- [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "divdu $rT, $rA, $rB", IntDivD,
- [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
- "mulld $rT, $rA, $rB", IntMulHD,
- [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
-
+defm POPCNTW : XForm_11r<31, 378, (outs gprc:$rA), (ins gprc:$rS),
+ "popcntw", "$rA, $rS", IntGeneral,
+ [(set i32:$rA, (ctpop i32:$rS))]>;
+
+defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divd", "$rT, $rA, $rB", IntDivD,
+ [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdu", "$rT, $rA, $rB", IntDivD,
+ [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "mulld", "$rT, $rA, $rB", IntMulHD,
+ [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
+}
+let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
-def RLDIMI : MDForm_1<30, 3,
- (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
- "rldimi $rA, $rS, $SH, $MB", IntRotateDI,
- []>, isPPC64, RegConstraint<"$rSi = $rA">,
- NoEncode<"$rSi">;
+defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA),
+ (ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldimi", "$rA, $rS, $SH, $MBE", IntRotateDI,
+ []>, isPPC64, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
}
// Rotate instructions.
-def RLDCL : MDForm_1<30, 0,
- (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MBE),
- "rldcl $rA, $rS, $rB, $MBE", IntRotateD,
- []>, isPPC64;
-def RLDICL : MDForm_1<30, 0,
- (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
- []>, isPPC64;
-def RLDICR : MDForm_1<30, 1,
- (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicr $rA, $rS, $SH, $MBE", IntRotateDI,
- []>, isPPC64;
-
-def RLWINM8 : MForm_2<21,
- (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
- "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
- []>;
-
+defm RLDCL : MDSForm_1r<30, 8,
+ (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
+ "rldcl", "$rA, $rS, $rB, $MBE", IntRotateD,
+ []>, isPPC64;
+defm RLDICL : MDForm_1r<30, 0,
+ (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI,
+ []>, isPPC64;
+defm RLDICR : MDForm_1r<30, 1,
+ (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI,
+ []>, isPPC64;
+
+let Interpretation64Bit = 1 in {
+defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA),
+ (ins g8rc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm", "$rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+
+let isSelect = 1 in
def ISEL8 : AForm_4<31, 15,
- (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, CRBITRC:$cond),
+ (outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
+} // Interpretation64Bit
+} // neverHasSideEffects = 1
} // End FXU Operations.
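
The `defm` conversions in the hunk above replace plain `def`s with record-form multiclasses (XOForm_1r, XForm_6r, XForm_11r, and so on) that emit both the base instruction and its dot form. As a rough, illustrative sketch — the real multiclasses live in PPCInstrInfo.td and may differ in naming and detail — such a multiclass can be structured like this:

multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
                     string asmbase, string asmstr, InstrItinClass itin,
                     list<dag> pattern> {
  // Plain form, e.g. "add $rT, $rA, $rB".
  def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
                      !strconcat(asmbase, !strconcat(" ", asmstr)),
                      itin, pattern>;
  // Record ("dot") form, e.g. "add. $rT, $rA, $rB", which also compares the
  // result against zero and writes CR0.
  let Defs = [CR0] in
  def o : XOForm_1<opcode, xo, oe, OOL, IOL,
                   !strconcat(asmbase, !strconcat(". ", asmstr)),
                   itin, []>, isDOT;
}

With something of this shape in place, `defm ADD8 : XOForm_1r<31, 266, 0, ...>` would yield both an ADD8 and an ADD8o record, giving the 64-bit instructions record forms without spelling out each dot variant by hand.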
@@ -529,39 +573,43 @@ def ISEL8 : AForm_4<31, 15,
// Sign extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
-def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
+let Interpretation64Bit = 1 in
+def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
"lha $rD, $src", LdStLHA,
[(set i64:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
-def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
+def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
"lwa $rD, $src", LdStLWA,
[(set i64:$rD,
(aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
-def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
+let Interpretation64Bit = 1 in
+def LHAX8: XForm_1<31, 343, (outs g8rc:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
[(set i64:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
-def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
+def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src),
"lwax $rD, $src", LdStLHA,
[(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
// Update forms.
-let mayLoad = 1 in {
-def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+let mayLoad = 1, neverHasSideEffects = 1 in {
+let Interpretation64Bit = 1 in
+def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memri:$addr),
"lhau $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
-def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+let Interpretation64Bit = 1 in
+def LHAUX8 : XForm_1<31, 375, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
@@ -569,87 +617,89 @@ def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
}
}
+let Interpretation64Bit = 1 in {
// Zero extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
-def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
+def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src),
"lbz $rD, $src", LdStLoad,
[(set i64:$rD, (zextloadi8 iaddr:$src))]>;
-def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
+def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src),
"lhz $rD, $src", LdStLoad,
[(set i64:$rD, (zextloadi16 iaddr:$src))]>;
-def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
+def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src),
"lwz $rD, $src", LdStLoad,
[(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
-def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src),
+def LBZX8 : XForm_1<31, 87, (outs g8rc:$rD), (ins memrr:$src),
"lbzx $rD, $src", LdStLoad,
[(set i64:$rD, (zextloadi8 xaddr:$src))]>;
-def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
+def LHZX8 : XForm_1<31, 279, (outs g8rc:$rD), (ins memrr:$src),
"lhzx $rD, $src", LdStLoad,
[(set i64:$rD, (zextloadi16 xaddr:$src))]>;
-def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
+def LWZX8 : XForm_1<31, 23, (outs g8rc:$rD), (ins memrr:$src),
"lwzx $rD, $src", LdStLoad,
[(set i64:$rD, (zextloadi32 xaddr:$src))]>;
// Update forms.
-let mayLoad = 1 in {
-def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LHZU8 : DForm_1<41, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LWZU8 : DForm_1<33, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+def LBZUX8 : XForm_1<31, 119, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+def LHZUX8 : XForm_1<31, 311, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
}
+} // Interpretation64Bit
// Full 8-byte loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
-def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
+def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", LdStLD,
[(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
// The following three definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
-def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtoc",
[(set i64:$rD,
(PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
-def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+def LDtocJTI: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocJTI",
[(set i64:$rD,
(PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
-def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
"#LDtocCPT",
[(set i64:$rD,
(PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
let hasSideEffects = 1, isCodeGenOnly = 1 in {
let RST = 2, DS = 2 in
-def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg),
+def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg),
"ld 2, 8($reg)", LdStLD,
[(PPCload_toc i64:$reg)]>, isPPC64;
@@ -658,25 +708,26 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
"ld 2, 40(1)", LdStLD,
[(PPCtoc_restore)]>, isPPC64;
}
-def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
+def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src),
"ldx $rD, $src", LdStLD,
[(set i64:$rD, (load xaddr:$src))]>, isPPC64;
-def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src),
+def LDBRX : XForm_1<31, 532, (outs g8rc:$rD), (ins memrr:$src),
"ldbrx $rD, $src", LdStLoad,
[(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
-let mayLoad = 1 in
-def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
"ldu $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
-def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"ldux $rD, $addr", LdStLDU,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
+}
def : Pat<(PPCload ixaddr:$src),
(LD ixaddr:$src)>;
@@ -684,108 +735,111 @@ def : Pat<(PPCload xaddr:$src),
(LDX xaddr:$src)>;
// Support for medium and large code model.
-def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
+def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDIStocHA",
[(set i64:$rD,
(PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>,
isPPC64;
-def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC_NOX0:$reg),
+def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
"#LDtocL",
[(set i64:$rD,
(PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64;
-def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
+def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDItocL",
[(set i64:$rD,
(PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64;
// Support for thread-local storage.
-def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
"#ADDISgotTprelHA",
[(set i64:$rD,
(PPCaddisGotTprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC_NOX0:$reg),
+def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins symbolLo64:$disp, g8rc_nox0:$reg),
"#LDgotTprelL",
[(set i64:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
isPPC64;
def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
(ADD8TLS $in, tglobaltlsaddr:$g)>;
-def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
"#ADDIStlsgdHA",
[(set i64:$rD,
(PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp),
"#ADDItlsgdL",
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsADDR",
[(set i64:$rD,
(PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
(PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp),
"#ADDItlsldL",
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsldADDR",
[(set i64:$rD,
(PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,
(PPCaddisDtprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp),
"#ADDIdtprelL",
[(set i64:$rD,
(PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
let PPC970_Unit = 2 in {
+let Interpretation64Bit = 1 in {
// Truncating stores.
-def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
+def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src),
"stb $rS, $src", LdStStore,
[(truncstorei8 i64:$rS, iaddr:$src)]>;
-def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
+def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src),
"sth $rS, $src", LdStStore,
[(truncstorei16 i64:$rS, iaddr:$src)]>;
-def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
+def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src),
"stw $rS, $src", LdStStore,
[(truncstorei32 i64:$rS, iaddr:$src)]>;
-def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
+def STBX8 : XForm_8<31, 215, (outs), (ins g8rc:$rS, memrr:$dst),
"stbx $rS, $dst", LdStStore,
[(truncstorei8 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
+def STHX8 : XForm_8<31, 407, (outs), (ins g8rc:$rS, memrr:$dst),
"sthx $rS, $dst", LdStStore,
[(truncstorei16 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
+def STWX8 : XForm_8<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
[(truncstorei32 i64:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
+} // Interpretation64Bit
+
// Normal 8-byte stores.
-def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
+def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
"std $rS, $dst", LdStSTD,
[(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
-def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
+def STDX : XForm_8<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
"stdx $rS, $dst", LdStSTD,
[(store i64:$rS, xaddr:$dst)]>, isPPC64,
PPC970_DGroup_Cracked;
-def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst),
+def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
"stdbrx $rS, $dst", LdStStore,
[(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
PPC970_DGroup_Cracked;
@@ -793,33 +847,36 @@ def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst),
// Stores with Update (pre-inc).
let PPC970_Unit = 2, mayStore = 1 in {
-def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+let Interpretation64Bit = 1 in {
+def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
"stbu $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
"sthu $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
"stwu $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst),
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst),
"stdu $rS, $dst", LdStSTDU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
isPPC64;
-def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
"stbux $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
"sthux $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
"stwux $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+} // Interpretation64Bit
+
+def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
"stdux $rS, $dst", LdStSTDU, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked, isPPC64;
@@ -852,29 +909,30 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
//
-let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations.
-def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
- "fcfid $frD, $frB", FPGeneral,
- [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
-def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
- "fctidz $frD, $frB", FPGeneral,
- [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
-
-def FCFIDU : XForm_26<63, 974, (outs F8RC:$frD), (ins F8RC:$frB),
- "fcfidu $frD, $frB", FPGeneral,
- [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
-def FCFIDS : XForm_26<59, 846, (outs F4RC:$frD), (ins F8RC:$frB),
- "fcfids $frD, $frB", FPGeneral,
- [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
-def FCFIDUS : XForm_26<59, 974, (outs F4RC:$frD), (ins F8RC:$frB),
- "fcfidus $frD, $frB", FPGeneral,
- [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
-def FCTIDUZ : XForm_26<63, 943, (outs F8RC:$frD), (ins F8RC:$frB),
- "fctiduz $frD, $frB", FPGeneral,
- [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
-def FCTIWUZ : XForm_26<63, 143, (outs F8RC:$frD), (ins F8RC:$frB),
- "fctiwuz $frD, $frB", FPGeneral,
- [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
+let PPC970_Unit = 3, neverHasSideEffects = 1,
+ Uses = [RM] in { // FPU Operations.
+defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fcfid", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
+defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctidz", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
+
+defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fcfidu", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
+defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB),
+ "fcfids", "$frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
+defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB),
+ "fcfidus", "$frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
+defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiduz", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
+defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiwuz", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
}
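
Beyond the record-form multiclasses, the other change running through this file is mechanical: uppercase register classes in the (outs)/(ins) lists (G8RC, GPRC, F8RC, F4RC, CRRC, ...) become lowercase operand names (g8rc, gprc, f8rc, f4rc, crrc, ...), and the same substitution continues into PPCInstrAltivec.td below (VRRC becoming vrrc). These lowercase names are presumably RegisterOperand wrappers around the corresponding register classes, letting each operand carry asm-parser/printer metadata; likewise, the new `let Interpretation64Bit = 1` blocks appear to tag instructions that share an encoding with a 32-bit form but are interpreted here as 64-bit operations. A minimal sketch of such a wrapper, with illustrative names not copied from the PPC .td files, would be:

// Hypothetical matcher class consumed by the asm parser.
def PPCRegG8RCAsmOperand : AsmOperandClass {
  let Name = "RegG8RC";
}
// Lowercase operand wrapping the G8RC register class; instruction definitions
// use this in their (outs)/(ins) lists instead of the raw register class.
def g8rc : RegisterOperand<G8RC> {
  let ParserMatchClass = PPCRegG8RCAsmOperand;
}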
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index a5ba4c8..cc9cf0a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -163,7 +163,7 @@ def vecspltisw : PatLeaf<(build_vector), [{
// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type.
class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
- : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
!strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
[(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>;
@@ -171,7 +171,7 @@ class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
// inputs doesn't match the type of the output.
class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType InTy>
- : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
!strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
[(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>;
@@ -179,14 +179,14 @@ class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
// input types and an output type.
class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType In1Ty, ValueType In2Ty>
- : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+ : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
!strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
[(set OutTy:$vD,
(IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>;
// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type.
class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
- : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
!strconcat(opc, " $vD, $vA, $vB"), VecFP,
[(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;
@@ -194,7 +194,7 @@ class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
// inputs doesn't match the type of the output.
class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType InTy>
- : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
!strconcat(opc, " $vD, $vA, $vB"), VecFP,
[(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>;
@@ -202,13 +202,13 @@ class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
// input types and an output type.
class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType In1Ty, ValueType In2Ty>
- : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
!strconcat(opc, " $vD, $vA, $vB"), VecFP,
[(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>;
// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type.
class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
- : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
!strconcat(opc, " $vD, $vB"), VecFP,
[(set v4f32:$vD, (IntID v4f32:$vB))]>;
@@ -216,7 +216,7 @@ class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
// inputs doesn't match the type of the output.
class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType InTy>
- : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+ : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
!strconcat(opc, " $vD, $vB"), VecFP,
[(set OutTy:$vD, (IntID InTy:$vB))]>;
@@ -234,93 +234,93 @@ def DSSALL : DSS_Form<822, (outs),
(ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
"dssall", LdStLoad /*FIXME*/, []>;
def DST : DSS_Form<342, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
"dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTT : DSS_Form<342, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
"dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTST : DSS_Form<374, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
"dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTSTT : DSS_Form<374, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+ (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
"dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DST64 : DSS_Form<342, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
"dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTT64 : DSS_Form<342, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
"dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTST64 : DSS_Form<374, (outs),
- (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
"dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTSTT64 : DSS_Form<374, (outs),
- (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+ (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
"dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
}
-def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
+def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
"mfvscr $vD", LdStStore,
[(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
-def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
+def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
"mtvscr $vB", LdStLoad,
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
-def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src),
+def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src),
"lvebx $vD, $src", LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
-def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src),
+def LVEHX: XForm_1<31, 39, (outs vrrc:$vD), (ins memrr:$src),
"lvehx $vD, $src", LdStLoad,
[(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
-def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src),
+def LVEWX: XForm_1<31, 71, (outs vrrc:$vD), (ins memrr:$src),
"lvewx $vD, $src", LdStLoad,
[(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
-def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
+def LVX : XForm_1<31, 103, (outs vrrc:$vD), (ins memrr:$src),
"lvx $vD, $src", LdStLoad,
[(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
-def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
+def LVXL : XForm_1<31, 359, (outs vrrc:$vD), (ins memrr:$src),
"lvxl $vD, $src", LdStLoad,
[(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
}
-def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src),
+def LVSL : XForm_1<31, 6, (outs vrrc:$vD), (ins memrr:$src),
"lvsl $vD, $src", LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
PPC970_Unit_LSU;
-def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src),
+def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src),
"lvsr $vD, $src", LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
PPC970_Unit_LSU;
let PPC970_Unit = 2 in { // Stores.
-def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
+def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst),
"stvebx $rS, $dst", LdStStore,
[(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
-def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
+def STVEHX: XForm_8<31, 167, (outs), (ins vrrc:$rS, memrr:$dst),
"stvehx $rS, $dst", LdStStore,
[(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>;
-def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
+def STVEWX: XForm_8<31, 199, (outs), (ins vrrc:$rS, memrr:$dst),
"stvewx $rS, $dst", LdStStore,
[(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>;
-def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
+def STVX : XForm_8<31, 231, (outs), (ins vrrc:$rS, memrr:$dst),
"stvx $rS, $dst", LdStStore,
[(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>;
-def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
+def STVXL : XForm_8<31, 487, (outs), (ins vrrc:$rS, memrr:$dst),
"stvxl $rS, $dst", LdStStore,
[(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>;
}
let PPC970_Unit = 5 in { // VALU Operations.
// VA-Form instructions. 3-input AltiVec ops.
-def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
"vmaddfp $vD, $vA, $vC, $vB", VecFP,
[(set v4f32:$vD,
(fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>;
// FIXME: The fma+fneg pattern won't match because fneg is not legal.
-def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
"vnmsubfp $vD, $vA, $vC, $vB", VecFP,
[(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
(fneg v4f32:$vB))))]>;
@@ -335,23 +335,23 @@ def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
// Shuffles.
-def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
+def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH),
"vsldoi $vD, $vA, $vB, $SH", VecFP,
[(set v16i8:$vD,
(vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
// VX-Form instructions. AltiVec arithmetic ops.
-def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vaddfp $vD, $vA, $vB", VecFP,
[(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
-def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vaddubm $vD, $vA, $vB", VecGeneral,
[(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
-def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vadduhm $vD, $vA, $vB", VecGeneral,
[(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>;
-def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vadduwm $vD, $vA, $vB", VecGeneral,
[(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
@@ -364,27 +364,27 @@ def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>;
def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>;
-def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VAND : VXForm_1<1028, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vand $vD, $vA, $vB", VecFP,
[(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>;
-def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vandc $vD, $vA, $vB", VecFP,
[(set v4i32:$vD, (and v4i32:$vA,
(vnot_ppc v4i32:$vB)))]>;
-def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vcfsx $vD, $vB, $UIMM", VecFP,
[(set v4f32:$vD,
(int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
-def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vcfux $vD, $vB, $UIMM", VecFP,
[(set v4f32:$vD,
(int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
-def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vctsxs $vD, $vB, $UIMM", VecFP,
[(set v4i32:$vD,
(int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
-def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vctuxs $vD, $vB, $UIMM", VecFP,
[(set v4i32:$vD,
(int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
@@ -393,19 +393,19 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
// to integer (fp_to_sint/fp_to_uint) conversions and integer
// to floating-point (sint_to_fp/uint_to_fp) conversions.
let VA = 0 in {
-def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB),
+def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB),
"vcfsx $vD, $vB, 0", VecFP,
[(set v4f32:$vD,
(int_ppc_altivec_vcfsx v4i32:$vB, 0))]>;
-def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB),
+def VCTUXS_0 : VXForm_1<906, (outs vrrc:$vD), (ins vrrc:$vB),
"vctuxs $vD, $vB, 0", VecFP,
[(set v4i32:$vD,
(int_ppc_altivec_vctuxs v4f32:$vB, 0))]>;
-def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB),
+def VCFUX_0 : VXForm_1<778, (outs vrrc:$vD), (ins vrrc:$vB),
"vcfux $vD, $vB, 0", VecFP,
[(set v4f32:$vD,
(int_ppc_altivec_vcfux v4i32:$vB, 0))]>;
-def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB),
+def VCTSXS_0 : VXForm_1<970, (outs vrrc:$vD), (ins vrrc:$vB),
"vctsxs $vD, $vB, 0", VecFP,
[(set v4i32:$vD,
(int_ppc_altivec_vctsxs v4f32:$vB, 0))]>;
@@ -435,22 +435,22 @@ def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>;
def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>;
def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>;
-def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VMRGHB : VXForm_1< 12, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmrghb $vD, $vA, $vB", VecFP,
[(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VMRGHH : VXForm_1< 76, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmrghh $vD, $vA, $vB", VecFP,
[(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VMRGHW : VXForm_1<140, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmrghw $vD, $vA, $vB", VecFP,
[(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VMRGLB : VXForm_1<268, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmrglb $vD, $vA, $vB", VecFP,
[(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VMRGLH : VXForm_1<332, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmrglh $vD, $vA, $vB", VecFP,
[(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VMRGLW : VXForm_1<396, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmrglw $vD, $vA, $vB", VecFP,
[(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>;
@@ -491,18 +491,18 @@ def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>;
def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>;
def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
-def VSUBCUW : VX1_Int_Ty<74, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
+def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
-def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VSUBFP : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vsubfp $vD, $vA, $vB", VecGeneral,
[(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>;
-def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vsububm $vD, $vA, $vB", VecGeneral,
[(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>;
-def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vsubuhm $vD, $vA, $vB", VecGeneral,
[(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>;
-def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vsubuwm $vD, $vA, $vB", VecGeneral,
[(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
@@ -516,21 +516,21 @@ def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>;
def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
-def VSUM4SBS: VX1_Int_Ty3<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs,
+def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs,
v4i32, v16i8, v4i32>;
def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
v4i32, v8i16, v4i32>;
def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
v4i32, v16i8, v4i32>;
-def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vnor $vD, $vA, $vB", VecFP,
[(set v4i32:$vD, (vnot_ppc (or v4i32:$vA,
v4i32:$vB)))]>;
-def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VOR : VXForm_1<1156, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vor $vD, $vA, $vB", VecFP,
[(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>;
-def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VXOR : VXForm_1<1220, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vxor $vD, $vA, $vB", VecFP,
[(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>;
@@ -545,15 +545,15 @@ def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>;
def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>;
def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>;
-def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VSPLTB : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vspltb $vD, $vB, $UIMM", VecPerm,
[(set v16i8:$vD,
(vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>;
-def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vsplth $vD, $vB, $UIMM", VecPerm,
[(set v16i8:$vD,
(vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>;
-def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vspltw $vD, $vB, $UIMM", VecPerm,
[(set v16i8:$vD,
(vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
@@ -569,13 +569,13 @@ def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>;
def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>;
-def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM),
+def VSPLTISB : VXForm_3<780, (outs vrrc:$vD), (ins s5imm:$SIMM),
"vspltisb $vD, $SIMM", VecPerm,
[(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>;
-def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM),
+def VSPLTISH : VXForm_3<844, (outs vrrc:$vD), (ins s5imm:$SIMM),
"vspltish $vD, $SIMM", VecPerm,
[(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>;
-def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM),
+def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM),
"vspltisw $vD, $SIMM", VecPerm,
[(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>;
@@ -590,13 +590,13 @@ def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
v16i8, v4i32>;
def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
v8i16, v4i32>;
-def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vpkuhum $vD, $vA, $vB", VecFP,
[(set v16i8:$vD,
(vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
v16i8, v8i16>;
-def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vpkuwum $vD, $vA, $vB", VecFP,
[(set v16i8:$vD,
(vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
@@ -621,10 +621,10 @@ def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
// Altivec Comparisons.
class VCMP<bits<10> xo, string asmstr, ValueType Ty>
- : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare,
[(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
- : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+ : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare,
[(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
let Defs = [CR6];
let RC = 1;
@@ -665,11 +665,11 @@ def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
let isCodeGenOnly = 1 in
-def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
+def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
"vxor $vD, $vD, $vD", VecFP,
[(set v4i32:$vD, (v4i32 immAllZerosV))]>;
let IMM=-1 in {
-def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins),
+def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
"vspltisw $vD, -1", VecFP,
[(set v4i32:$vD, (v4i32 immAllOnesV))]>;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 400b7e3..b6f4e85 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -35,6 +35,15 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
let TSFlags{1} = PPC970_Single;
let TSFlags{2} = PPC970_Cracked;
let TSFlags{5-3} = PPC970_Unit;
+
+ // Fields used for relation models.
+ string BaseName = "";
+
+ // For cases where multiple instruction definitions really represent the
+ // same underlying instruction but with one definition for 64-bit arguments
+ // and one for 32-bit arguments, this bit breaks the degeneracy between
+ // the two forms and allows TableGen to generate mapping tables.
+ bit Interpretation64Bit = 0;
}
class PPC970_DGroup_First { bits<1> PPC970_First = 1; }
@@ -80,6 +89,10 @@ class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr,
let TSFlags{1} = PPC970_Single;
let TSFlags{2} = PPC970_Cracked;
let TSFlags{5-3} = PPC970_Unit;
+
+ // Fields used for relation models.
+ string BaseName = "";
+ bit Interpretation64Bit = 0;
}
// 1.7.1 I-Form
@@ -177,7 +190,12 @@ class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr,
class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
- : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>;
+ : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern> {
+
+ // Even though ADDICo does not really have an RC bit, provide
+ // the declaration of one here so that isDOT has something to set.
+ bit RC = 0;
+}
class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -347,6 +365,12 @@ class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let RST = 0;
+}
+
class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
@@ -565,9 +589,9 @@ class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
bits<3> CR;
- let BO = BIBO{2-6};
- let BI{0-1} = BIBO{0-1};
- let BI{2-4} = CR;
+ let BO = BIBO{4-0};
+ let BI{0-1} = BIBO{5-6};
+ let BI{2-4} = CR{0-2};
let BH = 0;
}
@@ -837,6 +861,25 @@ class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = RC;
}
+class MDSForm_1<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<5> RB;
+ bits<6> MBE;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-26} = MBE{4,3,2,1,0,5};
+ let Inst{27-30} = xo;
+ let Inst{31} = RC;
+}
// E-1 VA-Form
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 69c54ed..1fb17eb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -18,8 +18,10 @@
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -30,6 +32,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#define GET_INSTRMAP_INFO
#define GET_INSTRINFO_CTOR
#include "PPCGenInstrInfo.inc"
@@ -39,6 +42,9 @@ static cl::
opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
cl::desc("Disable analysis for CTR loops"));
+static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
+cl::desc("Disable compare instruction optimization"), cl::Hidden);
+
PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
@@ -147,7 +153,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
MachineFunction &MF = *MI->getParent()->getParent();
// Normal instructions can be commuted the obvious way.
- if (MI->getOpcode() != PPC::RLWIMI)
+ if (MI->getOpcode() != PPC::RLWIMI &&
+ MI->getOpcode() != PPC::RLWIMIo)
return TargetInstrInfo::commuteInstruction(MI, NewMI);
// Cannot commute if it has a non-zero rotate count.
@@ -417,6 +424,105 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
return 2;
}
+// Select analysis.
+bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+ if (!TM.getSubtargetImpl()->hasISEL())
+ return false;
+
+ if (Cond.size() != 2)
+ return false;
+
+ // If this is really a bdnz-like condition, then it cannot be turned into a
+ // select.
+ if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+ return false;
+
+ // Check register classes.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ if (!RC)
+ return false;
+
+ // isel is for regular integer GPRs only.
+ if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
+ !PPC::G8RCRegClass.hasSubClassEq(RC))
+ return false;
+
+  // FIXME: These numbers are for the A2; how well they work for other cores
+  // is an open question. On the A2, the isel instruction has a 2-cycle latency
+ // but single-cycle throughput. These numbers are used in combination with
+ // the MispredictPenalty setting from the active SchedMachineModel.
+ CondCycles = 1;
+ TrueCycles = 1;
+ FalseCycles = 1;
+
+ return true;
+}
+
+void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc dl,
+ unsigned DestReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const {
+ assert(Cond.size() == 2 &&
+ "PPC branch conditions have two components!");
+
+ assert(TM.getSubtargetImpl()->hasISEL() &&
+ "Cannot insert select on target without ISEL support");
+
+ // Get the register classes.
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ assert(RC && "TrueReg and FalseReg must have overlapping register classes");
+ assert((PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RCRegClass.hasSubClassEq(RC)) &&
+ "isel is for regular integer GPRs only");
+
+ unsigned OpCode =
+ PPC::GPRCRegClass.hasSubClassEq(RC) ? PPC::ISEL : PPC::ISEL8;
+ unsigned SelectPred = Cond[0].getImm();
+
+ unsigned SubIdx;
+ bool SwapOps;
+ switch (SelectPred) {
+ default: llvm_unreachable("invalid predicate for isel");
+ case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
+ case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
+ case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
+ case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
+ case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
+ case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
+ case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
+ case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
+ }
+
+ unsigned FirstReg = SwapOps ? FalseReg : TrueReg,
+ SecondReg = SwapOps ? TrueReg : FalseReg;
+
+ // The first input register of isel cannot be r0. If it is a member
+ // of a register class that can be r0, then copy it first (the
+ // register allocator should eliminate the copy).
+ if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
+ MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
+ const TargetRegisterClass *FirstRC =
+ MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
+ &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
+ unsigned OldFirstReg = FirstReg;
+ FirstReg = MRI.createVirtualRegister(FirstRC);
+ BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
+ .addReg(OldFirstReg);
+ }
+
+ BuildMI(MBB, MI, dl, get(OpCode), DestReg)
+ .addReg(FirstReg).addReg(SecondReg)
+ .addReg(Cond[1].getReg(), 0, SubIdx);
+}
+
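// Editorial sketch, not part of this change: what the operand-ordering rule
// in insertSelect amounts to at the assembly level. isel copies its first
// input when the tested CR bit is 1, so the four negated predicates are
// handled by testing the underlying bit and swapping the true/false inputs:
//
//   r3 = (r4 <  r5) ? r6 : r7   ->   cmpw cr0, r4, r5
//                                    isel r3, r6, r7, cr0.lt
//   r3 = (r4 >= r5) ? r6 : r7   ->   cmpw cr0, r4, r5
//                                    isel r3, r7, r6, cr0.lt
//
// The same rule as a small standalone helper (the helper name is made up;
// the PPC::PRED_* and PPC::sub_* names are the ones used above):
static bool getISelBitAndSwap(unsigned Pred, unsigned &SubIdx, bool &SwapOps) {
  switch (Pred) {
  default: return false;
  case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; return true;
  case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true;  return true;
  case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; return true;
  case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true;  return true;
  case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; return true;
  case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true;  return true;
  case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; return true;
  case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true;  return true;
  }
}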
void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -707,6 +813,555 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
+bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const {
+ // For some instructions, it is legal to fold ZERO into the RA register field.
+ // A zero immediate should always be loaded with a single li.
+ unsigned DefOpc = DefMI->getOpcode();
+ if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
+ return false;
+ if (!DefMI->getOperand(1).isImm())
+ return false;
+ if (DefMI->getOperand(1).getImm() != 0)
+ return false;
+
+  // Note that we cannot invert the arguments of an isel here in order to fold
+  // a ZERO into what is presented as the second argument. All we have here
+  // is the condition bit, and that might come from a CR-logical bit operation.
+
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+
+ // Only fold into real machine instructions.
+ if (UseMCID.isPseudo())
+ return false;
+
+ unsigned UseIdx;
+ for (UseIdx = 0; UseIdx < UseMI->getNumOperands(); ++UseIdx)
+ if (UseMI->getOperand(UseIdx).isReg() &&
+ UseMI->getOperand(UseIdx).getReg() == Reg)
+ break;
+
+ assert(UseIdx < UseMI->getNumOperands() && "Cannot find Reg in UseMI");
+ assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
+
+ const MCOperandInfo *UseInfo = &UseMCID.OpInfo[UseIdx];
+
+ // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
+ // register (which might also be specified as a pointer class kind).
+ if (UseInfo->isLookupPtrRegClass()) {
+ if (UseInfo->RegClass /* Kind */ != 1)
+ return false;
+ } else {
+ if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
+ UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
+ return false;
+ }
+
+ // Make sure this is not tied to an output register (or otherwise
+ // constrained). This is true for ST?UX registers, for example, which
+ // are tied to their output registers.
+ if (UseInfo->Constraints != 0)
+ return false;
+
+ unsigned ZeroReg;
+ if (UseInfo->isLookupPtrRegClass()) {
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
+ } else {
+ ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
+ PPC::ZERO8 : PPC::ZERO;
+ }
+
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ UseMI->getOperand(UseIdx).setReg(ZeroReg);
+
+ if (DeleteDef)
+ DefMI->eraseFromParent();
+
+ return true;
+}
+
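// Editorial sketch, not part of this change: the effect of the zero fold on a
// concrete sequence. In the GPRC_NOR0/G8RC_NOX0 operand classes an RA field
// of 0 encodes a literal zero rather than r0, so a register known to hold
// "li 0" can be replaced by the ZERO pseudo-register:
//
//   li   r5, 0                         (li deleted if r5 has no other use)
//   lwzx r3, r5, r4          ->        lwzx r3, 0, r4
//
// Which register stands in for zero follows the operand description, as in
// the code above (the helper name is made up):
static unsigned zeroRegisterFor(const MCOperandInfo &UseInfo, bool IsPPC64) {
  // Pointer-like operands choose ZERO/ZERO8 from the subtarget; otherwise the
  // operand's register class decides.
  if (UseInfo.isLookupPtrRegClass())
    return IsPPC64 ? PPC::ZERO8 : PPC::ZERO;
  return UseInfo.RegClass == PPC::G8RC_NOX0RegClassID ? PPC::ZERO8 : PPC::ZERO;
}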
+static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I)
+ if (I->definesRegister(PPC::CTR) || I->definesRegister(PPC::CTR8))
+ return true;
+ return false;
+}
+
+// We should make sure that, if we're going to predicate both sides of a
+// condition (a diamond), both sides don't define the counter register. We
+// can predicate counter-decrement-based branches, but while that predicates
+// the branching, it does not predicate the counter decrement. If we tried to
+// merge the diamond into one predicated block, we'd decrement the counter
+// twice.
+bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ const BranchProbability &Probability) const {
+ return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
+}
+
+
+bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const {
+  // The predicated branches are identified by their type, not really by the
+  // explicit presence of a predicate. Furthermore, some of them can be
+  // predicated more than once. Because the if-converter won't try to
+  // predicate any instruction that already claims to be predicated (by
+  // returning true here), always return false. In doing so, we let
+  // isPredicable() be the final word on whether or not the instruction can
+  // be (further) predicated.
+
+ return false;
+}
+
+bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (!MI->isTerminator())
+ return false;
+
+ // Conditional branch is a special case.
+ if (MI->isBranch() && !MI->isBarrier())
+ return true;
+
+ return !isPredicated(MI);
+}
+
+bool PPCInstrInfo::PredicateInstruction(
+ MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ unsigned OpC = MI->getOpcode();
+ if (OpC == PPC::BLR) {
+ if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ MI->setDesc(get(Pred[0].getImm() ?
+ (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) :
+ (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
+ } else {
+ MI->setDesc(get(PPC::BCLR));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg());
+ }
+
+ return true;
+ } else if (OpC == PPC::B) {
+ if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ MI->setDesc(get(Pred[0].getImm() ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
+ } else {
+ MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(PPC::BCC));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg())
+ .addMBB(MBB);
+ }
+
+ return true;
+ } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 ||
+ OpC == PPC::BCTRL || OpC == PPC::BCTRL8) {
+ if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
+ llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
+
+ bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) :
+ (setLR ? PPC::BCCTRL : PPC::BCCTR)));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg());
+ return true;
+ }
+
+ return false;
+}
+
+bool PPCInstrInfo::SubsumesPredicate(
+ const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ assert(Pred1.size() == 2 && "Invalid PPC first predicate");
+ assert(Pred2.size() == 2 && "Invalid PPC second predicate");
+
+ if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
+ return false;
+ if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
+ return false;
+
+ PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
+ PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
+
+ if (P1 == P2)
+ return true;
+
+ // Does P1 subsume P2, e.g. GE subsumes GT.
+ if (P1 == PPC::PRED_LE &&
+ (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
+ return true;
+ if (P1 == PPC::PRED_GE &&
+ (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
+ return true;
+
+ return false;
+}
+
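// Editorial sketch, not part of this change: the subsumption rule above as a
// standalone predicate. "P1 subsumes P2" means that whenever a branch on P2
// would be taken, a branch on P1 would be taken as well (le covers lt and eq,
// ge covers gt and eq), which is what allows the if-converter to keep only
// the P1-predicated copy of an instruction.
static bool predicateSubsumes(PPC::Predicate P1, PPC::Predicate P2) {
  if (P1 == P2)
    return true;
  if (P1 == PPC::PRED_LE)
    return P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ;
  if (P1 == PPC::PRED_GE)
    return P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ;
  return false;
}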
+bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+  // Note: At the present time, the contents of Pred from this function are
+  // unused by IfConversion. This implementation follows ARM by pushing the
+  // CR-defining operand. Because 'DZ' and 'DNZ' count as predicate types,
+  // instructions defining CTR or CTR8 are also included as
+  // predicate-defining instructions.
+
+ const TargetRegisterClass *RCs[] =
+ { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
+ &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
+
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) {
+ const TargetRegisterClass *RC = RCs[c];
+ if (MO.isReg()) {
+ if (MO.isDef() && RC->contains(MO.getReg())) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ } else if (MO.isRegMask()) {
+ for (TargetRegisterClass::iterator I = RC->begin(),
+ IE = RC->end(); I != IE; ++I)
+ if (MO.clobbersPhysReg(*I)) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+ }
+ }
+
+ return Found;
+}
+
+bool PPCInstrInfo::isPredicable(MachineInstr *MI) const {
+ unsigned OpC = MI->getOpcode();
+ switch (OpC) {
+ default:
+ return false;
+ case PPC::B:
+ case PPC::BLR:
+ case PPC::BCTR:
+ case PPC::BCTR8:
+ case PPC::BCTRL:
+ case PPC::BCTRL8:
+ return true;
+ }
+}
+
+bool PPCInstrInfo::analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const {
+ unsigned Opc = MI->getOpcode();
+
+ switch (Opc) {
+ default: return false;
+ case PPC::CMPWI:
+ case PPC::CMPLWI:
+ case PPC::CMPDI:
+ case PPC::CMPLDI:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ Value = MI->getOperand(2).getImm();
+ Mask = 0xFFFF;
+ return true;
+ case PPC::CMPW:
+ case PPC::CMPLW:
+ case PPC::CMPD:
+ case PPC::CMPLD:
+ case PPC::FCMPUS:
+ case PPC::FCMPUD:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = MI->getOperand(2).getReg();
+ return true;
+ }
+}
+
+bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
+ unsigned SrcReg, unsigned SrcReg2,
+ int Mask, int Value,
+ const MachineRegisterInfo *MRI) const {
+ if (DisableCmpOpt)
+ return false;
+
+ int OpC = CmpInstr->getOpcode();
+ unsigned CRReg = CmpInstr->getOperand(0).getReg();
+
+  // FP record forms set CR1 based on the exception status bits, not a
+  // comparison with zero.
+ if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
+ return false;
+
+ // The record forms set the condition register based on a signed comparison
+ // with zero (so says the ISA manual). This is not as straightforward as it
+ // seems, however, because this is always a 64-bit comparison on PPC64, even
+ // for instructions that are 32-bit in nature (like slw for example).
+ // So, on PPC32, for unsigned comparisons, we can use the record forms only
+  // for equality checks (as those don't depend on the sign). On PPC64,
+  // we are restricted to equality for unsigned 64-bit comparisons, and for
+  // signed 32-bit comparisons the applicability is more restricted.
+ bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
+ bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
+ bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
+
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
+ int MIOpC = MI->getOpcode();
+
+ bool equalityOnly = false;
+ bool noSub = false;
+ if (isPPC64) {
+ if (is32BitSignedCompare) {
+ // We can perform this optimization only if MI is sign-extending.
+ if (MIOpC == PPC::SRAW || MIOpC == PPC::SRAWo ||
+ MIOpC == PPC::SRAWI || MIOpC == PPC::SRAWIo ||
+ MIOpC == PPC::EXTSB || MIOpC == PPC::EXTSBo ||
+ MIOpC == PPC::EXTSH || MIOpC == PPC::EXTSHo ||
+ MIOpC == PPC::EXTSW || MIOpC == PPC::EXTSWo) {
+ noSub = true;
+ } else
+ return false;
+ } else if (is32BitUnsignedCompare) {
+ // We can perform this optimization, equality only, if MI is
+ // zero-extending.
+ if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo ||
+ MIOpC == PPC::SLW || MIOpC == PPC::SLWo ||
+ MIOpC == PPC::SRW || MIOpC == PPC::SRWo) {
+ noSub = true;
+ equalityOnly = true;
+ } else
+ return false;
+ } else
+ equalityOnly = is64BitUnsignedCompare;
+ } else
+ equalityOnly = is32BitUnsignedCompare;
+
+ if (equalityOnly) {
+ // We need to check the uses of the condition register in order to reject
+ // non-equality comparisons.
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
+ IE = MRI->use_end(); I != IE; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (UseMI->getOpcode() == PPC::BCC) {
+ unsigned Pred = UseMI->getOperand(0).getImm();
+ if (Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE)
+ continue;
+
+ return false;
+ } else if (UseMI->getOpcode() == PPC::ISEL ||
+ UseMI->getOpcode() == PPC::ISEL8) {
+ unsigned SubIdx = UseMI->getOperand(3).getSubReg();
+ if (SubIdx == PPC::sub_eq)
+ continue;
+
+ return false;
+ } else
+ return false;
+ }
+ }
+
+ // Get ready to iterate backward from CmpInstr.
+ MachineBasicBlock::iterator I = CmpInstr, E = MI,
+ B = CmpInstr->getParent()->begin();
+
+ // Scan forward to find the first use of the compare.
+ for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end();
+ I != EL; ++I) {
+ bool FoundUse = false;
+ for (MachineRegisterInfo::use_iterator J = MRI->use_begin(CRReg),
+ JE = MRI->use_end(); J != JE; ++J)
+ if (&*J == &*I) {
+ FoundUse = true;
+ break;
+ }
+
+ if (FoundUse)
+ break;
+ }
+
+ // Early exit if we're at the beginning of the BB.
+ if (I == B) return false;
+
+ // There are two possible candidates which can be changed to set CR[01].
+ // One is MI, the other is a SUB instruction.
+ // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
+ MachineInstr *Sub = NULL;
+ if (SrcReg2 != 0)
+ // MI is not a candidate for CMPrr.
+ MI = NULL;
+ // FIXME: Conservatively refuse to convert an instruction which isn't in the
+ // same BB as the comparison. This is to allow the check below to avoid calls
+ // (and other explicit clobbers); instead we should really check for these
+ // more explicitly (in at least a few predecessors).
+ else if (MI->getParent() != CmpInstr->getParent() || Value != 0) {
+ // PPC does not have a record-form SUBri.
+ return false;
+ }
+
+ // Search for Sub.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ --I;
+ for (; I != E && !noSub; --I) {
+ const MachineInstr &Instr = *I;
+ unsigned IOpC = Instr.getOpcode();
+
+ if (&*I != CmpInstr && (
+ Instr.modifiesRegister(PPC::CR0, TRI) ||
+ Instr.readsRegister(PPC::CR0, TRI)))
+ // This instruction modifies or uses the record condition register after
+ // the one we want to change. While we could do this transformation, it
+ // would likely not be profitable. This transformation removes one
+ // instruction, and so even forcing RA to generate one move probably
+ // makes it unprofitable.
+ return false;
+
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
+ OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
+ (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
+ ((Instr.getOperand(1).getReg() == SrcReg &&
+ Instr.getOperand(2).getReg() == SrcReg2) ||
+ (Instr.getOperand(1).getReg() == SrcReg2 &&
+ Instr.getOperand(2).getReg() == SrcReg))) {
+ Sub = &*I;
+ break;
+ }
+
+ if (I == B)
+      // We reached the beginning of the block without finding a candidate.
+ return false;
+ }
+
+ // Return false if no candidates exist.
+ if (!MI && !Sub)
+ return false;
+
+ // The single candidate is called MI.
+ if (!MI) MI = Sub;
+
+ int NewOpC = -1;
+ MIOpC = MI->getOpcode();
+ if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8)
+ NewOpC = MIOpC;
+ else {
+ NewOpC = PPC::getRecordFormOpcode(MIOpC);
+ if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
+ NewOpC = MIOpC;
+ }
+
+ // FIXME: On the non-embedded POWER architectures, only some of the record
+ // forms are fast, and we should use only the fast ones.
+
+ // The defining instruction has a record form (or is already a record
+ // form). It is possible, however, that we'll need to reverse the condition
+ // code of the users.
+ if (NewOpC == -1)
+ return false;
+
+ SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
+ SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;
+
+  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
+  // needs to be updated to be based on SUB. Push the condition-code operands
+  // to PredsToUpdate and the sub-register operands to SubRegsToUpdate. If it
+  // is safe to remove CmpInstr, the condition codes of these operands will
+  // be modified.
+ bool ShouldSwap = false;
+ if (Sub) {
+ ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg;
+
+ // The operands to subf are the opposite of sub, so only in the fixed-point
+ // case, invert the order.
+ ShouldSwap = !ShouldSwap;
+ }
+
+ if (ShouldSwap)
+ for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
+ IE = MRI->use_end(); I != IE; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (UseMI->getOpcode() == PPC::BCC) {
+ PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
+ assert((!equalityOnly ||
+ Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) &&
+ "Invalid predicate for equality-only optimization");
+ PredsToUpdate.push_back(std::make_pair(&((*I).getOperand(0)),
+ PPC::getSwappedPredicate(Pred)));
+ } else if (UseMI->getOpcode() == PPC::ISEL ||
+ UseMI->getOpcode() == PPC::ISEL8) {
+ unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
+ assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
+ "Invalid CR bit for equality-only optimization");
+
+ if (NewSubReg == PPC::sub_lt)
+ NewSubReg = PPC::sub_gt;
+ else if (NewSubReg == PPC::sub_gt)
+ NewSubReg = PPC::sub_lt;
+
+ SubRegsToUpdate.push_back(std::make_pair(&((*I).getOperand(3)),
+ NewSubReg));
+ } else // We need to abort on a user we don't understand.
+ return false;
+ }
+
+  // The record-form instruction sets CR0, so erase CmpInstr and copy CR0
+  // into the register the compare used to define. If the defining
+  // instruction had to be switched to its record form, mark CR0 as killed
+  // by that copy.
+ CmpInstr->eraseFromParent();
+
+ MachineBasicBlock::iterator MII = MI;
+ BuildMI(*MI->getParent(), llvm::next(MII), MI->getDebugLoc(),
+ get(TargetOpcode::COPY), CRReg)
+ .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
+
+ if (MIOpC != NewOpC) {
+ // We need to be careful here: we're replacing one instruction with
+ // another, and we need to make sure that we get all of the right
+ // implicit uses and defs. On the other hand, the caller may be holding
+ // an iterator to this instruction, and so we can't delete it (this is
+ // specifically the case if this is the instruction directly after the
+ // compare).
+
+ const MCInstrDesc &NewDesc = get(NewOpC);
+ MI->setDesc(NewDesc);
+
+ if (NewDesc.ImplicitDefs)
+ for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs();
+ *ImpDefs; ++ImpDefs)
+ if (!MI->definesRegister(*ImpDefs))
+ MI->addOperand(*MI->getParent()->getParent(),
+ MachineOperand::CreateReg(*ImpDefs, true, true));
+ if (NewDesc.ImplicitUses)
+ for (const uint16_t *ImpUses = NewDesc.getImplicitUses();
+ *ImpUses; ++ImpUses)
+ if (!MI->readsRegister(*ImpUses))
+ MI->addOperand(*MI->getParent()->getParent(),
+ MachineOperand::CreateReg(*ImpUses, false, true));
+ }
+
+  // Modify the condition codes of the operands collected in PredsToUpdate and
+  // SubRegsToUpdate. Since we have SUB(r1, r2) and CMP(r2, r1), the condition
+  // code needs to be changed from r2 > r1 to r1 < r2, from r2 < r1 to
+  // r1 > r2, etc.
+ for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
+ PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
+
+ for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
+ SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
+
+ return true;
+}
+
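// Editorial sketch, not part of this change: the rewrite optimizeCompareInstr
// performs on a typical sequence. A zero-extending 32-bit operation followed
// by an equality compare against zero has the compare folded into the record
// ("dot") form of the defining instruction:
//
//   slw    r3, r4, r5                  slw.   r3, r4, r5   (sets cr0)
//   cmplwi cr0, r3, 0         ->       (compare deleted)
//   beq    cr0, .LBB0_3                beq    cr0, .LBB0_3
//
// The opcode swap relies on the TableGen-generated relation tables queried
// above (the RecFormRel relation attached to the new multiclasses, keyed on
// BaseName and RC); a small wrapper over those queries (the wrapper name is
// made up):
static int getDotFormOpcode(int Opcode) {
  // Returns the record form of Opcode, Opcode itself if it already is a
  // record form, or -1 if no related form exists.
  int NewOpc = PPC::getRecordFormOpcode(Opcode);
  if (NewOpc != -1)
    return NewOpc;
  if (PPC::getNonRecordFormOpcode(Opcode) != -1)
    return Opcode;
  return -1;
}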
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
@@ -729,3 +1384,152 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 4; // PowerPC instructions are all 4 bytes
}
}
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-early-ret"
+STATISTIC(NumBCLR, "Number of early conditional returns");
+STATISTIC(NumBLR, "Number of early returns");
+
+namespace llvm {
+ void initializePPCEarlyReturnPass(PassRegistry&);
+}
+
+namespace {
+ // PPCEarlyReturn pass - For simple functions without epilogue code, move
+ // returns up, and create conditional returns, to avoid unnecessary
+ // branch-to-blr sequences.
+ struct PPCEarlyReturn : public MachineFunctionPass {
+ static char ID;
+ PPCEarlyReturn() : MachineFunctionPass(ID) {
+ initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCTargetMachine *TM;
+ const PPCInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &ReturnMBB) {
+ bool Changed = false;
+
+ MachineBasicBlock::iterator I = ReturnMBB.begin();
+ I = ReturnMBB.SkipPHIsAndLabels(I);
+
+ // The block must be essentially empty except for the blr.
+ if (I == ReturnMBB.end() || I->getOpcode() != PPC::BLR ||
+ I != ReturnMBB.getLastNonDebugInstr())
+ return Changed;
+
+ SmallVector<MachineBasicBlock*, 8> PredToRemove;
+ for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
+ PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
+ bool OtherReference = false, BlockChanged = false;
+ for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
+ if (J->getOpcode() == PPC::B) {
+ if (J->getOperand(0).getMBB() == &ReturnMBB) {
+ // This is an unconditional branch to the return. Replace the
+ // branch with a blr.
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR));
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBLR;
+ continue;
+ }
+ } else if (J->getOpcode() == PPC::BCC) {
+ if (J->getOperand(2).getMBB() == &ReturnMBB) {
+ // This is a conditional branch to the return. Replace the branch
+ // with a bclr.
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCLR))
+ .addImm(J->getOperand(0).getImm())
+ .addReg(J->getOperand(1).getReg());
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBCLR;
+ continue;
+ }
+ } else if (J->isBranch()) {
+ if (J->isIndirectBranch()) {
+ if (ReturnMBB.hasAddressTaken())
+ OtherReference = true;
+ } else
+ for (unsigned i = 0; i < J->getNumOperands(); ++i)
+ if (J->getOperand(i).isMBB() &&
+ J->getOperand(i).getMBB() == &ReturnMBB)
+ OtherReference = true;
+ } else if (!J->isTerminator() && !J->isDebugValue())
+ break;
+
+ if (J == (*PI)->begin())
+ break;
+
+ --J;
+ }
+
+ if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
+ OtherReference = true;
+
+ // Predecessors are stored in a vector and can't be removed here.
+ if (!OtherReference && BlockChanged) {
+ PredToRemove.push_back(*PI);
+ }
+
+ if (BlockChanged)
+ Changed = true;
+ }
+
+ for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
+ PredToRemove[i]->removeSuccessor(&ReturnMBB);
+
+ if (Changed && !ReturnMBB.hasAddressTaken()) {
+ // We now might be able to merge this blr-only block into its
+ // by-layout predecessor.
+ if (ReturnMBB.pred_size() == 1 &&
+ (*ReturnMBB.pred_begin())->isLayoutSuccessor(&ReturnMBB)) {
+ // Move the blr into the preceding block.
+ MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin();
+ PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
+ PrevMBB.removeSuccessor(&ReturnMBB);
+ }
+
+ if (ReturnMBB.pred_empty())
+ ReturnMBB.eraseFromParent();
+ }
+
+ return Changed;
+ }
+
+public:
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ // If the function does not have at least two blocks, then there is
+ // nothing to do.
+ if (MF.size() < 2)
+ return Changed;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
+ "PowerPC Early-Return Creation", false, false)
+
+char PPCEarlyReturn::ID = 0;
+FunctionPass*
+llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
+
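// Editorial sketch, not part of this change: the rewrite PPCEarlyReturn
// performs. A branch whose only purpose is to reach a blr-only block becomes
// a (conditional) return itself, after which the blr-only block can be merged
// into its layout predecessor or deleted:
//
//   blt cr0, .LBB0_2                   bltlr cr0
//   ...                        ->      ...
// .LBB0_2:                             blr
//   blr
//
// The pass is constructed with createPPCEarlyReturnPass(), declared above.
// One plausible wiring into the codegen pipeline (the real hook belongs in
// PPCTargetMachine.cpp, outside this hunk, so treat the details here as an
// assumption) is a late, pre-emission run:
bool PPCPassConfig::addPreEmitPass() {
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createPPCEarlyReturnPass());
  // Branch selection must still run immediately before the asm printer.
  addPass(createPPCBranchSelectionPass());
  return false;
}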
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 635e348..34a1a73 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -120,6 +120,17 @@ public:
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
+
+ // Select analysis.
+ virtual bool canInsertSelect(const MachineBasicBlock&,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned, unsigned, int&, int&, int&) const;
+ virtual void insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const;
+
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -146,6 +157,66 @@ public:
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const;
+
+  // If-conversion by predication (only supported by some branch
+  // instructions). The profitability checks below return true; using the
+  // predicated branches is essentially free. The one exception is the
+  // two-sided (diamond) variant, defined out of line, which refuses blocks
+  // that define the count register.
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
+ return true;
+ }
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles,
+ const BranchProbability
+ &Probability) const {
+ return true;
+ }
+
+ virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ return false;
+ }
+
+ // Predication support.
+ bool isPredicated(const MachineInstr *MI) const;
+
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ virtual bool isPredicable(MachineInstr *MI) const;
+
+ // Comparison optimization.
+
+
+ virtual bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const;
+
+ virtual bool optimizeCompareInstr(MachineInstr *CmpInstr,
+ unsigned SrcReg, unsigned SrcReg2,
+ int Mask, int Value,
+ const MachineRegisterInfo *MRI) const;
+
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index ab90762..4763069 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -319,10 +319,7 @@ def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
// PowerPC Flag Definitions.
class isPPC64 { bit PPC64 = 1; }
-class isDOT {
- list<Register> Defs = [CR0];
- bit RC = 1;
-}
+class isDOT { bit RC = 1; }
class RegConstraint<string C> {
string Constraints = C;
@@ -335,20 +332,111 @@ class NoEncode<string E> {
//===----------------------------------------------------------------------===//
// PowerPC Operand Definitions.
+// In the default PowerPC assembler syntax, registers are specified simply
+// by number, so they cannot be distinguished from immediate values (without
+// looking at the opcode). This means that the default operand matching logic
+// for the asm parser does not work, and we need to specify custom matchers.
+// Since those can only be specified with RegisterOperand classes and not
+// directly on the RegisterClass, all instruction patterns used by the asm
+// parser need to use a RegisterOperand (instead of a RegisterClass) for
+// all their register operands.
+// For this purpose, we define one RegisterOperand for each RegisterClass,
+// using the same name as the class, just in lower case.
+
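// Editorial sketch, not part of this change: the PredicateMethod hooks named
// below live on the asm parser's operand type (in PPCAsmParser.cpp, not in
// this file). The type and member names in this standalone C++ model are
// assumptions; only the checks themselves are implied by the operand classes.
// Because registers are written as bare numbers, a register operand parses as
// an immediate and the predicates simply range-check it:
struct ParsedOperandModel {
  int64_t Imm;     // value of the parsed immediate token
  bool IsImm;      // true if the token parsed as an immediate
  bool isRegNumber() const   { return IsImm && Imm >= 0 && Imm < 32; }
  bool isCCRegNumber() const { return IsImm && Imm >= 0 && Imm < 8; }
  bool isU5Imm() const       { return IsImm && Imm >= 0 && Imm < 32; }
  bool isS5Imm() const       { return IsImm && Imm >= -16 && Imm < 16; }
  bool isU6Imm() const       { return IsImm && Imm >= 0 && Imm < 64; }
  bool isS16Imm() const      { return IsImm && Imm >= -32768 && Imm < 32768; }
  bool isU16Imm() const      { return IsImm && Imm >= 0 && Imm < 65536; }
  bool isS16ImmX4() const    { return isS16Imm() && (Imm & 3) == 0; }
  bool isCRBitMask() const   { return IsImm && Imm >= 1 && Imm < 256 &&
                                      ((Imm & (Imm - 1)) == 0); }
};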
+def PPCRegGPRCAsmOperand : AsmOperandClass {
+ let Name = "RegGPRC"; let PredicateMethod = "isRegNumber";
+}
+def gprc : RegisterOperand<GPRC> {
+ let ParserMatchClass = PPCRegGPRCAsmOperand;
+}
+def PPCRegG8RCAsmOperand : AsmOperandClass {
+ let Name = "RegG8RC"; let PredicateMethod = "isRegNumber";
+}
+def g8rc : RegisterOperand<G8RC> {
+ let ParserMatchClass = PPCRegG8RCAsmOperand;
+}
+def PPCRegGPRCNoR0AsmOperand : AsmOperandClass {
+ let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def gprc_nor0 : RegisterOperand<GPRC_NOR0> {
+ let ParserMatchClass = PPCRegGPRCNoR0AsmOperand;
+}
+def PPCRegG8RCNoX0AsmOperand : AsmOperandClass {
+ let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber";
+}
+def g8rc_nox0 : RegisterOperand<G8RC_NOX0> {
+ let ParserMatchClass = PPCRegG8RCNoX0AsmOperand;
+}
+def PPCRegF8RCAsmOperand : AsmOperandClass {
+ let Name = "RegF8RC"; let PredicateMethod = "isRegNumber";
+}
+def f8rc : RegisterOperand<F8RC> {
+ let ParserMatchClass = PPCRegF8RCAsmOperand;
+}
+def PPCRegF4RCAsmOperand : AsmOperandClass {
+ let Name = "RegF4RC"; let PredicateMethod = "isRegNumber";
+}
+def f4rc : RegisterOperand<F4RC> {
+ let ParserMatchClass = PPCRegF4RCAsmOperand;
+}
+def PPCRegVRRCAsmOperand : AsmOperandClass {
+ let Name = "RegVRRC"; let PredicateMethod = "isRegNumber";
+}
+def vrrc : RegisterOperand<VRRC> {
+ let ParserMatchClass = PPCRegVRRCAsmOperand;
+}
+def PPCRegCRBITRCAsmOperand : AsmOperandClass {
+ let Name = "RegCRBITRC"; let PredicateMethod = "isRegNumber";
+}
+def crbitrc : RegisterOperand<CRBITRC> {
+ let ParserMatchClass = PPCRegCRBITRCAsmOperand;
+}
+def PPCRegCRRCAsmOperand : AsmOperandClass {
+ let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber";
+}
+def crrc : RegisterOperand<CRRC> {
+ let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+
+def PPCS5ImmAsmOperand : AsmOperandClass {
+ let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
+ let RenderMethod = "addImmOperands";
+}
def s5imm : Operand<i32> {
let PrintMethod = "printS5ImmOperand";
+ let ParserMatchClass = PPCS5ImmAsmOperand;
+}
+def PPCU5ImmAsmOperand : AsmOperandClass {
+ let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
+ let RenderMethod = "addImmOperands";
}
def u5imm : Operand<i32> {
let PrintMethod = "printU5ImmOperand";
+ let ParserMatchClass = PPCU5ImmAsmOperand;
+}
+def PPCU6ImmAsmOperand : AsmOperandClass {
+ let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
+ let RenderMethod = "addImmOperands";
}
def u6imm : Operand<i32> {
let PrintMethod = "printU6ImmOperand";
+ let ParserMatchClass = PPCU6ImmAsmOperand;
+}
+def PPCS16ImmAsmOperand : AsmOperandClass {
+ let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
+ let RenderMethod = "addImmOperands";
}
def s16imm : Operand<i32> {
let PrintMethod = "printS16ImmOperand";
+ let ParserMatchClass = PPCS16ImmAsmOperand;
+}
+def PPCU16ImmAsmOperand : AsmOperandClass {
+ let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
+ let RenderMethod = "addImmOperands";
}
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
+ let ParserMatchClass = PPCU16ImmAsmOperand;
}
def directbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
@@ -367,21 +455,49 @@ def aaddr : Operand<iPTR> {
def symbolHi: Operand<i32> {
let PrintMethod = "printSymbolHi";
let EncoderMethod = "getHA16Encoding";
+ let ParserMatchClass = PPCS16ImmAsmOperand;
}
def symbolLo: Operand<i32> {
let PrintMethod = "printSymbolLo";
let EncoderMethod = "getLO16Encoding";
+ let ParserMatchClass = PPCS16ImmAsmOperand;
+}
+def PPCCRBitMaskOperand : AsmOperandClass {
+ let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask";
}
def crbitm: Operand<i8> {
let PrintMethod = "printcrbitm";
let EncoderMethod = "get_crbitm_encoding";
+ let ParserMatchClass = PPCCRBitMaskOperand;
}
// Address operands
// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
-def ptr_rc_nor0 : PointerLikeRegClass<1>;
+def PPCRegGxRCNoR0Operand : AsmOperandClass {
+ let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> {
+ let ParserMatchClass = PPCRegGxRCNoR0Operand;
+}
+// A version of ptr_rc usable with the asm parser.
+def PPCRegGxRCOperand : AsmOperandClass {
+ let Name = "RegGxRC"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
+ let ParserMatchClass = PPCRegGxRCOperand;
+}
-def dispRI : Operand<iPTR>;
-def dispRIX : Operand<iPTR>;
+def PPCDispRIOperand : AsmOperandClass {
+ let Name = "DispRI"; let PredicateMethod = "isS16Imm";
+}
+def dispRI : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRIOperand;
+}
+def PPCDispRIXOperand : AsmOperandClass {
+ let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4";
+}
+def dispRIX : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRIXOperand;
+}
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
@@ -390,7 +506,7 @@ def memri : Operand<iPTR> {
}
def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
- let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg);
+ let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg);
}
def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
let PrintMethod = "printMemRegImmShifted";
@@ -407,7 +523,7 @@ def memr : Operand<iPTR> {
// PowerPC Predicate operand.
def pred : Operand<OtherVT> {
let PrintMethod = "printPredicateOperand";
- let MIOperandInfo = (ops i32imm:$bibo, CRRC:$reg);
+ let MIOperandInfo = (ops i32imm:$bibo, crrc:$reg);
}
// Define PowerPC specific addressing mode.
@@ -430,6 +546,252 @@ def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
//===----------------------------------------------------------------------===//
+// PowerPC Multiclass Definitions.
+
+multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XForm_6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XForm_6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XForm_6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_10r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_10<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XForm_10<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XForm_10<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XForm_10<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_11r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_11<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XForm_11<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XOForm_3<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XOForm_3<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XOForm_3<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XOForm_3<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass MForm_2r<bits<6> opcode, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : MForm_2<opcode, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : MForm_2<opcode, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass MDForm_1r<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : MDForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : MDForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass MDSForm_1r<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : MDSForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : MDSForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XSForm_1rc<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XSForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XSForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_26<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : XForm_26<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : AForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : AForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass AForm_2r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : AForm_2<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : AForm_2<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : AForm_3<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : AForm_3<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+//===----------------------------------------------------------------------===//
// PowerPC Instruction Definitions.
// Pseudo-instructions:
@@ -442,12 +804,12 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCAL
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
+def UPDATE_VRSAVE : Pseudo<(outs gprc:$rD), (ins gprc:$rS),
"UPDATE_VRSAVE $rD, $rS", []>;
}
let Defs = [R1], Uses = [R1] in
-def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC",
+def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
[(set i32:$result,
(PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
@@ -458,21 +820,21 @@ let usesCustomInserter = 1, // Expanded after instruction selection.
// Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
// because either operand might become the first operand in an isel, and
// that operand cannot be r0.
- def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond,
- GPRC_NOR0:$T, GPRC_NOR0:$F,
+ def SELECT_CC_I4 : Pseudo<(outs gprc:$dst), (ins crrc:$cond,
+ gprc_nor0:$T, gprc_nor0:$F,
i32imm:$BROPC), "#SELECT_CC_I4",
[]>;
- def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond,
- G8RC_NOX0:$T, G8RC_NOX0:$F,
+ def SELECT_CC_I8 : Pseudo<(outs g8rc:$dst), (ins crrc:$cond,
+ g8rc_nox0:$T, g8rc_nox0:$F,
i32imm:$BROPC), "#SELECT_CC_I8",
[]>;
- def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
+ def SELECT_CC_F4 : Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F,
i32imm:$BROPC), "#SELECT_CC_F4",
[]>;
- def SELECT_CC_F8 : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
+ def SELECT_CC_F8 : Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F,
i32imm:$BROPC), "#SELECT_CC_F8",
[]>;
- def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
+ def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
i32imm:$BROPC), "#SELECT_CC_VRRC",
[]>;
}
@@ -480,21 +842,26 @@ let usesCustomInserter = 1, // Expanded after instruction selection.
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
let mayStore = 1 in
-def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F),
+def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F),
"#SPILL_CR", []>;
// RESTORE_CR - Indicate that we're restoring the CR register (previously
// spilled), so we'll need to scavenge a register for it.
let mayLoad = 1 in
-def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
+def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F),
"#RESTORE_CR", []>;
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
let isReturn = 1, Uses = [LR, RM] in
def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB,
[(retflag)]>;
- let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in {
def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
+
+ let isCodeGenOnly = 1 in
+ def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}ctr ${cond:reg}", BrB, []>;
+ }
}
let Defs = [LR] in
@@ -511,10 +878,21 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
// BCC represents an arbitrary conditional branch on a predicate.
// FIXME: should be able to write a pattern for PPCcondbranch, but can't use
// a two-value operand where a dag node expects two operands. :(
- let isCodeGenOnly = 1 in
+ let isCodeGenOnly = 1 in {
def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
"b${cond:cc} ${cond:reg}, $dst"
- /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+ /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+ let isReturn = 1, Uses = [LR, RM] in
+ def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}lr ${cond:reg}", BrB, []>;
+
+ let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
+ def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
+ "bdzlr", BrB, []>;
+ def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
+ "bdnzlr", BrB, []>;
+ }
+ }
let Defs = [CTR], Uses = [CTR] in {
def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
@@ -544,6 +922,10 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
"bctrl", BrB, [(PPCbctrl)]>,
Requires<[In32BitMode]>;
+
+ let isCodeGenOnly = 1 in
+ def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}ctrl ${cond:reg}", BrB, []>;
}
}
@@ -589,7 +971,7 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
[]>;
let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
- def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+ def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP32",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
Requires<[In32BitMode]>;
@@ -638,89 +1020,89 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
[(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
[(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
[(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
[(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8",
[(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
[(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_ADD_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
[(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
[(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
[(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
[(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
[(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
[(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_ADD_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
[(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_SUB_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
[(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_AND_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
[(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_OR_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
[(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_XOR_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
[(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_LOAD_NAND_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
[(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
def ATOMIC_CMP_SWAP_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
[(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_CMP_SWAP_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
[(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_CMP_SWAP_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
[(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
def ATOMIC_SWAP_I8 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8",
[(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
def ATOMIC_SWAP_I16 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
[(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
def ATOMIC_SWAP_I32 : Pseudo<
- (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32",
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
[(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
}
}
// Instructions to support atomic operations
-def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src),
+def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
"lwarx $rD, $src", LdStLWARX,
[(set i32:$rD, (PPClarx xoaddr:$src))]>;
let Defs = [CR0] in
-def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
+def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
"stwcx. $rS, $dst", LdStSTWCX,
[(PPCstcx i32:$rS, xoaddr:$dst)]>,
isDOT;
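// The ATOMIC_* pseudos above are expanded after selection into lwarx/stwcx.
// retry loops; for ATOMIC_LOAD_ADD_I32 the emitted sequence is roughly:
//   loop: lwarx  rD, 0, rPtr
//         add    rTmp, rD, rIncr
//         stwcx. rTmp, 0, rPtr
//         bne-   loop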
@@ -734,93 +1116,93 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
// Unindexed (r+i) Loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
-def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
+def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
"lbz $rD, $src", LdStLoad,
[(set i32:$rD, (zextloadi8 iaddr:$src))]>;
-def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
+def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src),
"lha $rD, $src", LdStLHA,
[(set i32:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
-def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
+def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src),
"lhz $rD, $src", LdStLoad,
[(set i32:$rD, (zextloadi16 iaddr:$src))]>;
-def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
+def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src),
"lwz $rD, $src", LdStLoad,
[(set i32:$rD, (load iaddr:$src))]>;
-def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
+def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src),
"lfs $rD, $src", LdStLFD,
[(set f32:$rD, (load iaddr:$src))]>;
-def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
+def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
"lfd $rD, $src", LdStLFD,
[(set f64:$rD, (load iaddr:$src))]>;
// Unindexed (r+i) Loads with Update (preinc).
-let mayLoad = 1 in {
-def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhau $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lhzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lwzu $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfsu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lfdu $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// Indexed (r+r) Loads with Update (preinc).
-def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
+def LBZUX : XForm_1<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lbzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
+def LHAUX : XForm_1<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhaux $rD, $addr", LdStLHAU,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
+def LHZUX : XForm_1<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lhzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
+def LWZUX : XForm_1<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lwzux $rD, $addr", LdStLoadUpd,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result),
+def LFSUX : XForm_1<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfsux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result),
+def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
(ins memrr:$addr),
"lfdux $rD, $addr", LdStLFDU,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
@@ -831,39 +1213,39 @@ def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result),
// Indexed (r+r) Loads.
//
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
-def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
+def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
"lbzx $rD, $src", LdStLoad,
[(set i32:$rD, (zextloadi8 xaddr:$src))]>;
-def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
+def LHAX : XForm_1<31, 343, (outs gprc:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
[(set i32:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
-def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
+def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src),
"lhzx $rD, $src", LdStLoad,
[(set i32:$rD, (zextloadi16 xaddr:$src))]>;
-def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
+def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src),
"lwzx $rD, $src", LdStLoad,
[(set i32:$rD, (load xaddr:$src))]>;
-def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
+def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src),
"lhbrx $rD, $src", LdStLoad,
[(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
-def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
+def LWBRX : XForm_1<31, 534, (outs gprc:$rD), (ins memrr:$src),
"lwbrx $rD, $src", LdStLoad,
[(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>;
-def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
+def LFSX : XForm_25<31, 535, (outs f4rc:$frD), (ins memrr:$src),
"lfsx $frD, $src", LdStLFD,
[(set f32:$frD, (load xaddr:$src))]>;
-def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
+def LFDX : XForm_25<31, 599, (outs f8rc:$frD), (ins memrr:$src),
"lfdx $frD, $src", LdStLFD,
[(set f64:$frD, (load xaddr:$src))]>;
-def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src),
+def LFIWAX : XForm_25<31, 855, (outs f8rc:$frD), (ins memrr:$src),
"lfiwax $frD, $src", LdStLFD,
[(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
-def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src),
+def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src),
"lfiwzx $frD, $src", LdStLFD,
[(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
}
@@ -874,38 +1256,38 @@ def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src),
// Unindexed (r+i) Stores.
let PPC970_Unit = 2 in {
-def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
+def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
"stb $rS, $src", LdStStore,
[(truncstorei8 i32:$rS, iaddr:$src)]>;
-def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
+def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src),
"sth $rS, $src", LdStStore,
[(truncstorei16 i32:$rS, iaddr:$src)]>;
-def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
+def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src),
"stw $rS, $src", LdStStore,
[(store i32:$rS, iaddr:$src)]>;
-def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
+def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst),
"stfs $rS, $dst", LdStSTFD,
[(store f32:$rS, iaddr:$dst)]>;
-def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
+def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
"stfd $rS, $dst", LdStSTFD,
[(store f64:$rS, iaddr:$dst)]>;
}
// Unindexed (r+i) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1 in {
-def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
"stbu $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
"sthu $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
"stwu $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst),
+def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst),
"stfsu $rS, $dst", LdStSTFDU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst),
+def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst),
"stfdu $rS, $dst", LdStSTFDU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
}
@@ -926,59 +1308,59 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
// Indexed (r+r) Stores.
let PPC970_Unit = 2 in {
-def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
+def STBX : XForm_8<31, 215, (outs), (ins gprc:$rS, memrr:$dst),
"stbx $rS, $dst", LdStStore,
[(truncstorei8 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
+def STHX : XForm_8<31, 407, (outs), (ins gprc:$rS, memrr:$dst),
"sthx $rS, $dst", LdStStore,
[(truncstorei16 i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
+def STWX : XForm_8<31, 151, (outs), (ins gprc:$rS, memrr:$dst),
"stwx $rS, $dst", LdStStore,
[(store i32:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
-def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
+def STHBRX: XForm_8<31, 918, (outs), (ins gprc:$rS, memrr:$dst),
"sthbrx $rS, $dst", LdStStore,
[(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>,
PPC970_DGroup_Cracked;
-def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
+def STWBRX: XForm_8<31, 662, (outs), (ins gprc:$rS, memrr:$dst),
"stwbrx $rS, $dst", LdStStore,
[(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>,
PPC970_DGroup_Cracked;
-def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
+def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst),
"stfiwx $frS, $dst", LdStSTFD,
[(PPCstfiwx f64:$frS, xoaddr:$dst)]>;
-def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
+def STFSX : XForm_28<31, 663, (outs), (ins f4rc:$frS, memrr:$dst),
"stfsx $frS, $dst", LdStSTFD,
[(store f32:$frS, xaddr:$dst)]>;
-def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
+def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
"stfdx $frS, $dst", LdStSTFD,
[(store f64:$frS, xaddr:$dst)]>;
}
// Indexed (r+r) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1 in {
-def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
"stbux $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
"sthux $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
"stwux $rS, $dst", LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst),
+def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst),
"stfsux $rS, $dst", LdStSTFDU, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
-def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst),
+def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memrr:$dst),
"stfdux $rS, $dst", LdStSTFDU, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
@@ -1007,193 +1389,206 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins),
//
let PPC970_Unit = 1 in { // FXU Operations.
-def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm),
+def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$imm),
"addi $rD, $rA, $imm", IntSimple,
[(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>;
-let Defs = [CARRY] in {
-def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+let BaseName = "addic" in {
+let Defs = [CARRY] in
+def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"addic $rD, $rA, $imm", IntGeneral,
[(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>,
- PPC970_DGroup_Cracked;
-def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+ RecFormRel, PPC970_DGroup_Cracked;
+let Defs = [CARRY, CR0] in
+def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"addic. $rD, $rA, $imm", IntGeneral,
- []>;
+ []>, isDOT, RecFormRel;
}
-def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm),
+def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolHi:$imm),
"addis $rD, $rA, $imm", IntSimple,
[(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
let isCodeGenOnly = 1 in
-def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym),
+def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$sym),
"la $rD, $sym($rA)", IntGeneral,
[(set i32:$rD, (add i32:$rA,
(PPClo tglobaladdr:$sym, 0)))]>;
-def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"mulli $rD, $rA, $imm", IntMulLI,
[(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>;
-let Defs = [CARRY] in {
-def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+let Defs = [CARRY] in
+def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
[(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>;
-}
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
- def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
+ def LI : DForm_2_r0<14, (outs gprc:$rD), (ins symbolLo:$imm),
"li $rD, $imm", IntSimple,
[(set i32:$rD, immSExt16:$imm)]>;
- def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
+ def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins symbolHi:$imm),
"lis $rD, $imm", IntSimple,
[(set i32:$rD, imm16ShiftedSExt:$imm)]>;
}
}
let PPC970_Unit = 1 in { // FXU Operations.
-def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+let Defs = [CR0] in {
+def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"andi. $dst, $src1, $src2", IntGeneral,
[(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
isDOT;
-def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"andis. $dst, $src1, $src2", IntGeneral,
[(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
isDOT;
-def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+}
+def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"ori $dst, $src1, $src2", IntSimple,
[(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
-def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"oris $dst, $src1, $src2", IntSimple,
[(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
-def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"xori $dst, $src1, $src2", IntSimple,
[(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
-def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
"xoris $dst, $src1, $src2", IntSimple,
[(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple,
[]>;
-def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
- "cmpwi $crD, $rA, $imm", IntCompare>;
-def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
- "cmplwi $dst, $src1, $src2", IntCompare>;
+let isCompare = 1, neverHasSideEffects = 1 in {
+ def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm),
+ "cmpwi $crD, $rA, $imm", IntCompare>;
+ def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "cmplwi $dst, $src1, $src2", IntCompare>;
+}
+}
+
+let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations.
+defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "nand", "$rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
+defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "and", "$rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (and i32:$rS, i32:$rB))]>;
+defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "andc", "$rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
+defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "or", "$rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (or i32:$rS, i32:$rB))]>;
+defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "nor", "$rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
+defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "orc", "$rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
+defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "eqv", "$rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
+defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "xor", "$rA, $rS, $rB", IntSimple,
+ [(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
+defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "slw", "$rA, $rS, $rB", IntGeneral,
+ [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
+defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "srw", "$rA, $rS, $rB", IntGeneral,
+ [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
+defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "sraw", "$rA, $rS, $rB", IntShift,
+ [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
}
-
let PPC970_Unit = 1 in { // FXU Operations.
-def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "nand $rA, $rS, $rB", IntSimple,
- [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
-def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "and $rA, $rS, $rB", IntSimple,
- [(set i32:$rA, (and i32:$rS, i32:$rB))]>;
-def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "andc $rA, $rS, $rB", IntSimple,
- [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
-def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "or $rA, $rS, $rB", IntSimple,
- [(set i32:$rA, (or i32:$rS, i32:$rB))]>;
-def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "nor $rA, $rS, $rB", IntSimple,
- [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
-def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "orc $rA, $rS, $rB", IntSimple,
- [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
-def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "eqv $rA, $rS, $rB", IntSimple,
- [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
-def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "xor $rA, $rS, $rB", IntSimple,
- [(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
-def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "slw $rA, $rS, $rB", IntGeneral,
- [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
-def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "srw $rA, $rS, $rB", IntGeneral,
- [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
-let Defs = [CARRY] in {
-def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
- "sraw $rA, $rS, $rB", IntShift,
- [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
+let neverHasSideEffects = 1 in {
+defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH),
+ "srawi", "$rA, $rS, $SH", IntShift,
+ [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
+defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS),
+ "cntlzw", "$rA, $rS", IntGeneral,
+ [(set i32:$rA, (ctlz i32:$rS))]>;
+defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS),
+ "extsb", "$rA, $rS", IntSimple,
+ [(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
+defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS),
+ "extsh", "$rA, $rS", IntSimple,
+ [(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
+}
+let isCompare = 1, neverHasSideEffects = 1 in {
+ def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
+ "cmpw $crD, $rA, $rB", IntCompare>;
+ def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
+ "cmplw $crD, $rA, $rB", IntCompare>;
}
}
-
-let PPC970_Unit = 1 in { // FXU Operations.
-let Defs = [CARRY] in {
-def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH),
- "srawi $rA, $rS, $SH", IntShift,
- [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
-}
-def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS),
- "cntlzw $rA, $rS", IntGeneral,
- [(set i32:$rA, (ctlz i32:$rS))]>;
-def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
- "extsb $rA, $rS", IntSimple,
- [(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
-def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
- "extsh $rA, $rS", IntSimple,
- [(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
-
-def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
- "cmpw $crD, $rA, $rB", IntCompare>;
-def CMPLW : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
- "cmplw $crD, $rA, $rB", IntCompare>;
-}
let PPC970_Unit = 3 in { // FPU Operations.
//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
// "fcmpo $crD, $fA, $fB", FPCompare>;
-def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB),
- "fcmpu $crD, $fA, $fB", FPCompare>;
-def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
- "fcmpu $crD, $fA, $fB", FPCompare>;
+let isCompare = 1, neverHasSideEffects = 1 in {
+ def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+ def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
+ "fcmpu $crD, $fA, $fB", FPCompare>;
+}
let Uses = [RM] in {
- def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
- "fctiwz $frD, $frB", FPGeneral,
- [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
+ let neverHasSideEffects = 1 in {
+ defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiwz", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
- def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
- "frsp $frD, $frB", FPGeneral,
- [(set f32:$frD, (fround f64:$frB))]>;
+ defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
+ "frsp", "$frD, $frB", FPGeneral,
+ [(set f32:$frD, (fround f64:$frB))]>;
// The frin -> nearbyint mapping is valid only in fast-math mode.
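// (frin rounds ties away from zero, while nearbyint honors the current rounding
// mode; e.g. under the default round-to-nearest-even, nearbyint(0.5) == 0.0 but
// frin yields 1.0, so the mapping is only acceptable when fast-math permits
// such differences.)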
- def FRIND : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB),
- "frin $frD, $frB", FPGeneral,
- [(set f64:$frD, (fnearbyint f64:$frB))]>;
- def FRINS : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB),
- "frin $frD, $frB", FPGeneral,
- [(set f32:$frD, (fnearbyint f32:$frB))]>;
+ let Interpretation64Bit = 1 in
+ defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
+ "frin", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (fnearbyint f64:$frB))]>;
+ defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
+ "frin", "$frD, $frB", FPGeneral,
+ [(set f32:$frD, (fnearbyint f32:$frB))]>;
+ }
// These pseudos expand to rint but also set FE_INEXACT when the result does
// not equal the argument.
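// A sketch of what the custom inserter is expected to produce:
//   frD = round-to-integer(frB)
//   if (frD != frB)            // result differs from the argument
//     set FPSCR[XX]            // i.e. raise FE_INEXACT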
let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR!
- def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB),
+ def FRINDrint : Pseudo<(outs f8rc:$frD), (ins f8rc:$frB),
"#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>;
- def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB),
+ def FRINSrint : Pseudo<(outs f4rc:$frD), (ins f4rc:$frB),
"#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>;
}
- def FRIPD : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB),
- "frip $frD, $frB", FPGeneral,
- [(set f64:$frD, (fceil f64:$frB))]>;
- def FRIPS : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB),
- "frip $frD, $frB", FPGeneral,
- [(set f32:$frD, (fceil f32:$frB))]>;
- def FRIZD : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB),
- "friz $frD, $frB", FPGeneral,
- [(set f64:$frD, (ftrunc f64:$frB))]>;
- def FRIZS : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB),
- "friz $frD, $frB", FPGeneral,
- [(set f32:$frD, (ftrunc f32:$frB))]>;
- def FRIMD : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB),
- "frim $frD, $frB", FPGeneral,
- [(set f64:$frD, (ffloor f64:$frB))]>;
- def FRIMS : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB),
- "frim $frD, $frB", FPGeneral,
- [(set f32:$frD, (ffloor f32:$frB))]>;
-
- def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
- "fsqrt $frD, $frB", FPSqrt,
- [(set f64:$frD, (fsqrt f64:$frB))]>;
- def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
- "fsqrts $frD, $frB", FPSqrt,
- [(set f32:$frD, (fsqrt f32:$frB))]>;
+ let neverHasSideEffects = 1 in {
+ let Interpretation64Bit = 1 in
+ defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
+ "frip", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (fceil f64:$frB))]>;
+ defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB),
+ "frip", "$frD, $frB", FPGeneral,
+ [(set f32:$frD, (fceil f32:$frB))]>;
+ let Interpretation64Bit = 1 in
+ defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB),
+ "friz", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (ftrunc f64:$frB))]>;
+ defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB),
+ "friz", "$frD, $frB", FPGeneral,
+ [(set f32:$frD, (ftrunc f32:$frB))]>;
+ let Interpretation64Bit = 1 in
+ defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB),
+ "frim", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (ffloor f64:$frB))]>;
+ defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
+ "frim", "$frD, $frB", FPGeneral,
+ [(set f32:$frD, (ffloor f32:$frB))]>;
+
+ defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fsqrt", "$frD, $frB", FPSqrt,
+ [(set f64:$frD, (fsqrt f64:$frB))]>;
+ defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fsqrts", "$frD, $frB", FPSqrt,
+ [(set f32:$frD, (fsqrt f32:$frB))]>;
+ }
}
}
@@ -1201,69 +1596,74 @@ let Uses = [RM] in {
/// often coalesced away and we don't want the dispatch group builder to think
/// that they will fill slots (which could cause the load of a LSU reject to
/// sneak into a d-group with a store).
-def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
- "fmr $frD, $frB", FPGeneral,
- []>, // (set f32:$frD, f32:$frB)
- PPC970_Unit_Pseudo;
+let neverHasSideEffects = 1 in
+defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fmr", "$frD, $frB", FPGeneral,
+ []>, // (set f32:$frD, f32:$frB)
+ PPC970_Unit_Pseudo;
-let PPC970_Unit = 3 in { // FPU Operations.
+let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations.
// These are artificially split into two different forms, for 4/8 byte FP.
-def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
- "fabs $frD, $frB", FPGeneral,
- [(set f32:$frD, (fabs f32:$frB))]>;
-def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
- "fabs $frD, $frB", FPGeneral,
- [(set f64:$frD, (fabs f64:$frB))]>;
-def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
- "fnabs $frD, $frB", FPGeneral,
- [(set f32:$frD, (fneg (fabs f32:$frB)))]>;
-def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
- "fnabs $frD, $frB", FPGeneral,
- [(set f64:$frD, (fneg (fabs f64:$frB)))]>;
-def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
- "fneg $frD, $frB", FPGeneral,
- [(set f32:$frD, (fneg f32:$frB))]>;
-def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
- "fneg $frD, $frB", FPGeneral,
- [(set f64:$frD, (fneg f64:$frB))]>;
+defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fabs", "$frD, $frB", FPGeneral,
+ [(set f32:$frD, (fabs f32:$frB))]>;
+let Interpretation64Bit = 1 in
+defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fabs", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (fabs f64:$frB))]>;
+defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fnabs", "$frD, $frB", FPGeneral,
+ [(set f32:$frD, (fneg (fabs f32:$frB)))]>;
+let Interpretation64Bit = 1 in
+defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fnabs", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (fneg (fabs f64:$frB)))]>;
+defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fneg", "$frD, $frB", FPGeneral,
+ [(set f32:$frD, (fneg f32:$frB))]>;
+let Interpretation64Bit = 1 in
+defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fneg", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (fneg f64:$frB))]>;
// Reciprocal estimates.
-def FRE : XForm_26<63, 24, (outs F8RC:$frD), (ins F8RC:$frB),
- "fre $frD, $frB", FPGeneral,
- [(set f64:$frD, (PPCfre f64:$frB))]>;
-def FRES : XForm_26<59, 24, (outs F4RC:$frD), (ins F4RC:$frB),
- "fres $frD, $frB", FPGeneral,
- [(set f32:$frD, (PPCfre f32:$frB))]>;
-def FRSQRTE : XForm_26<63, 26, (outs F8RC:$frD), (ins F8RC:$frB),
- "frsqrte $frD, $frB", FPGeneral,
- [(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
-def FRSQRTES : XForm_26<59, 26, (outs F4RC:$frD), (ins F4RC:$frB),
- "frsqrtes $frD, $frB", FPGeneral,
- [(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
+defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fre", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfre f64:$frB))]>;
+defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fres", "$frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfre f32:$frB))]>;
+defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB),
+ "frsqrte", "$frD, $frB", FPGeneral,
+ [(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
+defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB),
+ "frsqrtes", "$frD, $frB", FPGeneral,
+ [(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
}
// XL-Form instructions. condition register logical ops.
//
-def MCRF : XLForm_3<19, 0, (outs CRRC:$BF), (ins CRRC:$BFA),
+let neverHasSideEffects = 1 in
+def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA),
"mcrf $BF, $BFA", BrMCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
-def CREQV : XLForm_1<19, 289, (outs CRBITRC:$CRD),
- (ins CRBITRC:$CRA, CRBITRC:$CRB),
+def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
"creqv $CRD, $CRA, $CRB", BrCR,
[]>;
-def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD),
- (ins CRBITRC:$CRA, CRBITRC:$CRB),
+def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
"cror $CRD, $CRA, $CRB", BrCR,
[]>;
let isCodeGenOnly = 1 in {
-def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
+def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
"creqv $dst, $dst, $dst", BrCR,
[]>;
-def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins),
+def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
"crxor $dst, $dst, $dst", BrCR,
[]>;
@@ -1281,23 +1681,23 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
// XFX-Form instructions. Instructions that deal with SPRs.
//
let Uses = [CTR] in {
-def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
+def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins),
"mfctr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
-def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
+def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Defs = [LR] in {
-def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins GPRC:$rS),
+def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
"mtlr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Uses = [LR] in {
-def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins),
+def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins),
"mflr $rT", SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
@@ -1305,19 +1705,19 @@ def MFLR : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins),
// Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed like
// a GPR on the PPC970. As such, copies in and out have the same performance
// characteristics as an OR instruction.
-def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins GPRC:$rS),
+def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS),
"mtspr 256, $rS", IntGeneral>,
PPC970_DGroup_Single, PPC970_Unit_FXU;
-def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
+def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
let isCodeGenOnly = 1 in {
def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
- (outs VRSAVERC:$reg), (ins GPRC:$rS),
+ (outs VRSAVERC:$reg), (ins gprc:$rS),
"mtspr 256, $rS", IntGeneral>,
PPC970_DGroup_Single, PPC970_Unit_FXU;
- def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT),
+ def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT),
(ins VRSAVERC:$reg),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -1335,7 +1735,8 @@ let mayLoad = 1 in
def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
"#RESTORE_VRSAVE", []>;
-def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
+let neverHasSideEffects = 1 in {
+def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins gprc:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
@@ -1350,21 +1751,23 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
//
// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
let isCodeGenOnly = 1 in
-def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+def MFCRpseud: XFXForm_3<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
"#MFCRpseud", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
-
-def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
- "mfcr $rT", SprMFCR>,
- PPC970_MicroCode, PPC970_Unit_CRU;
-def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
+} // neverHasSideEffects = 1
+
+let neverHasSideEffects = 1 in
+def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins),
+ "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
// Pseudo instruction to perform FADD in round-to-zero mode.
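// A sketch of the expected expansion (save/restore of the rounding mode around
// the add; the exact sequence comes from the custom inserter):
//   old = mffs
//   set FPSCR[RN] = 0b01       // round toward zero
//   FRT = fadd FRA, FRB
//   mtfsf 255, old             // restore the rounding mode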
let usesCustomInserter = 1, Uses = [RM] in {
- def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "",
+ def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
[(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
}
@@ -1377,123 +1780,118 @@ let Uses = [RM], Defs = [RM] in {
def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
"mtfsb1 $FM", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
- def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT),
+ def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
"mtfsf $FM, $rT", IntMTFSB0, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
let Uses = [RM] in {
- def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins),
+ def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins),
"mffs $rT", IntMFFS,
[(set f64:$rT, (PPCmffs))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
-let PPC970_Unit = 1 in { // FXU Operations.
-
+let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set overflow bit
//
-def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "add $rT, $rA, $rB", IntSimple,
- [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
-let Defs = [CARRY] in {
-def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "addc $rT, $rA, $rB", IntGeneral,
- [(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
- PPC970_DGroup_Cracked;
-}
-def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "divw $rT, $rA, $rB", IntDivW,
- [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "divwu $rT, $rA, $rB", IntDivW,
- [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "mulhw $rT, $rA, $rB", IntMulHW,
- [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
-def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "mulhwu $rT, $rA, $rB", IntMulHWU,
- [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
-def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "mullw $rT, $rA, $rB", IntMulHW,
- [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
-def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "subf $rT, $rA, $rB", IntGeneral,
- [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
-let Defs = [CARRY] in {
-def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "subfc $rT, $rA, $rB", IntGeneral,
- [(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
- PPC970_DGroup_Cracked;
-}
-def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
- "neg $rT, $rA", IntSimple,
- [(set i32:$rT, (ineg i32:$rA))]>;
-let Uses = [CARRY], Defs = [CARRY] in {
-def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "adde $rT, $rA, $rB", IntGeneral,
- [(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
-def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
- "addme $rT, $rA", IntGeneral,
- [(set i32:$rT, (adde i32:$rA, -1))]>;
-def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
- "addze $rT, $rA", IntGeneral,
- [(set i32:$rT, (adde i32:$rA, 0))]>;
-def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
- "subfe $rT, $rA, $rB", IntGeneral,
- [(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
-def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
- "subfme $rT, $rA", IntGeneral,
- [(set i32:$rT, (sube -1, i32:$rA))]>;
-def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
- "subfze $rT, $rA", IntGeneral,
- [(set i32:$rT, (sube 0, i32:$rA))]>;
+defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "add", "$rT, $rA, $rB", IntSimple,
+ [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
+defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "addc", "$rT, $rA, $rB", IntGeneral,
+ [(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
+ PPC970_DGroup_Cracked;
+defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divw", "$rT, $rA, $rB", IntDivW,
+ [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwu", "$rT, $rA, $rB", IntDivW,
+ [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "mulhw", "$rT, $rA, $rB", IntMulHW,
+ [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
+defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "mulhwu", "$rT, $rA, $rB", IntMulHWU,
+ [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
+defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "mullw", "$rT, $rA, $rB", IntMulHW,
+ [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
+defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "subf", "$rT, $rA, $rB", IntGeneral,
+ [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
+defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "subfc", "$rT, $rA, $rB", IntGeneral,
+ [(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
+ PPC970_DGroup_Cracked;
+defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "neg", "$rT, $rA", IntSimple,
+ [(set i32:$rT, (ineg i32:$rA))]>;
+let Uses = [CARRY] in {
+defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "adde", "$rT, $rA, $rB", IntGeneral,
+ [(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
+defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "addme", "$rT, $rA", IntGeneral,
+ [(set i32:$rT, (adde i32:$rA, -1))]>;
+defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "addze", "$rT, $rA", IntGeneral,
+ [(set i32:$rT, (adde i32:$rA, 0))]>;
+defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "subfe", "$rT, $rA, $rB", IntGeneral,
+ [(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
+defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "subfme", "$rT, $rA", IntGeneral,
+ [(set i32:$rT, (sube -1, i32:$rA))]>;
+defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "subfze", "$rT, $rA", IntGeneral,
+ [(set i32:$rT, (sube 0, i32:$rA))]>;
}
}
// A-Form instructions. Most of the instructions executed in the FPU are of
// this type.
//
-let PPC970_Unit = 3 in { // FPU Operations.
+let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations.
let Uses = [RM] in {
- def FMADD : AForm_1<63, 29,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
- "fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ defm FMADD : AForm_1r<63, 29,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fmadd", "$FRT, $FRA, $FRC, $FRB", FPFused,
[(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
- def FMADDS : AForm_1<59, 29,
- (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
- "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ defm FMADDS : AForm_1r<59, 29,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
[(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
- def FMSUB : AForm_1<63, 28,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
- "fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ defm FMSUB : AForm_1r<63, 28,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fmsub", "$FRT, $FRA, $FRC, $FRB", FPFused,
[(set f64:$FRT,
(fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
- def FMSUBS : AForm_1<59, 28,
- (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
- "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ defm FMSUBS : AForm_1r<59, 28,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
[(set f32:$FRT,
(fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
- def FNMADD : AForm_1<63, 31,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
- "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+ defm FNMADD : AForm_1r<63, 31,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fnmadd", "$FRT, $FRA, $FRC, $FRB", FPFused,
[(set f64:$FRT,
(fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
- def FNMADDS : AForm_1<59, 31,
- (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
- "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ defm FNMADDS : AForm_1r<59, 31,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fnmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
[(set f32:$FRT,
(fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
- def FNMSUB : AForm_1<63, 30,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
- "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+ defm FNMSUB : AForm_1r<63, 30,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fnmsub", "$FRT, $FRA, $FRC, $FRB", FPFused,
[(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
(fneg f64:$FRB))))]>;
- def FNMSUBS : AForm_1<59, 30,
- (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
- "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+ defm FNMSUBS : AForm_1r<59, 30,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fnmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
[(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
(fneg f32:$FRB))))]>;
}
@@ -1501,53 +1899,56 @@ let Uses = [RM] in {
// having 4 of these, force the comparison to always be an 8-byte double (code
// should use an FMRSD if the input comparison value really wants to be a float)
// and 4/8 byte forms for the result and operand type.
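// (fsel selects on the sign of the comparison operand, roughly
//    FRT = (FRA >= 0.0) ? FRC : FRB    // a NaN in FRA selects FRB
//  which is independent of whether FRA originated as a float or a double.)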
-def FSELD : AForm_1<63, 23,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
- "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
-def FSELS : AForm_1<63, 23,
- (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
- "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
- [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
+let Interpretation64Bit = 1 in
+defm FSELD : AForm_1r<63, 23,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
+defm FSELS : AForm_1r<63, 23,
+ (outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+ [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
let Uses = [RM] in {
- def FADD : AForm_2<63, 21,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fadd $FRT, $FRA, $FRB", FPAddSub,
- [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
- def FADDS : AForm_2<59, 21,
- (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
- "fadds $FRT, $FRA, $FRB", FPGeneral,
- [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
- def FDIV : AForm_2<63, 18,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fdiv $FRT, $FRA, $FRB", FPDivD,
- [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
- def FDIVS : AForm_2<59, 18,
- (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
- "fdivs $FRT, $FRA, $FRB", FPDivS,
- [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
- def FMUL : AForm_3<63, 25,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC),
- "fmul $FRT, $FRA, $FRC", FPFused,
- [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
- def FMULS : AForm_3<59, 25,
- (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC),
- "fmuls $FRT, $FRA, $FRC", FPGeneral,
- [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
- def FSUB : AForm_2<63, 20,
- (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
- "fsub $FRT, $FRA, $FRB", FPAddSub,
- [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
- def FSUBS : AForm_2<59, 20,
- (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
- "fsubs $FRT, $FRA, $FRB", FPGeneral,
- [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
+ defm FADD : AForm_2r<63, 21,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
+ "fadd", "$FRT, $FRA, $FRB", FPAddSub,
+ [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
+ defm FADDS : AForm_2r<59, 21,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
+ "fadds", "$FRT, $FRA, $FRB", FPGeneral,
+ [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
+ defm FDIV : AForm_2r<63, 18,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
+ "fdiv", "$FRT, $FRA, $FRB", FPDivD,
+ [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
+ defm FDIVS : AForm_2r<59, 18,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
+ "fdivs", "$FRT, $FRA, $FRB", FPDivS,
+ [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
+ defm FMUL : AForm_3r<63, 25,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC),
+ "fmul", "$FRT, $FRA, $FRC", FPFused,
+ [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
+ defm FMULS : AForm_3r<59, 25,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC),
+ "fmuls", "$FRT, $FRA, $FRC", FPGeneral,
+ [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
+ defm FSUB : AForm_2r<63, 20,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
+ "fsub", "$FRT, $FRA, $FRB", FPAddSub,
+ [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
+ defm FSUBS : AForm_2r<59, 20,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
+ "fsubs", "$FRT, $FRA, $FRB", FPGeneral,
+ [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
}
}
+let neverHasSideEffects = 1 in {
let PPC970_Unit = 1 in { // FXU Operations.
+ let isSelect = 1 in
def ISEL : AForm_4<31, 15,
- (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, CRBITRC:$cond),
+ (outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond),
"isel $rT, $rA, $rB, $cond", IntGeneral,
[]>;
}
@@ -1557,26 +1958,29 @@ let PPC970_Unit = 1 in { // FXU Operations.
//
let isCommutable = 1 in {
// RLWIMI can be commuted if the rotate amount is zero.
-def RLWIMI : MForm_2<20,
- (outs GPRC:$rA), (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB,
- u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate,
- []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
- NoEncode<"$rSi">;
+defm RLWIMI : MForm_2r<20, (outs gprc:$rA),
+ (ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", IntRotate,
+ []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
}
+let BaseName = "rlwinm" in {
def RLWINM : MForm_2<21,
- (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
"rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
- []>;
+ []>, RecFormRel;
+let Defs = [CR0] in
def RLWINMo : MForm_2<21,
- (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
- "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
- []>, isDOT, PPC970_DGroup_Cracked;
-def RLWNM : MForm_2<23,
- (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME),
- "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral,
- []>;
+ (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>, isDOT, RecFormRel, PPC970_DGroup_Cracked;
}
-
+defm RLWNM : MForm_2r<23, (outs gprc:$rA),
+ (ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME),
+ "rlwnm", "$rA, $rS, $rB, $MB, $ME", IntGeneral,
+ []>;
+}
+} // neverHasSideEffects = 1
//===----------------------------------------------------------------------===//
// PowerPC Instruction Patterns
@@ -1693,14 +2097,6 @@ def : Pat<(f64 (extloadf32 xaddr:$src)),
def : Pat<(f64 (fextend f32:$src)),
(COPY_TO_REGCLASS $src, F8RC)>;
-// Memory barriers
-def : Pat<(membarrier (i32 imm /*ll*/),
- (i32 imm /*ls*/),
- (i32 imm /*sl*/),
- (i32 imm /*ss*/),
- (i32 imm /*device*/)),
- (SYNC)>;
-
def : Pat<(atomic_fence (imm), (imm)), (SYNC)>;
// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
@@ -1715,3 +2111,98 @@ def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instructions used for assembler/disassembler only
+//
+
+def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
+ "isync", SprISYNC, []>;
+
+def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
+ "icbi $src", LdStICBI, []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Assembler Instruction Aliases
+//
+
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class PPCAsmPseudo<string asm, dag iops>
+ : Instruction {
+ let Namespace = "PPC";
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let OutOperandList = (outs);
+ let InOperandList = iops;
+ let Pattern = [];
+ let AsmString = asm;
+ let isAsmParserOnly = 1;
+ let isPseudo = 1;
+}
+
+def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+
+def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+
+def : InstAlias<"blt $cc, $dst", (BCC 12, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bgt $cc, $dst", (BCC 44, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"beq $cc, $dst", (BCC 76, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bun $cc, $dst", (BCC 108, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bso $cc, $dst", (BCC 108, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bge $cc, $dst", (BCC 4, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bnl $cc, $dst", (BCC 4, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"ble $cc, $dst", (BCC 36, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bng $cc, $dst", (BCC 36, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bne $cc, $dst", (BCC 68, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bnu $cc, $dst", (BCC 100, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bns $cc, $dst", (BCC 100, crrc:$cc, condbrtarget:$dst)>;
+
+def : InstAlias<"bltlr $cc", (BCLR 12, crrc:$cc)>;
+def : InstAlias<"bgtlr $cc", (BCLR 44, crrc:$cc)>;
+def : InstAlias<"beqlr $cc", (BCLR 76, crrc:$cc)>;
+def : InstAlias<"bunlr $cc", (BCLR 108, crrc:$cc)>;
+def : InstAlias<"bsolr $cc", (BCLR 108, crrc:$cc)>;
+def : InstAlias<"bgelr $cc", (BCLR 4, crrc:$cc)>;
+def : InstAlias<"bnllr $cc", (BCLR 4, crrc:$cc)>;
+def : InstAlias<"blelr $cc", (BCLR 36, crrc:$cc)>;
+def : InstAlias<"bnglr $cc", (BCLR 36, crrc:$cc)>;
+def : InstAlias<"bnelr $cc", (BCLR 68, crrc:$cc)>;
+def : InstAlias<"bnulr $cc", (BCLR 100, crrc:$cc)>;
+def : InstAlias<"bnslr $cc", (BCLR 100, crrc:$cc)>;
+
+def : InstAlias<"bltctr $cc", (BCCTR 12, crrc:$cc)>;
+def : InstAlias<"bgtctr $cc", (BCCTR 44, crrc:$cc)>;
+def : InstAlias<"beqctr $cc", (BCCTR 76, crrc:$cc)>;
+def : InstAlias<"bunctr $cc", (BCCTR 108, crrc:$cc)>;
+def : InstAlias<"bsoctr $cc", (BCCTR 108, crrc:$cc)>;
+def : InstAlias<"bgectr $cc", (BCCTR 4, crrc:$cc)>;
+def : InstAlias<"bnlctr $cc", (BCCTR 4, crrc:$cc)>;
+def : InstAlias<"blectr $cc", (BCCTR 36, crrc:$cc)>;
+def : InstAlias<"bngctr $cc", (BCCTR 36, crrc:$cc)>;
+def : InstAlias<"bnectr $cc", (BCCTR 68, crrc:$cc)>;
+def : InstAlias<"bnuctr $cc", (BCCTR 100, crrc:$cc)>;
+def : InstAlias<"bnsctr $cc", (BCCTR 100, crrc:$cc)>;
+
+def : InstAlias<"bltctrl $cc", (BCCTRL 12, crrc:$cc)>;
+def : InstAlias<"bgtctrl $cc", (BCCTRL 44, crrc:$cc)>;
+def : InstAlias<"beqctrl $cc", (BCCTRL 76, crrc:$cc)>;
+def : InstAlias<"bunctrl $cc", (BCCTRL 108, crrc:$cc)>;
+def : InstAlias<"bsoctrl $cc", (BCCTRL 108, crrc:$cc)>;
+def : InstAlias<"bgectrl $cc", (BCCTRL 4, crrc:$cc)>;
+def : InstAlias<"bnlctrl $cc", (BCCTRL 4, crrc:$cc)>;
+def : InstAlias<"blectrl $cc", (BCCTRL 36, crrc:$cc)>;
+def : InstAlias<"bngctrl $cc", (BCCTRL 36, crrc:$cc)>;
+def : InstAlias<"bnectrl $cc", (BCCTRL 68, crrc:$cc)>;
+def : InstAlias<"bnuctrl $cc", (BCCTRL 100, crrc:$cc)>;
+def : InstAlias<"bnsctrl $cc", (BCCTRL 100, crrc:$cc)>;
+
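The condition codes hard-coded in the branch aliases above (12, 44, 76 and 108 for the "branch if condition true" forms, and 4, 36, 68 and 100 for their negated counterparts) appear to pack a CR bit index together with a PowerPC BO field: the bit index (0 = LT, 1 = GT, 2 = EQ, 3 = SO) sits above a 5-bit BO value of 12 (branch if bit set) or 4 (branch if bit clear). A minimal sketch of that assumed decomposition, using illustrative names rather than the real llvm::PPC::Predicate enumerators:

#include <cassert>

// Illustrative only: pack a CR field bit index above a 5-bit BO value. This
// reproduces every constant used by the BCC/BCLR/BCCTR/BCCTRL aliases above.
enum CRBit { LT = 0, GT = 1, EQ = 2, SO = 3 };

constexpr unsigned packPred(unsigned CRBitIdx, unsigned BO) {
  return (CRBitIdx << 5) | BO;
}

int main() {
  assert(packPred(LT, 12) == 12);  // blt
  assert(packPred(GT, 12) == 44);  // bgt
  assert(packPred(EQ, 12) == 76);  // beq
  assert(packPred(SO, 12) == 108); // bun / bso
  assert(packPred(LT, 4) == 4);    // bge / bnl
  assert(packPred(GT, 4) == 36);   // ble / bng
  assert(packPred(EQ, 4) == 68);   // bne
  assert(packPred(SO, 4) == 100);  // bnu / bns
  return 0;
}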
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 9b0df3e..f8cf3a5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -14,6 +14,7 @@
#include "PPC.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
@@ -51,7 +52,14 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
// before we return the symbol.
if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) {
Name += "$stub";
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ const char *PGP = AP.MAI->getPrivateGlobalPrefix();
+ const char *Prefix = "";
+ if (!Name.startswith(PGP)) {
+ // http://llvm.org/bugs/show_bug.cgi?id=15763
+      // All stubs and lazy_ptrs should be local symbols, which need the leading 'L'.
+ Prefix = PGP;
+ }
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Twine(Prefix) + Twine(Name));
MachineModuleInfoImpl::StubValueTy &StubSym =
getMachOMMI(AP).getFnStubEntry(Sym);
if (StubSym.getPointer())
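The prefix fix-up above relies on a MachO convention: names that begin with the assembler's private-global prefix (plain "L" on Darwin) are treated as assembler-local and never reach the object file's symbol table, which is what stub and lazy-pointer entries require. A minimal sketch of the rule, assuming only that convention; the helper name is made up for illustration:

#include <cassert>
#include <string>

// Hypothetical helper mirroring the prefixing logic above: prepend the
// private-global prefix unless the name already carries it.
std::string localizeStubName(const std::string &Name,
                             const std::string &PrivatePrefix /* "L" on Darwin */) {
  if (Name.compare(0, PrivatePrefix.size(), PrivatePrefix) == 0)
    return Name;                 // already assembler-local
  return PrivatePrefix + Name;   // e.g. "_foo$stub" -> "L_foo$stub"
}

int main() {
  assert(localizeStubName("_foo$stub", "L") == "L_foo$stub");
  assert(localizeStubName("L_bar$lazy_ptr", "L") == "L_bar$lazy_ptr");
  return 0;
}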
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index ee18ead..40d1f3a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -84,6 +84,11 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
int CRSpillFrameIndex;
+ /// If any of CR[2-4] need to be saved in the prologue and restored in the
+ /// epilogue then they are added to this array. This is used for the
+ /// 64-bit SVR4 ABI.
+ SmallVector<unsigned, 3> MustSaveCRs;
+
public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
@@ -154,6 +159,10 @@ public:
int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
+
+ const SmallVector<unsigned, 3> &
+ getMustSaveCRs() const { return MustSaveCRs; }
+ void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); }
};
} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 1d61a3a..2be6324 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -76,6 +76,8 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
const TargetRegisterClass *
PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const {
+ // Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value
+ // when it checks for ZERO folding.
if (Kind == 1) {
if (Subtarget.isPPC64())
return &PPC::G8RC_NOX0RegClass;
@@ -452,6 +454,33 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
return false;
}
+// Figure out if the offset in the instruction is shifted right two bits. This
+// is true for instructions like "STD", for which the machine implicitly
+// appends two low zero bits to the encoded offset.
+static bool usesIXAddr(const MachineInstr &MI) {
+ unsigned OpC = MI.getOpcode();
+
+ switch (OpC) {
+ default:
+ return false;
+ case PPC::LWA:
+ case PPC::LD:
+ case PPC::STD:
+ return true;
+ }
+}
+
+// Return the OffsetOperandNo given the FIOperandNum (and the instruction).
+static unsigned getOffsetONFromFION(const MachineInstr &MI,
+ unsigned FIOperandNum) {
+ // Take into account whether it's an add or mem instruction
+ unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
+ if (MI.isInlineAsm())
+ OffsetOperandNo = FIOperandNum-1;
+
+ return OffsetOperandNo;
+}
+
void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
@@ -469,10 +498,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
- // Take into account whether it's an add or mem instruction
- unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
- if (MI.isInlineAsm())
- OffsetOperandNo = FIOperandNum-1;
+ unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
// Get the frame index.
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
@@ -514,17 +540,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
(is64Bit ? PPC::X1 : PPC::R1),
false);
- // Figure out if the offset in the instruction is shifted right two bits. This
- // is true for instructions like "STD", which the machine implicitly adds two
- // low zeros to.
- bool isIXAddr = false;
- switch (OpC) {
- case PPC::LWA:
- case PPC::LD:
- case PPC::STD:
- isIXAddr = true;
- break;
- }
+ // Figure out if the offset in the instruction is shifted right two bits.
+ bool isIXAddr = usesIXAddr(MI);
// If the instruction is not present in ImmToIdxMap, then it has no immediate
// form (and must be r+r).
@@ -616,3 +633,124 @@ unsigned PPCRegisterInfo::getEHExceptionRegister() const {
unsigned PPCRegisterInfo::getEHHandlerRegister() const {
return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
}
+
+/// Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackSlotAllocation to determine which frame index
+/// references it should create new base registers for.
+bool PPCRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ assert(Offset < 0 && "Local offset must be negative");
+
+ unsigned FIOperandNum = 0;
+ while (!MI->getOperand(FIOperandNum).isFI()) {
+ ++FIOperandNum;
+ assert(FIOperandNum < MI->getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
+
+ if (!usesIXAddr(*MI))
+ Offset += MI->getOperand(OffsetOperandNo).getImm();
+ else
+ Offset += MI->getOperand(OffsetOperandNo).getImm() << 2;
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores that have
+ // an r+i form. Return false for everything else.
+ unsigned OpC = MI->getOpcode();
+ if (!ImmToIdxMap.count(OpC))
+ return false;
+
+ // Don't generate a new virtual base register just to add zero to it.
+ if ((OpC == PPC::ADDI || OpC == PPC::ADDI8) &&
+ MI->getOperand(2).getImm() == 0)
+ return false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+ unsigned StackEst =
+ PPCFI->determineFrameLayout(MF, false, true);
+
+ // If we likely don't need a stack frame, then we probably don't need a
+ // virtual base register either.
+ if (!StackEst)
+ return false;
+
+ // Estimate an offset from the stack pointer.
+  // The incoming offset is relative to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset += StackEst;
+
+ // The frame pointer will point to the end of the stack, so estimate the
+ // offset as the difference between the object offset and the FP location.
+ return !isFrameOffsetLegal(MI, Offset);
+}
+
+/// Insert defining instruction(s) for BaseReg to
+/// be a pointer to FrameIdx at the beginning of the basic block.
+void PPCRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const {
+ unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
+
+ MachineBasicBlock::iterator Ins = MBB->begin();
+ DebugLoc DL; // Defaults to "unknown"
+ if (Ins != MBB->end())
+ DL = Ins->getDebugLoc();
+
+ const MCInstrDesc &MCID = TII.get(ADDriOpc);
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const MachineFunction &MF = *MBB->getParent();
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
+
+ BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
+}
+
+void
+PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+
+ unsigned FIOperandNum = 0;
+ while (!MI.getOperand(FIOperandNum).isFI()) {
+ ++FIOperandNum;
+ assert(FIOperandNum < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
+ unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
+
+ bool isIXAddr = usesIXAddr(MI);
+ if (!isIXAddr)
+ Offset += MI.getOperand(OffsetOperandNo).getImm();
+ else
+ Offset += MI.getOperand(OffsetOperandNo).getImm() << 2;
+
+ // Figure out if the offset in the instruction is shifted right two bits.
+ if (isIXAddr)
+ Offset >>= 2; // The actual encoded value has the low two bits zero.
+
+ MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+}
+
+bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const {
+ return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
+ (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
+}
+
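The new usesIXAddr/isFrameOffsetLegal helpers formalize the DS-form constraint the changes above depend on: LD, STD and LWA encode only Offset >> 2, so a frame offset is directly representable only when it fits in a signed 16-bit value and its low two bits are zero. A minimal standalone sketch of that rule; the names below are illustrative, not the patch's API:

#include <cassert>
#include <cstdint>

// A DS-form displacement must be a 4-byte-aligned signed 16-bit offset; the
// instruction stores the offset with its low two bits dropped.
static bool isDSFormOffsetLegal(int64_t Offset) {
  bool FitsSigned16 = Offset >= -32768 && Offset <= 32767;
  bool FourByteAligned = (Offset & 3) == 0;
  return FitsSigned16 && FourByteAligned;
}

static int64_t encodeDSFormOffset(int64_t Offset) {
  return Offset >> 2; // the low two bits are implicit zeros
}

int main() {
  assert(isDSFormOffsetLegal(-32768) && isDSFormOffsetLegal(32764));
  assert(!isDSFormOffsetLegal(2) && !isDSFormOffsetLegal(40000));
  assert(encodeDSFormOffset(16) == 4);
  return 0;
}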
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 7e6683e..7a48b4b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -61,6 +61,10 @@ public:
return true;
}
+ virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return true;
+ }
+
void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
void lowerCRSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
@@ -77,6 +81,15 @@ public:
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = NULL) const;
+ // Support for virtual base registers.
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
+
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
index ae084aa..8d5838e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -759,7 +759,7 @@ def PPCA2Model : SchedMachineModel {
let LoadLatency = 6; // Optimistic load latency assuming bypass.
// This is overriden by OperandCycles if the
// Itineraries are queried instead.
- let MispredictPenalty = 6;
+ let MispredictPenalty = 13;
let Itineraries = PPCA2Itineraries;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index fe851c1..14dc794 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -86,8 +86,14 @@ public:
return getTM<PPCTargetMachine>();
}
+ const PPCSubtarget &getPPCSubtarget() const {
+ return *getPPCTargetMachine().getSubtargetImpl();
+ }
+
virtual bool addPreRegAlloc();
+ virtual bool addILPOpts();
virtual bool addInstSelector();
+ virtual bool addPreSched2();
virtual bool addPreEmitPass();
};
} // namespace
@@ -103,13 +109,31 @@ bool PPCPassConfig::addPreRegAlloc() {
return false;
}
+bool PPCPassConfig::addILPOpts() {
+ if (getPPCSubtarget().hasISEL()) {
+ addPass(&EarlyIfConverterID);
+ return true;
+ }
+
+ return false;
+}
+
bool PPCPassConfig::addInstSelector() {
// Install an instruction selector.
addPass(createPPCISelDag(getPPCTargetMachine()));
return false;
}
+bool PPCPassConfig::addPreSched2() {
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&IfConverterID);
+
+ return true;
+}
+
bool PPCPassConfig::addPreEmitPass() {
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCEarlyReturnPass());
// Must run branch selection immediately preceding the asm printer.
addPass(createPPCBranchSelectionPass());
return false;
diff --git a/contrib/llvm/lib/Target/R600/AMDGPU.h b/contrib/llvm/lib/Target/R600/AMDGPU.h
index 0b01433..9792bd8 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPU.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPU.h
@@ -24,6 +24,7 @@ class AMDGPUTargetMachine;
FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
+FunctionPass *createR600Packetizer(TargetMachine &tm);
FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
// SI Passes
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
index f600144..4c35ecf 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -19,9 +19,16 @@
#include "AMDGPUAsmPrinter.h"
#include "AMDGPU.h"
+#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "R600Defines.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -50,15 +57,82 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
if (OutStreamer.hasRawTextSupport()) {
OutStreamer.EmitRawText("@" + MF.getName() + ":");
}
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+
+ const MCSectionELF *ConfigSection = getObjFileLowering().getContext()
+ .getELFSection(".AMDGPU.config",
+ ELF::SHT_PROGBITS, 0,
+ SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(ConfigSection);
if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
- EmitProgramInfo(MF);
+ EmitProgramInfoSI(MF);
+ } else {
+ EmitProgramInfoR600(MF);
}
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
EmitFunctionBody();
return false;
}
-void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
+void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
+ unsigned MaxGPR = 0;
+ bool killPixel = false;
+ const R600RegisterInfo * RI =
+ static_cast<const R600RegisterInfo*>(TM.getRegisterInfo());
+ R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ if (MI.getOpcode() == AMDGPU::KILLGT)
+ killPixel = true;
+ unsigned numOperands = MI.getNumOperands();
+ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+ MachineOperand & MO = MI.getOperand(op_idx);
+ if (!MO.isReg())
+ continue;
+ unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
+
+        // Registers with an encoding value > 127 aren't GPRs
+ if (HWReg > 127)
+ continue;
+ MaxGPR = std::max(MaxGPR, HWReg);
+ }
+ }
+ }
+
+ unsigned RsrcReg;
+ if (STM.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX) {
+ // Evergreen / Northern Islands
+ switch (MFI->ShaderType) {
+ default: // Fall through
+ case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
+ case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
+ case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
+ case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
+ }
+ } else {
+ // R600 / R700
+ switch (MFI->ShaderType) {
+ default: // Fall through
+ case ShaderType::GEOMETRY: // Fall through
+ case ShaderType::COMPUTE: // Fall through
+ case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
+ case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
+ }
+ }
+
+ OutStreamer.EmitIntValue(RsrcReg, 4);
+ OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
+ S_STACK_SIZE(MFI->StackSize), 4);
+ OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
+ OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
@@ -107,6 +181,9 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
} else if (AMDGPU::VReg_64RegClass.contains(reg)) {
isSGPR = false;
width = 2;
+ } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 3;
} else if (AMDGPU::SReg_128RegClass.contains(reg)) {
isSGPR = true;
width = 4;
@@ -139,7 +216,19 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
MaxSGPR += 2;
}
SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
- OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
- OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
- OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
+ unsigned RsrcReg;
+ switch (MFI->ShaderType) {
+ default: // Fall through
+ case ShaderType::COMPUTE: RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
+ case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
+ case ShaderType::PIXEL: RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
+ case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
+ }
+
+ OutStreamer.EmitIntValue(RsrcReg, 4);
+ OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4);
+ if (MFI->ShaderType == ShaderType::PIXEL) {
+ OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
+ OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
+ }
}
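The divisions in EmitProgramInfoSI suggest the RSRC1 register-count fields are expressed in granules of four VGPRs and eight SGPRs. A minimal sketch of how such a packed word could be assembled; the shift and mask values below are assumptions for illustration, and only the S_00B028_* macros used in the patch are authoritative:

#include <cassert>
#include <cstdint>

static uint32_t packRsrc1(unsigned MaxVGPR, unsigned MaxSGPR) {
  uint32_t VGprGranules = MaxVGPR / 4; // 4 VGPRs per granule
  uint32_t SGprGranules = MaxSGPR / 8; // 8 SGPRs per granule
  return (VGprGranules & 0x3F) | ((SGprGranules & 0xF) << 6);
}

int main() {
  // A max VGPR index of 41 and a max SGPR index of 17 give 10 and 2 granules.
  assert(packRsrc1(41, 17) == (10u | (2u << 6)));
  return 0;
}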
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h
index 3812282..f425ef4 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -33,7 +33,8 @@ public:
/// \brief Emit register usage information so that the GPU driver
/// can correctly setup the GPU state.
- void EmitProgramInfo(MachineFunction &MF);
+ void EmitProgramInfoR600(MachineFunction &MF);
+ void EmitProgramInfoSI(MachineFunction &MF);
/// Implemented in AMDGPUMCInstLower.cpp
virtual void EmitInstruction(const MachineInstr *MI);
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td b/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td
index 45ae37e..9c30515 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td
+++ b/contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td
@@ -32,8 +32,14 @@ def CC_SI : CallingConv<[
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
- ]>>>
+ ]>>>,
+ // This is the default for i64 values.
+ // XXX: We should change this once clang understands the CC_AMDGPU.
+ CCIfType<[i64], CCAssignToRegWithShadow<
+ [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
+ [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+ >>
]>;
def CC_AMDGPU : CallingConv<[
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h
index f31b646..c2a79ea 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h
@@ -116,6 +116,7 @@ enum {
BRANCH_COND,
// End AMDIL ISD Opcodes
BITALIGN,
+ BUFFER_STORE,
DWORDADDR,
FRACT,
FMAX,
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
index e740348..d2620b2 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
+++ b/contrib/llvm/lib/Target/R600/AMDGPUInstructions.td
@@ -94,6 +94,7 @@ class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
+int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
}
def CONST : Constants;
@@ -115,21 +116,21 @@ class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"CLAMP $dst, $src0",
- [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+ [(set f32:$dst, (int_AMDIL_clamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;
class FABS <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"FABS $dst, $src0",
- [(set rc:$dst, (fabs rc:$src0))]
+ [(set f32:$dst, (fabs f32:$src0))]
>;
class FNEG <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"FNEG $dst, $src0",
- [(set rc:$dst, (fneg rc:$src0))]
+ [(set f32:$dst, (fneg f32:$src0))]
>;
} // usesCustomInserter = 1
@@ -140,8 +141,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
(outs dstClass:$dst),
(ins addrClass:$addr, i32imm:$chan),
"RegisterLoad $dst, $addr",
- [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr,
- (i32 timm:$chan)))]
+ [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
> {
let isRegisterLoad = 1;
}
@@ -150,7 +150,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
(outs),
(ins dstClass:$val, addrClass:$addr, i32imm:$chan),
"RegisterStore $val, $addr",
- [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))]
+ [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
> {
let isRegisterStore = 1;
}
@@ -161,105 +161,140 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
/* Generic helper patterns for intrinsics */
/* -------------------------------------- */
-class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
- RegisterClass rc> : Pat <
- (fpow rc:$src0, rc:$src1),
- (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
+class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
+ : Pat <
+ (fpow f32:$src0, f32:$src1),
+ (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;
/* Other helper patterns */
/* --------------------- */
/* Extract element pattern */
-class Extract_Element <ValueType sub_type, ValueType vec_type,
- RegisterClass vec_class, int sub_idx,
- SubRegIndex sub_reg>: Pat<
- (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
- (EXTRACT_SUBREG vec_class:$src, sub_reg)
+class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
+ SubRegIndex sub_reg>
+ : Pat<
+ (sub_type (vector_extract vec_type:$src, sub_idx)),
+ (EXTRACT_SUBREG $src, sub_reg)
>;
/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
- RegisterClass elem_class, RegisterClass vec_class,
- int sub_idx, SubRegIndex sub_reg> : Pat <
-
- (vec_type (vector_insert (vec_type vec_class:$vec),
- (elem_type elem_class:$elem), sub_idx)),
- (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
+ int sub_idx, SubRegIndex sub_reg>
+ : Pat <
+ (vector_insert vec_type:$vec, elem_type:$elem, sub_idx),
+ (INSERT_SUBREG $vec, $elem, sub_reg)
>;
// Vector Build pattern
-class Vector1_Build <ValueType vecType, RegisterClass vectorClass,
- ValueType elemType, RegisterClass elemClass> : Pat <
- (vecType (build_vector (elemType elemClass:$src))),
- (vecType elemClass:$src)
+class Vector1_Build <ValueType vecType, ValueType elemType,
+ RegisterClass rc> : Pat <
+ (vecType (build_vector elemType:$src)),
+ (vecType (COPY_TO_REGCLASS $src, rc))
>;
-class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
- ValueType elemType, RegisterClass elemClass> : Pat <
- (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1))),
+class Vector2_Build <ValueType vecType, ValueType elemType> : Pat <
+ (vecType (build_vector elemType:$sub0, elemType:$sub1)),
(INSERT_SUBREG (INSERT_SUBREG
- (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
+ (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1)
>;
-class Vector4_Build <ValueType vecType, RegisterClass vectorClass,
- ValueType elemType, RegisterClass elemClass> : Pat <
- (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
- (elemType elemClass:$z), (elemType elemClass:$w))),
+class Vector4_Build <ValueType vecType, ValueType elemType> : Pat <
+ (vecType (build_vector elemType:$x, elemType:$y, elemType:$z, elemType:$w)),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (vecType (IMPLICIT_DEF)), elemClass:$x, sub0), elemClass:$y, sub1),
- elemClass:$z, sub2), elemClass:$w, sub3)
+ (vecType (IMPLICIT_DEF)), $x, sub0), $y, sub1), $z, sub2), $w, sub3)
>;
-class Vector8_Build <ValueType vecType, RegisterClass vectorClass,
- ValueType elemType, RegisterClass elemClass> : Pat <
- (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
- (elemType elemClass:$sub2), (elemType elemClass:$sub3),
- (elemType elemClass:$sub4), (elemType elemClass:$sub5),
- (elemType elemClass:$sub6), (elemType elemClass:$sub7))),
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+class Vector8_Build <ValueType vecType, ValueType elemType> : Pat <
+ (vecType (build_vector elemType:$sub0, elemType:$sub1,
+ elemType:$sub2, elemType:$sub3,
+ elemType:$sub4, elemType:$sub5,
+ elemType:$sub6, elemType:$sub7)),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
- elemClass:$sub2, sub2), elemClass:$sub3, sub3),
- elemClass:$sub4, sub4), elemClass:$sub5, sub5),
- elemClass:$sub6, sub6), elemClass:$sub7, sub7)
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1),
+ $sub2, sub2), $sub3, sub3),
+ $sub4, sub4), $sub5, sub5),
+ $sub6, sub6), $sub7, sub7)
>;
-class Vector16_Build <ValueType vecType, RegisterClass vectorClass,
- ValueType elemType, RegisterClass elemClass> : Pat <
- (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
- (elemType elemClass:$sub2), (elemType elemClass:$sub3),
- (elemType elemClass:$sub4), (elemType elemClass:$sub5),
- (elemType elemClass:$sub6), (elemType elemClass:$sub7),
- (elemType elemClass:$sub8), (elemType elemClass:$sub9),
- (elemType elemClass:$sub10), (elemType elemClass:$sub11),
- (elemType elemClass:$sub12), (elemType elemClass:$sub13),
- (elemType elemClass:$sub14), (elemType elemClass:$sub15))),
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+class Vector16_Build <ValueType vecType, ValueType elemType> : Pat <
+ (vecType (build_vector elemType:$sub0, elemType:$sub1,
+ elemType:$sub2, elemType:$sub3,
+ elemType:$sub4, elemType:$sub5,
+ elemType:$sub6, elemType:$sub7,
+ elemType:$sub8, elemType:$sub9,
+ elemType:$sub10, elemType:$sub11,
+ elemType:$sub12, elemType:$sub13,
+ elemType:$sub14, elemType:$sub15)),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
- elemClass:$sub2, sub2), elemClass:$sub3, sub3),
- elemClass:$sub4, sub4), elemClass:$sub5, sub5),
- elemClass:$sub6, sub6), elemClass:$sub7, sub7),
- elemClass:$sub8, sub8), elemClass:$sub9, sub9),
- elemClass:$sub10, sub10), elemClass:$sub11, sub11),
- elemClass:$sub12, sub12), elemClass:$sub13, sub13),
- elemClass:$sub14, sub14), elemClass:$sub15, sub15)
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+ (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1),
+ $sub2, sub2), $sub3, sub3),
+ $sub4, sub4), $sub5, sub5),
+ $sub6, sub6), $sub7, sub7),
+ $sub8, sub8), $sub9, sub9),
+ $sub10, sub10), $sub11, sub11),
+ $sub12, sub12), $sub13, sub13),
+ $sub14, sub14), $sub15, sub15)
>;
+// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
+// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
(dt (bitconvert (st rc:$src0))),
(dt rc:$src0)
>;
+// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
+// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
(vt (AMDGPUdwordaddr (vt rc:$addr))),
(vt rc:$addr)
>;
+// BFI_INT patterns
+
+multiclass BFIPatterns <Instruction BFI_INT> {
+
+ // Definition from ISA doc:
+ // (y & x) | (z & ~x)
+ def : Pat <
+ (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
+ (BFI_INT $x, $y, $z)
+ >;
+
+ // SHA-256 Ch function
+ // z ^ (x & (y ^ z))
+ def : Pat <
+ (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
+ (BFI_INT $x, $y, $z)
+ >;
+
+}
+
+// SHA-256 Ma patterns
+
+// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
+class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
+ (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
+ (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
+>;
+
+// Bitfield extract patterns
+
+def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
+def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
+ SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;
+
+class BFEPattern <Instruction BFE> : Pat <
+ (and (srl i32:$x, legalshift32:$y), bfemask:$z),
+ (BFE $x, $y, $z)
+>;
+
include "R600Instructions.td"
include "SIInstrInfo.td"
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp
index 0223ec8..0461025 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -1,4 +1,5 @@
#include "AMDGPUMachineFunction.h"
+#include "AMDGPU.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
@@ -8,6 +9,7 @@ const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType";
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo() {
+ ShaderType = ShaderType::COMPUTE;
AttributeSet Set = MF.getFunction()->getAttributes();
Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
ShaderTypeAttribute);
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
index 0f356a1..a7e1d7b 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -33,6 +33,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
DefaultSize[0] = 64;
DefaultSize[1] = 1;
DefaultSize[2] = 1;
+ HasVertexCache = false;
ParseSubtargetFeatures(GPU, FS);
DevName = GPU;
Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
@@ -53,6 +54,10 @@ AMDGPUSubtarget::is64bit() const {
return Is64bit;
}
bool
+AMDGPUSubtarget::hasVertexCache() const {
+ return HasVertexCache;
+}
+bool
AMDGPUSubtarget::isTargetELF() const {
return false;
}
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
index 1973fc6..b6501a4 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h
@@ -36,6 +36,7 @@ private:
bool Is32on64bit;
bool DumpCode;
bool R600ALUInst;
+ bool HasVertexCache;
InstrItineraryData InstrItins;
@@ -48,6 +49,7 @@ public:
bool isOverride(AMDGPUDeviceInfo::Caps) const;
bool is64bit() const;
+ bool hasVertexCache() const;
// Helper functions to simplify if statements
bool isTargetELF() const;
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
index e7ea876..31fbf32 100644
--- a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -115,7 +115,6 @@ AMDGPUPassConfig::addPreISel() {
}
bool AMDGPUPassConfig::addInstSelector() {
- addPass(createAMDGPUPeepholeOpt(*TM));
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
@@ -153,8 +152,9 @@ bool AMDGPUPassConfig::addPreEmitPass() {
addPass(createAMDGPUCFGStructurizerPass(*TM));
addPass(createR600EmitClauseMarkers(*TM));
addPass(createR600ExpandSpecialInstrsPass(*TM));
- addPass(createR600ControlFlowFinalizer(*TM));
addPass(&FinalizeMachineBundlesID);
+ addPass(createR600Packetizer(*TM));
+ addPass(createR600ControlFlowFinalizer(*TM));
} else {
addPass(createSILowerControlFlowPass(*TM));
}
diff --git a/contrib/llvm/lib/Target/R600/AMDILBase.td b/contrib/llvm/lib/Target/R600/AMDILBase.td
index c12cedc..e221110 100644
--- a/contrib/llvm/lib/Target/R600/AMDILBase.td
+++ b/contrib/llvm/lib/Target/R600/AMDILBase.td
@@ -74,6 +74,10 @@ def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
"false",
"Older version of ALU instructions encoding.">;
+def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
+ "HasVertexCache",
+ "true",
+ "Specify use of dedicated vertex cache.">;
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
diff --git a/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp b/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp
index 9605fbe..126514b 100644
--- a/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp
@@ -44,7 +44,7 @@ AMDGPUDevice* getDeviceFromName(const std::string &deviceName,
" on 32bit pointers!");
#endif
return new AMDGPUEvergreenDevice(ptr);
- } else if (deviceName == "redwood") {
+ } else if (deviceName == "redwood" || deviceName == "sumo") {
#if DEBUG
assert(!is64bit && "This device does not support 64bit pointers!");
assert(!is64on32bit && "This device does not support 64bit"
@@ -79,7 +79,10 @@ AMDGPUDevice* getDeviceFromName(const std::string &deviceName,
" on 32bit pointers!");
#endif
return new AMDGPUNIDevice(ptr);
- } else if (deviceName == "SI") {
+ } else if (deviceName == "SI" ||
+ deviceName == "tahiti" || deviceName == "pitcairn" ||
+ deviceName == "verde" || deviceName == "oland" ||
+ deviceName == "hainan") {
return new AMDGPUSIDevice(ptr);
} else {
#if DEBUG
diff --git a/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp b/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp
index fa8f62d..ba75a44 100644
--- a/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -191,6 +191,29 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
RegSeqArgs, 2 * N->getNumOperands() + 1);
}
+ case ISD::BUILD_PAIR: {
+ SDValue RC, SubReg0, SubReg1;
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+ break;
+ }
+ if (N->getValueType(0) == MVT::i128) {
+ RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
+ SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
+ SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
+ } else if (N->getValueType(0) == MVT::i64) {
+ RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
+ SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
+ SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
+ } else {
+ llvm_unreachable("Unhandled value type for BUILD_PAIR");
+ }
+ const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
+ N->getOperand(1), SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ N->getDebugLoc(), N->getValueType(0), Ops);
+ }
+
case ISD::ConstantFP:
case ISD::Constant: {
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
diff --git a/contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp
deleted file mode 100644
index 3a28038..0000000
--- a/contrib/llvm/lib/Target/R600/AMDILPeepholeOptimizer.cpp
+++ /dev/null
@@ -1,1215 +0,0 @@
-//===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// \file
-//==-----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "PeepholeOpt"
-#ifdef DEBUG
-#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
-#else
-#define DEBUGME 0
-#endif
-
-#include "AMDILDevices.h"
-#include "AMDGPUInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-
-#include <sstream>
-
-#if 0
-STATISTIC(PointerAssignments, "Number of dynamic pointer "
- "assigments discovered");
-STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
-#endif
-
-using namespace llvm;
-// The Peephole optimization pass is used to do simple last minute optimizations
-// that are required for correct code or to remove redundant functions
-namespace {
-
-class OpaqueType;
-
-class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
-public:
- TargetMachine &TM;
- static char ID;
- AMDGPUPeepholeOpt(TargetMachine &tm);
- ~AMDGPUPeepholeOpt();
- const char *getPassName() const;
- bool runOnFunction(Function &F);
- bool doInitialization(Module &M);
- bool doFinalization(Module &M);
- void getAnalysisUsage(AnalysisUsage &AU) const;
-protected:
-private:
- // Function to initiate all of the instruction level optimizations.
- bool instLevelOptimizations(BasicBlock::iterator *inst);
- // Quick check to see if we need to dump all of the pointers into the
- // arena. If this is correct, then we set all pointers to exist in arena. This
- // is a workaround for aliasing of pointers in a struct/union.
- bool dumpAllIntoArena(Function &F);
- // Because I don't want to invalidate any pointers while in the
- // safeNestedForEachFunction. I push atomic conversions to a vector and handle
- // it later. This function does the conversions if required.
- void doAtomicConversionIfNeeded(Function &F);
- // Because __amdil_is_constant cannot be properly evaluated if
- // optimizations are disabled, the call's are placed in a vector
- // and evaluated after the __amdil_image* functions are evaluated
- // which should allow the __amdil_is_constant function to be
- // evaluated correctly.
- void doIsConstCallConversionIfNeeded();
- bool mChanged;
- bool mDebug;
- bool mConvertAtomics;
- CodeGenOpt::Level optLevel;
- // Run a series of tests to see if we can optimize a CALL instruction.
- bool optimizeCallInst(BasicBlock::iterator *bbb);
- // A peephole optimization to optimize bit extract sequences.
- bool optimizeBitExtract(Instruction *inst);
- // A peephole optimization to optimize bit insert sequences.
- bool optimizeBitInsert(Instruction *inst);
- bool setupBitInsert(Instruction *base,
- Instruction *&src,
- Constant *&mask,
- Constant *&shift);
- // Expand the bit field insert instruction on versions of OpenCL that
- // don't support it.
- bool expandBFI(CallInst *CI);
- // Expand the bit field mask instruction on version of OpenCL that
- // don't support it.
- bool expandBFM(CallInst *CI);
- // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in
- // this case we need to expand them. These functions check for 24bit functions
- // and then expand.
- bool isSigned24BitOps(CallInst *CI);
- void expandSigned24BitOps(CallInst *CI);
- // One optimization that can occur is that if the required workgroup size is
- // specified then the result of get_local_size is known at compile time and
- // can be returned accordingly.
- bool isRWGLocalOpt(CallInst *CI);
- // On northern island cards, the division is slightly less accurate than on
- // previous generations, so we need to utilize a more accurate division. So we
- // can translate the accurate divide to a normal divide on all other cards.
- bool convertAccurateDivide(CallInst *CI);
- void expandAccurateDivide(CallInst *CI);
- // If the alignment is set incorrectly, it can produce really inefficient
- // code. This checks for this scenario and fixes it if possible.
- bool correctMisalignedMemOp(Instruction *inst);
-
- // If we are in no opt mode, then we need to make sure that
- // local samplers are properly propagated as constant propagation
- // doesn't occur and we need to know the value of kernel defined
- // samplers at compile time.
- bool propagateSamplerInst(CallInst *CI);
-
- // Helper functions
-
- // Group of functions that recursively calculate the size of a structure based
- // on it's sub-types.
- size_t getTypeSize(Type * const T, bool dereferencePtr = false);
- size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
- size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
- size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false);
- size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
- size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
- size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
- size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
-
- LLVMContext *mCTX;
- Function *mF;
- const AMDGPUSubtarget *mSTM;
- SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
- SmallVector<CallInst *, 16> isConstVec;
-}; // class AMDGPUPeepholeOpt
- char AMDGPUPeepholeOpt::ID = 0;
-
-// A template function that has two levels of looping before calling the
-// function with a pointer to the current iterator.
-template<class InputIterator, class SecondIterator, class Function>
-Function safeNestedForEach(InputIterator First, InputIterator Last,
- SecondIterator S, Function F) {
- for ( ; First != Last; ++First) {
- SecondIterator sf, sl;
- for (sf = First->begin(), sl = First->end();
- sf != sl; ) {
- if (!F(&sf)) {
- ++sf;
- }
- }
- }
- return F;
-}
-
-} // anonymous namespace
-
-namespace llvm {
- FunctionPass *
- createAMDGPUPeepholeOpt(TargetMachine &tm) {
- return new AMDGPUPeepholeOpt(tm);
- }
-} // llvm namespace
-
-AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
- : FunctionPass(ID), TM(tm) {
- mDebug = DEBUGME;
- optLevel = TM.getOptLevel();
-
-}
-
-AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt() {
-}
-
-const char *
-AMDGPUPeepholeOpt::getPassName() const {
- return "AMDGPU PeepHole Optimization Pass";
-}
-
-bool
-containsPointerType(Type *Ty) {
- if (!Ty) {
- return false;
- }
- switch(Ty->getTypeID()) {
- default:
- return false;
- case Type::StructTyID: {
- const StructType *ST = dyn_cast<StructType>(Ty);
- for (StructType::element_iterator stb = ST->element_begin(),
- ste = ST->element_end(); stb != ste; ++stb) {
- if (!containsPointerType(*stb)) {
- continue;
- }
- return true;
- }
- break;
- }
- case Type::VectorTyID:
- case Type::ArrayTyID:
- return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
- case Type::PointerTyID:
- return true;
- };
- return false;
-}
-
-bool
-AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F) {
- bool dumpAll = false;
- for (Function::const_arg_iterator cab = F.arg_begin(),
- cae = F.arg_end(); cab != cae; ++cab) {
- const Argument *arg = cab;
- const PointerType *PT = dyn_cast<PointerType>(arg->getType());
- if (!PT) {
- continue;
- }
- Type *DereferencedType = PT->getElementType();
- if (!dyn_cast<StructType>(DereferencedType)
- ) {
- continue;
- }
- if (!containsPointerType(DereferencedType)) {
- continue;
- }
- // FIXME: Because a pointer inside of a struct/union may be aliased to
- // another pointer we need to take the conservative approach and place all
- // pointers into the arena until more advanced detection is implemented.
- dumpAll = true;
- }
- return dumpAll;
-}
-void
-AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() {
- if (isConstVec.empty()) {
- return;
- }
- for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
- CallInst *CI = isConstVec[x];
- Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
- Type *aType = Type::getInt32Ty(*mCTX);
- Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
- : ConstantInt::get(aType, 0);
- CI->replaceAllUsesWith(Val);
- CI->eraseFromParent();
- }
- isConstVec.clear();
-}
-void
-AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F) {
- // Don't do anything if we don't have any atomic operations.
- if (atomicFuncs.empty()) {
- return;
- }
- // Change the function name for the atomic if it is required
- uint32_t size = atomicFuncs.size();
- for (uint32_t x = 0; x < size; ++x) {
- atomicFuncs[x].first->setOperand(
- atomicFuncs[x].first->getNumOperands()-1,
- atomicFuncs[x].second);
-
- }
- mChanged = true;
- if (mConvertAtomics) {
- return;
- }
-}
-
-bool
-AMDGPUPeepholeOpt::runOnFunction(Function &MF) {
- mChanged = false;
- mF = &MF;
- mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
- if (mDebug) {
- MF.dump();
- }
- mCTX = &MF.getType()->getContext();
- mConvertAtomics = true;
- safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
- std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
- this));
-
- doAtomicConversionIfNeeded(MF);
- doIsConstCallConversionIfNeeded();
-
- if (mDebug) {
- MF.dump();
- }
- return mChanged;
-}
-
-bool
-AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) {
- Instruction *inst = (*bbb);
- CallInst *CI = dyn_cast<CallInst>(inst);
- if (!CI) {
- return false;
- }
- if (isSigned24BitOps(CI)) {
- expandSigned24BitOps(CI);
- ++(*bbb);
- CI->eraseFromParent();
- return true;
- }
- if (propagateSamplerInst(CI)) {
- return false;
- }
- if (expandBFI(CI) || expandBFM(CI)) {
- ++(*bbb);
- CI->eraseFromParent();
- return true;
- }
- if (convertAccurateDivide(CI)) {
- expandAccurateDivide(CI);
- ++(*bbb);
- CI->eraseFromParent();
- return true;
- }
-
- StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
- if (calleeName.startswith("__amdil_is_constant")) {
- // If we do not have optimizations, then this
- // cannot be properly evaluated, so we add the
- // call instruction to a vector and process
- // them at the end of processing after the
- // samplers have been correctly handled.
- if (optLevel == CodeGenOpt::None) {
- isConstVec.push_back(CI);
- return false;
- } else {
- Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
- Type *aType = Type::getInt32Ty(*mCTX);
- Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
- : ConstantInt::get(aType, 0);
- CI->replaceAllUsesWith(Val);
- ++(*bbb);
- CI->eraseFromParent();
- return true;
- }
- }
-
- if (calleeName.equals("__amdil_is_asic_id_i32")) {
- ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
- Type *aType = Type::getInt32Ty(*mCTX);
- Value *Val = CV;
- if (Val) {
- Val = ConstantInt::get(aType,
- mSTM->device()->getDeviceFlag() & CV->getZExtValue());
- } else {
- Val = ConstantInt::get(aType, 0);
- }
- CI->replaceAllUsesWith(Val);
- ++(*bbb);
- CI->eraseFromParent();
- return true;
- }
- Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
- if (!F) {
- return false;
- }
- if (F->getName().startswith("__atom") && !CI->getNumUses()
- && F->getName().find("_xchg") == StringRef::npos) {
- std::string buffer(F->getName().str() + "_noret");
- F = dyn_cast<Function>(
- F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
- atomicFuncs.push_back(std::make_pair(CI, F));
- }
-
- if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
- && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
- return false;
- }
- if (!mConvertAtomics) {
- return false;
- }
- StringRef name = F->getName();
- if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
- mConvertAtomics = false;
- }
- return false;
-}
-
-bool
-AMDGPUPeepholeOpt::setupBitInsert(Instruction *base,
- Instruction *&src,
- Constant *&mask,
- Constant *&shift) {
- if (!base) {
- if (mDebug) {
- dbgs() << "Null pointer passed into function.\n";
- }
- return false;
- }
- bool andOp = false;
- if (base->getOpcode() == Instruction::Shl) {
- shift = dyn_cast<Constant>(base->getOperand(1));
- } else if (base->getOpcode() == Instruction::And) {
- mask = dyn_cast<Constant>(base->getOperand(1));
- andOp = true;
- } else {
- if (mDebug) {
- dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
- }
- // If the base is neither a Shl or a And, we don't fit any of the patterns above.
- return false;
- }
- src = dyn_cast<Instruction>(base->getOperand(0));
- if (!src) {
- if (mDebug) {
- dbgs() << "Failed setup since the base operand is not an instruction!\n";
- }
- return false;
- }
- // If we find an 'and' operation, then we don't need to
- // find the next operation as we already know the
- // bits that are valid at this point.
- if (andOp) {
- return true;
- }
- if (src->getOpcode() == Instruction::Shl && !shift) {
- shift = dyn_cast<Constant>(src->getOperand(1));
- src = dyn_cast<Instruction>(src->getOperand(0));
- } else if (src->getOpcode() == Instruction::And && !mask) {
- mask = dyn_cast<Constant>(src->getOperand(1));
- }
- if (!mask && !shift) {
- if (mDebug) {
- dbgs() << "Failed setup since both mask and shift are NULL!\n";
- }
- // Did not find a constant mask or a shift.
- return false;
- }
- return true;
-}
-bool
-AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst) {
- if (!inst) {
- return false;
- }
- if (!inst->isBinaryOp()) {
- return false;
- }
- if (inst->getOpcode() != Instruction::Or) {
- return false;
- }
- if (optLevel == CodeGenOpt::None) {
- return false;
- }
- // We want to do an optimization on a sequence of ops that in the end equals a
- // single ISA instruction.
- // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
- // Some simplified versions of this pattern are as follows:
- // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
- // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
- // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
- // (A & B) | (D << F) when (1 << F) >= B
- // (A << C) | (D & E) when (1 << C) >= E
- if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
- // The HD4XXX hardware doesn't support the ubit_insert instruction.
- return false;
- }
- Type *aType = inst->getType();
- bool isVector = aType->isVectorTy();
- int numEle = 1;
- // This optimization only works on 32bit integers.
- if (aType->getScalarType()
- != Type::getInt32Ty(inst->getContext())) {
- return false;
- }
- if (isVector) {
- const VectorType *VT = dyn_cast<VectorType>(aType);
- numEle = VT->getNumElements();
- // We currently cannot support more than 4 elements in an intrinsic and we
- // cannot support Vec3 types.
- if (numEle > 4 || numEle == 3) {
- return false;
- }
- }
- // TODO: Handle vectors.
- if (isVector) {
- if (mDebug) {
- dbgs() << "!!! Vectors are not supported yet!\n";
- }
- return false;
- }
- Instruction *LHSSrc = NULL, *RHSSrc = NULL;
- Constant *LHSMask = NULL, *RHSMask = NULL;
- Constant *LHSShift = NULL, *RHSShift = NULL;
- Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
- Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
- if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
- if (mDebug) {
- dbgs() << "Found an OR Operation that failed setup!\n";
- inst->dump();
- if (LHS) { LHS->dump(); }
- if (LHSSrc) { LHSSrc->dump(); }
- if (LHSMask) { LHSMask->dump(); }
- if (LHSShift) { LHSShift->dump(); }
- }
- // There was an issue with the setup for BitInsert.
- return false;
- }
- if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
- if (mDebug) {
- dbgs() << "Found an OR Operation that failed setup!\n";
- inst->dump();
- if (RHS) { RHS->dump(); }
- if (RHSSrc) { RHSSrc->dump(); }
- if (RHSMask) { RHSMask->dump(); }
- if (RHSShift) { RHSShift->dump(); }
- }
- // There was an issue with the setup for BitInsert.
- return false;
- }
- if (mDebug) {
- dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
- dbgs() << "Op: "; inst->dump();
- dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
- dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
- }
- Constant *offset = NULL;
- Constant *width = NULL;
- uint32_t lhsMaskVal = 0, rhsMaskVal = 0;
- uint32_t lhsShiftVal = 0, rhsShiftVal = 0;
- uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
- uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
- lhsMaskVal = (LHSMask
- ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
- rhsMaskVal = (RHSMask
- ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
- lhsShiftVal = (LHSShift
- ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
- rhsShiftVal = (RHSShift
- ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
- lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
- rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
- lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
- rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
- // TODO: Handle the case of A & B | D & ~B (i.e. inverted masks).
- if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
- return false;
- }
- if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
- offset = ConstantInt::get(aType, lhsMaskOffset, false);
- width = ConstantInt::get(aType, lhsMaskWidth, false);
- RHSSrc = RHS;
- if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
- return false;
- }
- if (!LHSShift) {
- LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
- "MaskShr", LHS);
- } else if (lhsShiftVal != lhsMaskOffset) {
- LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
- "MaskShr", LHS);
- }
- if (mDebug) {
- dbgs() << "Optimizing LHS!\n";
- }
- } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
- offset = ConstantInt::get(aType, rhsMaskOffset, false);
- width = ConstantInt::get(aType, rhsMaskWidth, false);
- LHSSrc = RHSSrc;
- RHSSrc = LHS;
- if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
- return false;
- }
- if (!RHSShift) {
- LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
- "MaskShr", RHS);
- } else if (rhsShiftVal != rhsMaskOffset) {
- LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
- "MaskShr", RHS);
- }
- if (mDebug) {
- dbgs() << "Optimizing RHS!\n";
- }
- } else {
- if (mDebug) {
- dbgs() << "Failed constraint 3!\n";
- }
- return false;
- }
- if (mDebug) {
- dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
- dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
- dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
- dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
- }
- if (!offset || !width) {
- if (mDebug) {
- dbgs() << "Either width or offset are NULL, failed detection!\n";
- }
- return false;
- }
- // Let's create the function signature.
- std::vector<Type *> callTypes;
- callTypes.push_back(aType);
- callTypes.push_back(aType);
- callTypes.push_back(aType);
- callTypes.push_back(aType);
- FunctionType *funcType = FunctionType::get(aType, callTypes, false);
- std::string name = "__amdil_ubit_insert";
- if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
- Function *Func =
- dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
- getOrInsertFunction(StringRef(name), funcType));
- Value *Operands[4] = {
- width,
- offset,
- LHSSrc,
- RHSSrc
- };
- CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
- if (mDebug) {
- dbgs() << "Old Inst: ";
- inst->dump();
- dbgs() << "New Inst: ";
- CI->dump();
- dbgs() << "\n\n";
- }
- CI->insertBefore(inst);
- inst->replaceAllUsesWith(CI);
- return true;
-}
-
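For reference, here is a minimal standalone C++ sketch (not part of the pass and not AMDIL code) of the scalar field-merge identity that the ubit_insert rewrite above depends on: a contiguous mask names a field of width popcount(mask) at offset counttrailingzeros(mask), and inserting that field of one value into another leaves every other bit of the destination untouched. The helper names and the GCC/Clang __builtin_* calls are illustrative assumptions.

  #include <cassert>
  #include <cstdint>

  // Width and offset of the contiguous bitfield described by 'mask'
  // (GCC/Clang builtins, assumed available; mask must be non-zero).
  static uint32_t fieldWidth(uint32_t mask)  { return __builtin_popcount(mask); }
  static uint32_t fieldOffset(uint32_t mask) { return __builtin_ctz(mask); }

  // Reference form of the pattern: take the field bits from 'src' and the
  // remaining bits from 'base', i.e. (src & mask) | (base & ~mask).
  static uint32_t bitInsertRef(uint32_t base, uint32_t src, uint32_t mask) {
    return (src & mask) | (base & ~mask);
  }

  int main() {
    const uint32_t mask = 0x00000FF0u;   // 8-bit field at offset 4
    const uint32_t base = 0xDEADBEEFu;
    const uint32_t src  = 0x12345678u;
    const uint32_t w = fieldWidth(mask), o = fieldOffset(mask);
    // Single-instruction view: insert the low 'w' bits of (src >> o)
    // at offset 'o' of 'base'.
    const uint32_t low   = (1u << w) - 1u;
    const uint32_t field = (src >> o) & low;
    const uint32_t ins   = (base & ~(low << o)) | (field << o);
    assert(ins == bitInsertRef(base, src, mask));
    return 0;
  }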
-bool
-AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst) {
- if (!inst) {
- return false;
- }
- if (!inst->isBinaryOp()) {
- return false;
- }
- if (inst->getOpcode() != Instruction::And) {
- return false;
- }
- if (optLevel == CodeGenOpt::None) {
- return false;
- }
- // We want to do some simple optimizations on Shift right/And patterns. The
- // basic optimization is to turn (A >> B) & C, where A is a 32bit type, B is
- // a value smaller than 32 and C is a mask, into a single bit-extract call.
- // If C is a constant value, then the following transformation can occur.
- // For signed integers, it turns into the function call
- // dst = __amdil_ibit_extract(log2(C), B, A). For unsigned integers, it
- // turns into the function call dst = __amdil_ubit_extract(log2(C), B, A).
- // The function __amdil_[u|i]bit_extract can be found in Section 7.9 of the
- // ATI IL spec of the stream SDK for Evergreen hardware.
- if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
- // This does not work on HD4XXX hardware.
- return false;
- }
- Type *aType = inst->getType();
- bool isVector = aType->isVectorTy();
-
- // XXX Support vector types
- if (isVector) {
- return false;
- }
- int numEle = 1;
- // This only works on 32bit integers
- if (aType->getScalarType()
- != Type::getInt32Ty(inst->getContext())) {
- return false;
- }
- if (isVector) {
- const VectorType *VT = dyn_cast<VectorType>(aType);
- numEle = VT->getNumElements();
- // We currently cannot support more than 4 elements in an intrinsic and we
- // cannot support Vec3 types.
- if (numEle > 4 || numEle == 3) {
- return false;
- }
- }
- BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
- // If the first operand is not a shift instruction, then we can return as it
- // doesn't match this pattern.
- if (!ShiftInst || !ShiftInst->isShift()) {
- return false;
- }
- // If this is a shift left, then it doesn't match this pattern.
- if (ShiftInst->getOpcode() == Instruction::Shl) {
- return false;
- }
- bool isSigned = ShiftInst->isArithmeticShift();
- Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
- Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
- // Let's make sure that the shift value and the and mask are constant integers.
- if (!AndMask || !ShrVal) {
- return false;
- }
- Constant *newMaskConst;
- Constant *shiftValConst;
- if (isVector) {
- // Handle the vector case
- std::vector<Constant *> maskVals;
- std::vector<Constant *> shiftVals;
- ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
- ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
- Type *scalarType = AndMaskVec->getType()->getScalarType();
- assert(AndMaskVec->getNumOperands() ==
- ShrValVec->getNumOperands() && "cannot have a "
- "combination where the number of elements to a "
- "shift and an and are different!");
- for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
- ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
- ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
- if (!AndCI || !ShiftIC) {
- return false;
- }
- uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
- if (!isMask_32(maskVal)) {
- return false;
- }
- maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
- uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
- // If the mask or shiftval is greater than the bitcount, then break out.
- if (maskVal >= 32 || shiftVal >= 32) {
- return false;
- }
- // If the mask val is greater than the number of original bits left,
- // then this optimization is invalid.
- if (maskVal > (32 - shiftVal)) {
- return false;
- }
- maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
- shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
- }
- newMaskConst = ConstantVector::get(maskVals);
- shiftValConst = ConstantVector::get(shiftVals);
- } else {
- // Handle the scalar case
- uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
- // This must be a mask value where all lower bits are set to 1 and all
- // higher bits are set to 0.
- if (!isMask_32(maskVal)) {
- return false;
- }
- maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
- // Count the number of bits set in the mask; this is the width of the
- // resulting bit range that is extracted from the source value.
- uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
- // If the mask or shift val is greater than the bitcount, then break out.
- if (maskVal >= 32 || shiftVal >= 32) {
- return false;
- }
- // If the mask val is greater than the number of original bits left, then
- // this optimization is invalid.
- if (maskVal > (32 - shiftVal)) {
- return false;
- }
- newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
- shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
- }
- // Let's create the function signature.
- std::vector<Type *> callTypes;
- callTypes.push_back(aType);
- callTypes.push_back(aType);
- callTypes.push_back(aType);
- FunctionType *funcType = FunctionType::get(aType, callTypes, false);
- std::string name = "llvm.AMDGPU.bit.extract.u32";
- if (isVector) {
- name += ".v" + itostr(numEle) + "i32";
- } else {
- name += ".";
- }
- // Let's create the function.
- Function *Func =
- dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
- getOrInsertFunction(StringRef(name), funcType));
- Value *Operands[3] = {
- ShiftInst->getOperand(0),
- shiftValConst,
- newMaskConst
- };
- // Let's create the Call with the operands.
- CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
- CI->setDoesNotAccessMemory();
- CI->insertBefore(inst);
- inst->replaceAllUsesWith(CI);
- return true;
-}
-
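For reference, here is a minimal standalone sketch of the scalar equivalence behind the rewrite above, assuming the AND mask is a run of ones starting at bit 0. bitExtract only models the width/offset semantics the generated call is assumed to provide; it is not the intrinsic itself, and the builtin is a GCC/Clang assumption.

  #include <cassert>
  #include <cstdint>

  // Shift-then-mask form as it appears in the IR.
  static uint32_t shiftAndMask(uint32_t a, uint32_t b, uint32_t c) {
    return (a >> b) & c;
  }

  // Width/offset form: take 'width' bits of 'a' starting at bit 'offset'
  // (width < 32 assumed here).
  static uint32_t bitExtract(uint32_t a, uint32_t offset, uint32_t width) {
    return (a >> offset) & ((1u << width) - 1u);
  }

  int main() {
    const uint32_t a = 0xCAFEF00Du, b = 12, c = 0xFFu;  // c: 8 contiguous ones
    const uint32_t width = __builtin_popcount(c);       // 8
    assert(shiftAndMask(a, b, c) == bitExtract(a, /*offset=*/b, width));
    return 0;
  }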
-bool
-AMDGPUPeepholeOpt::expandBFI(CallInst *CI) {
- if (!CI) {
- return false;
- }
- Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
- if (!LHS->getName().startswith("__amdil_bfi")) {
- return false;
- }
- Type* type = CI->getOperand(0)->getType();
- Constant *negOneConst = NULL;
- if (type->isVectorTy()) {
- std::vector<Constant *> negOneVals;
- negOneConst = ConstantInt::get(CI->getContext(),
- APInt(32, StringRef("-1"), 10));
- for (size_t x = 0,
- y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
- negOneVals.push_back(negOneConst);
- }
- negOneConst = ConstantVector::get(negOneVals);
- } else {
- negOneConst = ConstantInt::get(CI->getContext(),
- APInt(32, StringRef("-1"), 10));
- }
- // __amdil_bfi => (A & B) | (~A & C)
- BinaryOperator *lhs =
- BinaryOperator::Create(Instruction::And, CI->getOperand(0),
- CI->getOperand(1), "bfi_and", CI);
- BinaryOperator *rhs =
- BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
- "bfi_not", CI);
- rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
- "bfi_and", CI);
- lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
- CI->replaceAllUsesWith(lhs);
- return true;
-}
-
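For reference, here is a minimal standalone sketch of the bit-select identity that expandBFI open-codes; the name bfi is only a local helper mirroring the comment above, not the AMDIL builtin.

  #include <cassert>
  #include <cstdint>

  // bfi(a, b, c): each bit of 'a' selects between the corresponding bits
  // of 'b' (where a is 1) and 'c' (where a is 0).
  static uint32_t bfi(uint32_t a, uint32_t b, uint32_t c) {
    return (a & b) | (~a & c);
  }

  int main() {
    const uint32_t sel = 0x0F0F0F0Fu, x = 0xAAAAAAAAu, y = 0x55555555u;
    assert(bfi(sel, x, y) == ((x & sel) | (y & ~sel)));
    assert(bfi(0u, x, y)  == y);         // all-zero select returns 'c'
    assert(bfi(~0u, x, y) == x);         // all-one select returns 'b'
    return 0;
  }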
-bool
-AMDGPUPeepholeOpt::expandBFM(CallInst *CI) {
- if (!CI) {
- return false;
- }
- Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
- if (!LHS->getName().startswith("__amdil_bfm")) {
- return false;
- }
- // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
- Constant *newMaskConst = NULL;
- Constant *newShiftConst = NULL;
- Type* type = CI->getOperand(0)->getType();
- if (type->isVectorTy()) {
- std::vector<Constant*> newMaskVals, newShiftVals;
- newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
- newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
- for (size_t x = 0,
- y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
- newMaskVals.push_back(newMaskConst);
- newShiftVals.push_back(newShiftConst);
- }
- newMaskConst = ConstantVector::get(newMaskVals);
- newShiftConst = ConstantVector::get(newShiftVals);
- } else {
- newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
- newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
- }
- BinaryOperator *lhs =
- BinaryOperator::Create(Instruction::And, CI->getOperand(0),
- newMaskConst, "bfm_mask", CI);
- lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
- lhs, "bfm_shl", CI);
- lhs = BinaryOperator::Create(Instruction::Sub, lhs,
- newShiftConst, "bfm_sub", CI);
- BinaryOperator *rhs =
- BinaryOperator::Create(Instruction::And, CI->getOperand(1),
- newMaskConst, "bfm_mask", CI);
- lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
- CI->replaceAllUsesWith(lhs);
- return true;
-}
-
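For reference, here is a minimal standalone sketch of the mask-building identity that expandBFM open-codes; bfm is a local helper mirroring the comment above, not the AMDIL builtin.

  #include <cassert>
  #include <cstdint>

  // bfm(width, offset): a run of 'width' ones starting at bit 'offset',
  // both arguments taken modulo 32, matching the expansion above.
  static uint32_t bfm(uint32_t width, uint32_t offset) {
    return ((1u << (width & 0x1F)) - 1u) << (offset & 0x1F);
  }

  int main() {
    assert(bfm(8, 4)  == 0x00000FF0u);   // eight ones shifted up by four
    assert(bfm(1, 0)  == 0x00000001u);
    assert(bfm(4, 28) == 0xF0000000u);
    return 0;
  }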
-bool
-AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) {
- Instruction *inst = (*bbb);
- if (optimizeCallInst(bbb)) {
- return true;
- }
- if (optimizeBitExtract(inst)) {
- return false;
- }
- if (optimizeBitInsert(inst)) {
- return false;
- }
- if (correctMisalignedMemOp(inst)) {
- return false;
- }
- return false;
-}
-bool
-AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) {
- LoadInst *linst = dyn_cast<LoadInst>(inst);
- StoreInst *sinst = dyn_cast<StoreInst>(inst);
- unsigned alignment;
- Type* Ty = inst->getType();
- if (linst) {
- alignment = linst->getAlignment();
- Ty = inst->getType();
- } else if (sinst) {
- alignment = sinst->getAlignment();
- Ty = sinst->getValueOperand()->getType();
- } else {
- return false;
- }
- unsigned size = getTypeSize(Ty);
- if (size == alignment || size < alignment) {
- return false;
- }
- if (!Ty->isStructTy()) {
- return false;
- }
- if (alignment < 4) {
- if (linst) {
- linst->setAlignment(0);
- return true;
- } else if (sinst) {
- sinst->setAlignment(0);
- return true;
- }
- }
- return false;
-}
-bool
-AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI) {
- if (!CI) {
- return false;
- }
- Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
- std::string namePrefix = LHS->getName().substr(0, 14);
- if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
- && namePrefix != "__amdil__imul24_high") {
- return false;
- }
- if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
- return false;
- }
- return true;
-}
-
-void
-AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI) {
- assert(isSigned24BitOps(CI) && "Must be a "
- "signed 24 bit operation to call this function!");
- Value *LHS = CI->getOperand(CI->getNumOperands()-1);
- // On 7XX and 8XX we do not have signed 24bit, so we need to
- // expand it to the following:
- // imul24 turns into 32bit imul
- // imad24 turns into 32bit imad
- // imul24_high turns into 32bit imulhigh
- if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
- Type *aType = CI->getOperand(0)->getType();
- bool isVector = aType->isVectorTy();
- int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
- std::vector<Type*> callTypes;
- callTypes.push_back(CI->getOperand(0)->getType());
- callTypes.push_back(CI->getOperand(1)->getType());
- callTypes.push_back(CI->getOperand(2)->getType());
- FunctionType *funcType =
- FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
- std::string name = "__amdil_imad";
- if (isVector) {
- name += "_v" + itostr(numEle) + "i32";
- } else {
- name += "_i32";
- }
- Function *Func = dyn_cast<Function>(
- CI->getParent()->getParent()->getParent()->
- getOrInsertFunction(StringRef(name), funcType));
- Value *Operands[3] = {
- CI->getOperand(0),
- CI->getOperand(1),
- CI->getOperand(2)
- };
- CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
- nCI->insertBefore(CI);
- CI->replaceAllUsesWith(nCI);
- } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
- BinaryOperator *mulOp =
- BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
- CI->getOperand(1), "imul24", CI);
- CI->replaceAllUsesWith(mulOp);
- } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
- Type *aType = CI->getOperand(0)->getType();
-
- bool isVector = aType->isVectorTy();
- int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
- std::vector<Type*> callTypes;
- callTypes.push_back(CI->getOperand(0)->getType());
- callTypes.push_back(CI->getOperand(1)->getType());
- FunctionType *funcType =
- FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
- std::string name = "__amdil_imul_high";
- if (isVector) {
- name += "_v" + itostr(numEle) + "i32";
- } else {
- name += "_i32";
- }
- Function *Func = dyn_cast<Function>(
- CI->getParent()->getParent()->getParent()->
- getOrInsertFunction(StringRef(name), funcType));
- Value *Operands[2] = {
- CI->getOperand(0),
- CI->getOperand(1)
- };
- CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
- nCI->insertBefore(CI);
- CI->replaceAllUsesWith(nCI);
- }
-}
-
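For reference, here is a minimal standalone sketch of the plain 32-bit semantics this expansion lowers to (mad, mul and mul-high on i32); it makes no claim about the 24-bit hardware instructions themselves, and both helper names are made up for illustration.

  #include <cassert>
  #include <cstdint>

  // 32-bit mad: a * b + c.
  static int32_t mad32(int32_t a, int32_t b, int32_t c) { return a * b + c; }

  // 32-bit mul-high: upper 32 bits of the full 64-bit signed product.
  static int32_t mulhi32(int32_t a, int32_t b) {
    return (int32_t)(((int64_t)a * (int64_t)b) >> 32);
  }

  int main() {
    // mad: operands kept small so the 32-bit product cannot overflow.
    assert(mad32(1000, -2000, 5) == -1999995);
    // mul-high: 0x700000 * 0x700000 == 49 * 2^40, so the high word is
    // 49 * 2^8 == 12544.
    assert(mulhi32(0x700000, 0x700000) == 12544);
    return 0;
  }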
-bool
-AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI) {
- return (CI != NULL
- && CI->getOperand(CI->getNumOperands() - 1)->getName()
- == "__amdil_get_local_size_int");
-}
-
-bool
-AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI) {
- if (!CI) {
- return false;
- }
- if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
- && (mSTM->getDeviceName() == "cayman")) {
- return false;
- }
- return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20)
- == "__amdil_improved_div";
-}
-
-void
-AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI) {
- assert(convertAccurateDivide(CI)
- && "expanding accurate divide can only happen if it is expandable!");
- BinaryOperator *divOp =
- BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
- CI->getOperand(1), "fdiv32", CI);
- CI->replaceAllUsesWith(divOp);
-}
-
-bool
-AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) {
- if (optLevel != CodeGenOpt::None) {
- return false;
- }
-
- if (!CI) {
- return false;
- }
-
- unsigned funcNameIdx = 0;
- funcNameIdx = CI->getNumOperands() - 1;
- StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
- if (calleeName != "__amdil_image2d_read_norm"
- && calleeName != "__amdil_image2d_read_unnorm"
- && calleeName != "__amdil_image3d_read_norm"
- && calleeName != "__amdil_image3d_read_unnorm") {
- return false;
- }
-
- unsigned samplerIdx = 2;
- samplerIdx = 1;
- Value *sampler = CI->getOperand(samplerIdx);
- LoadInst *lInst = dyn_cast<LoadInst>(sampler);
- if (!lInst) {
- return false;
- }
-
- if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
- return false;
- }
-
- GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
- // If we are loading from what is not a global value, then we
- // fail and return.
- if (!gv) {
- return false;
- }
-
- // If there is no initializer, or the initializer is not a 32bit
- // integer, we fail.
- if (!gv->hasInitializer()
- || !gv->getInitializer()->getType()->isIntegerTy(32)) {
- return false;
- }
-
- // Now that we have the global variable initializer, let's replace
- // all uses of the load instruction with the samplerVal and
- // reparse the __amdil_is_constant() function.
- Constant *samplerVal = gv->getInitializer();
- lInst->replaceAllUsesWith(samplerVal);
- return true;
-}
-
-bool
-AMDGPUPeepholeOpt::doInitialization(Module &M) {
- return false;
-}
-
-bool
-AMDGPUPeepholeOpt::doFinalization(Module &M) {
- return false;
-}
-
-void
-AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineFunctionAnalysis>();
- FunctionPass::getAnalysisUsage(AU);
- AU.setPreservesAll();
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
- size_t size = 0;
- if (!T) {
- return size;
- }
- switch (T->getTypeID()) {
- case Type::X86_FP80TyID:
- case Type::FP128TyID:
- case Type::PPC_FP128TyID:
- case Type::LabelTyID:
- assert(0 && "These types are not supported by this backend");
- default:
- case Type::FloatTyID:
- case Type::DoubleTyID:
- size = T->getPrimitiveSizeInBits() >> 3;
- break;
- case Type::PointerTyID:
- size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
- break;
- case Type::IntegerTyID:
- size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
- break;
- case Type::StructTyID:
- size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
- break;
- case Type::ArrayTyID:
- size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
- break;
- case Type::FunctionTyID:
- size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
- break;
- case Type::VectorTyID:
- size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
- break;
- };
- return size;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
- bool dereferencePtr) {
- size_t size = 0;
- if (!ST) {
- return size;
- }
- Type *curType;
- StructType::element_iterator eib;
- StructType::element_iterator eie;
- for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
- curType = *eib;
- size += getTypeSize(curType, dereferencePtr);
- }
- return size;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
- bool dereferencePtr) {
- return IT ? (IT->getBitWidth() >> 3) : 0;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
- bool dereferencePtr) {
- assert(0 && "Should not be able to calculate the size of an function type");
- return 0;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
- bool dereferencePtr) {
- return (size_t)(AT ? (getTypeSize(AT->getElementType(),
- dereferencePtr) * AT->getNumElements())
- : 0);
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
- bool dereferencePtr) {
- return VT ? (VT->getBitWidth() >> 3) : 0;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
- bool dereferencePtr) {
- if (!PT) {
- return 0;
- }
- Type *CT = PT->getElementType();
- if (CT->getTypeID() == Type::StructTyID &&
- PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
- return getTypeSize(dyn_cast<StructType>(CT));
- } else if (dereferencePtr) {
- size_t size = 0;
- for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
- size += getTypeSize(PT->getContainedType(x), dereferencePtr);
- }
- return size;
- } else {
- return 4;
- }
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
- bool dereferencePtr) {
- //assert(0 && "Should not be able to calculate the size of an opaque type");
- return 4;
-}
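For reference, here is a minimal standalone sketch of the size rule these overloads implement: integer and vector sizes are bitwidth/8, struct sizes are the unpadded sum of their element sizes, and a pointer that is not dereferenced counts as 4 bytes. The helpers below only mirror that arithmetic; they are not the pass's code.

  #include <cassert>
  #include <cstddef>
  #include <vector>

  // Integer size in bytes, as getTypeSize(IntegerType*) computes it.
  static size_t intSize(unsigned bits) { return bits >> 3; }

  // Struct size: plain sum of element sizes -- no padding, no alignment.
  static size_t structSize(const std::vector<size_t> &elems) {
    size_t total = 0;
    for (size_t e : elems)
      total += e;
    return total;
  }

  int main() {
    // { i8, i32 } -> 1 + 4 = 5 bytes here, unlike an ABI-aligned layout of 8.
    assert(structSize({intSize(8), intSize(32)}) == 5);
    // A non-dereferenced pointer contributes 4 bytes in the code above.
    assert(structSize({intSize(8), 4}) == 5);
    return 0;
  }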
diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index 10547a5..303cdf2 100644
--- a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -17,6 +17,7 @@ using namespace llvm;
void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
+ OS.flush();
printInstruction(MI, OS);
printAnnotation(OS, Annot);
@@ -67,11 +68,14 @@ void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
- raw_ostream &O, StringRef Asm) {
+ raw_ostream &O, StringRef Asm,
+ StringRef Default) {
const MCOperand &Op = MI->getOperand(OpNo);
assert(Op.isImm());
if (Op.getImm() == 1) {
O << Asm;
+ } else {
+ O << Default;
}
}
@@ -98,7 +102,7 @@ void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- printIfSet(MI, OpNo, O, " *");
+ printIfSet(MI, OpNo, O.indent(20 - O.GetNumBytesInBuffer()), "*", " ");
}
void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
@@ -169,4 +173,41 @@ void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
O << "." << chans[chan];
}
+void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int BankSwizzle = MI->getOperand(OpNo).getImm();
+ switch (BankSwizzle) {
+ case 1:
+ O << "BS:VEC_021";
+ break;
+ case 2:
+ O << "BS:VEC_120";
+ break;
+ case 3:
+ O << "BS:VEC_102";
+ break;
+ case 4:
+ O << "BS:VEC_201";
+ break;
+ case 5:
+ O << "BS:VEC_210";
+ break;
+ default:
+ break;
+ }
+ return;
+}
+
+void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int KCacheMode = MI->getOperand(OpNo).getImm();
+ if (KCacheMode > 0) {
+ int KCacheBank = MI->getOperand(OpNo - 2).getImm();
+ O << "CB" << KCacheBank <<":";
+ int KCacheAddr = MI->getOperand(OpNo + 2).getImm();
+ int LineSize = (KCacheMode == 1)?16:32;
+ O << KCacheAddr * 16 << "-" << KCacheAddr * 16 + LineSize;
+ }
+}
+
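For reference, here is a minimal standalone sketch of the address arithmetic printKCache performs: a kcache set starts at Addr * 16 and covers one line of 16 (mode 1) or 32 (mode 2) entries. kcacheRange is an illustrative helper, not part of the printer.

  #include <cassert>

  // Start/end of the range printed for one kcache set.
  static void kcacheRange(int Mode, int Addr, int &Begin, int &End) {
    const int LineSize = (Mode == 1) ? 16 : 32;
    Begin = Addr * 16;
    End   = Begin + LineSize;
  }

  int main() {
    int b, e;
    kcacheRange(/*Mode=*/1, /*Addr=*/2, b, e);
    assert(b == 32 && e == 48);          // printed as "CB<bank>:32-48"
    kcacheRange(/*Mode=*/2, /*Addr=*/0, b, e);
    assert(b == 0 && e == 32);
    return 0;
  }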
#include "AMDGPUGenAsmWriter.inc"
diff --git a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 767a708..c6fd053 100644
--- a/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/contrib/llvm/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -35,7 +35,8 @@ private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm);
+ void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ StringRef Asm, StringRef Default = "");
void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -47,6 +48,8 @@ private:
void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index 98fca43..a3397f3 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -44,7 +44,6 @@ public:
AMDGPUAsmBackend(const Target &T)
: MCAsmBackend() {}
- virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
virtual unsigned getNumFixupKinds() const { return 0; };
virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const;
@@ -71,16 +70,6 @@ void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
}
}
-MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
- StringRef CPU) {
- return new AMDGPUAsmBackend(T);
-}
-
-AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
- raw_ostream &OS) const {
- return new AMDGPUMCObjectWriter(OS);
-}
-
void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value) const {
@@ -88,3 +77,21 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
assert(Fixup.getKind() == FK_PCRel_4);
*Dst = (Value - 4) / 4;
}
+
+//===----------------------------------------------------------------------===//
+// ELFAMDGPUAsmBackend class
+//===----------------------------------------------------------------------===//
+
+class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
+public:
+ ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createAMDGPUELFObjectWriter(OS);
+ }
+};
+
+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
+ StringRef CPU) {
+ return new ELFAMDGPUAsmBackend(T);
+}
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
new file mode 100644
index 0000000..48fac9f
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -0,0 +1,39 @@
+//===-- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ AMDGPUELFObjectWriter();
+protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ llvm_unreachable("Not implemented");
+ }
+
+};
+
+
+} // End anonymous namespace
+
+AMDGPUELFObjectWriter::AMDGPUELFObjectWriter()
+ : MCELFObjectTargetWriter(false, 0, 0, false) { }
+
+MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_ostream &OS) {
+ MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter();
+ return createELFObjectWriter(MOTW, OS, true);
+}
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index b7cdd7c..2aae26a 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -68,8 +68,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() {
//===--- Dwarf Emission Directives -----------------------------------===//
HasLEB128 = true;
SupportsDebugInformation = true;
- DwarfSectionOffsetDirective = ".offset";
-
}
const char*
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 072ee49..61d70bb 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -78,7 +78,7 @@ static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
} else {
- return createR600MCCodeEmitter(MCII, MRI, STI, Ctx);
+ return createR600MCCodeEmitter(MCII, MRI, STI);
}
}
@@ -88,7 +88,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCCodeEmitter *_Emitter,
bool RelaxAll,
bool NoExecStack) {
- return createPureStreamer(Ctx, MAB, _OS, _Emitter);
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, false, false);
}
extern "C" void LLVMInitializeR600TargetMC() {
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index 363a4af..abb0320 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -23,16 +23,17 @@ class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
+class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
+class raw_ostream;
extern Target TheAMDGPUTarget;
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
+ const MCSubtargetInfo &STI);
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@@ -41,6 +42,8 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT,
StringRef CPU);
+
+MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS);
} // End llvm namespace
#define GET_REGINFO_ENUM
diff --git a/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 927bcbd..cb4cf0c 100644
--- a/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -9,12 +9,8 @@
//
/// \file
///
-/// This code emitter outputs bytecode that is understood by the r600g driver
-/// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
-/// but it still needs to be run through a finalizer in order to be executed
-/// by the GPU.
-///
-/// [1] http://www.mesa3d.org/
+/// \brief The R600 code emitter produces machine code that can be executed
+/// directly on the GPU device.
//
//===----------------------------------------------------------------------===//
@@ -30,9 +26,6 @@
#include "llvm/Support/raw_ostream.h"
#include <stdio.h>
-#define SRC_BYTE_COUNT 11
-#define DST_BYTE_COUNT 5
-
using namespace llvm;
namespace {
@@ -43,13 +36,12 @@ class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
const MCSubtargetInfo &STI;
- MCContext &Ctx;
public:
R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
- const MCSubtargetInfo &sti, MCContext &ctx)
- : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
+ const MCSubtargetInfo &sti)
+ : MCII(mcii), MRI(mri), STI(sti) { }
/// \brief Encode the instruction and write it to the OS.
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -60,30 +52,14 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
private:
- void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
- raw_ostream &OS) const;
- void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
- void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
- raw_ostream &OS) const;
- void EmitDst(const MCInst &MI, raw_ostream &OS) const;
- void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
-
- void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
-
void EmitByte(unsigned int byte, raw_ostream &OS) const;
- void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
-
void Emit(uint32_t value, raw_ostream &OS) const;
void Emit(uint64_t value, raw_ostream &OS) const;
unsigned getHWRegChan(unsigned reg) const;
unsigned getHWReg(unsigned regNo) const;
- bool isFCOp(unsigned opcode) const;
- bool isTexOp(unsigned opcode) const;
- bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
-
};
} // End anonymous namespace
@@ -95,16 +71,6 @@ enum RegElement {
ELEMENT_W
};
-enum InstrTypes {
- INSTR_ALU = 0,
- INSTR_TEX,
- INSTR_FC,
- INSTR_NATIVE,
- INSTR_VTX,
- INSTR_EXPORT,
- INSTR_CFALU
-};
-
enum FCInstr {
FC_IF_PREDICATE = 0,
FC_ELSE,
@@ -132,355 +98,95 @@ enum TextureTypes {
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- return new R600MCCodeEmitter(MCII, MRI, STI, Ctx);
+ const MCSubtargetInfo &STI) {
+ return new R600MCCodeEmitter(MCII, MRI, STI);
}
void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
- if (isFCOp(MI.getOpcode())){
- EmitFCInstr(MI, OS);
- } else if (MI.getOpcode() == AMDGPU::RETURN ||
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ if (MI.getOpcode() == AMDGPU::RETURN ||
+ MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
+ MI.getOpcode() == AMDGPU::ALU_CLAUSE ||
MI.getOpcode() == AMDGPU::BUNDLE ||
MI.getOpcode() == AMDGPU::KILL) {
return;
- } else {
- switch(MI.getOpcode()) {
- case AMDGPU::STACK_SIZE: {
- EmitByte(MI.getOperand(0).getImm(), OS);
- break;
- }
- case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
- case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
- uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
- EmitByte(INSTR_NATIVE, OS);
- Emit(inst, OS);
- break;
- }
- case AMDGPU::CONSTANT_LOAD_eg:
- case AMDGPU::VTX_READ_PARAM_8_eg:
- case AMDGPU::VTX_READ_PARAM_16_eg:
- case AMDGPU::VTX_READ_PARAM_32_eg:
- case AMDGPU::VTX_READ_PARAM_128_eg:
- case AMDGPU::VTX_READ_GLOBAL_8_eg:
- case AMDGPU::VTX_READ_GLOBAL_32_eg:
- case AMDGPU::VTX_READ_GLOBAL_128_eg:
- case AMDGPU::TEX_VTX_CONSTBUF:
- case AMDGPU::TEX_VTX_TEXBUF : {
- uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
- uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
-
- EmitByte(INSTR_VTX, OS);
- Emit(InstWord01, OS);
- Emit(InstWord2, OS);
- break;
- }
- case AMDGPU::TEX_LD:
- case AMDGPU::TEX_GET_TEXTURE_RESINFO:
- case AMDGPU::TEX_SAMPLE:
- case AMDGPU::TEX_SAMPLE_C:
- case AMDGPU::TEX_SAMPLE_L:
- case AMDGPU::TEX_SAMPLE_C_L:
- case AMDGPU::TEX_SAMPLE_LB:
- case AMDGPU::TEX_SAMPLE_C_LB:
- case AMDGPU::TEX_SAMPLE_G:
- case AMDGPU::TEX_SAMPLE_C_G:
- case AMDGPU::TEX_GET_GRADIENTS_H:
- case AMDGPU::TEX_GET_GRADIENTS_V:
- case AMDGPU::TEX_SET_GRADIENTS_H:
- case AMDGPU::TEX_SET_GRADIENTS_V: {
- unsigned Opcode = MI.getOpcode();
- bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
- unsigned OpOffset = HasOffsets ? 3 : 0;
- int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
- int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
-
- uint32_t SrcSelect[4] = {0, 1, 2, 3};
- uint32_t Offsets[3] = {0, 0, 0};
- uint64_t CoordType[4] = {1, 1, 1, 1};
-
- if (HasOffsets)
- for (unsigned i = 0; i < 3; i++) {
- int SignedOffset = MI.getOperand(i + 2).getImm();
- Offsets[i] = (SignedOffset & 0x1F);
- }
-
-
- if (TextureType == TEXTURE_RECT ||
- TextureType == TEXTURE_SHADOWRECT) {
- CoordType[ELEMENT_X] = 0;
- CoordType[ELEMENT_Y] = 0;
- }
-
- if (TextureType == TEXTURE_1D_ARRAY ||
- TextureType == TEXTURE_SHADOW1D_ARRAY) {
- if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
- Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
- CoordType[ELEMENT_Y] = 0;
- } else {
- CoordType[ELEMENT_Z] = 0;
- SrcSelect[ELEMENT_Z] = ELEMENT_Y;
- }
- } else if (TextureType == TEXTURE_2D_ARRAY ||
- TextureType == TEXTURE_SHADOW2D_ARRAY) {
- CoordType[ELEMENT_Z] = 0;
+ } else if (IS_VTX(Desc)) {
+ uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
+ uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
+ InstWord2 |= 1 << 19;
+
+ Emit(InstWord01, OS);
+ Emit(InstWord2, OS);
+ Emit((u_int32_t) 0, OS);
+ } else if (IS_TEX(Desc)) {
+ unsigned Opcode = MI.getOpcode();
+ bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
+ unsigned OpOffset = HasOffsets ? 3 : 0;
+ int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
+ int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
+
+ uint32_t SrcSelect[4] = {0, 1, 2, 3};
+ uint32_t Offsets[3] = {0, 0, 0};
+ uint64_t CoordType[4] = {1, 1, 1, 1};
+
+ if (HasOffsets)
+ for (unsigned i = 0; i < 3; i++) {
+ int SignedOffset = MI.getOperand(i + 2).getImm();
+ Offsets[i] = (SignedOffset & 0x1F);
}
-
- if ((TextureType == TEXTURE_SHADOW1D ||
- TextureType == TEXTURE_SHADOW2D ||
- TextureType == TEXTURE_SHADOWRECT ||
- TextureType == TEXTURE_SHADOW1D_ARRAY) &&
- Opcode != AMDGPU::TEX_SAMPLE_C_L &&
- Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
- SrcSelect[ELEMENT_W] = ELEMENT_Z;
- }
-
- uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
- CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
- CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
- uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
- SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
- SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
- Offsets[2] << 10;
-
- EmitByte(INSTR_TEX, OS);
- Emit(Word01, OS);
- Emit(Word2, OS);
- break;
- }
- case AMDGPU::EG_ExportSwz:
- case AMDGPU::R600_ExportSwz:
- case AMDGPU::EG_ExportBuf:
- case AMDGPU::R600_ExportBuf: {
- uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
- EmitByte(INSTR_EXPORT, OS);
- Emit(Inst, OS);
- break;
- }
- case AMDGPU::CF_ALU:
- case AMDGPU::CF_ALU_PUSH_BEFORE: {
- uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
- EmitByte(INSTR_CFALU, OS);
- Emit(Inst, OS);
- break;
- }
- case AMDGPU::CF_TC:
- case AMDGPU::CF_VC:
- case AMDGPU::CF_CALL_FS:
- return;
- case AMDGPU::WHILE_LOOP:
- case AMDGPU::END_LOOP:
- case AMDGPU::LOOP_BREAK:
- case AMDGPU::CF_CONTINUE:
- case AMDGPU::CF_JUMP:
- case AMDGPU::CF_ELSE:
- case AMDGPU::POP: {
- uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
- EmitByte(INSTR_NATIVE, OS);
- Emit(Inst, OS);
- break;
+ if (TextureType == TEXTURE_RECT ||
+ TextureType == TEXTURE_SHADOWRECT) {
+ CoordType[ELEMENT_X] = 0;
+ CoordType[ELEMENT_Y] = 0;
}
- default:
- EmitALUInstr(MI, Fixups, OS);
- break;
- }
- }
-}
-
-void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups,
- raw_ostream &OS) const {
- const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
-
- // Emit instruction type
- EmitByte(INSTR_ALU, OS);
-
- uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
-
- //older alu have different encoding for instructions with one or two src
- //parameters.
- if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
- !(MCDesc.TSFlags & R600_InstFlag::OP3)) {
- uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
- InstWord01 &= ~(0x3FFULL << 39);
- InstWord01 |= ISAOpCode << 1;
- }
-
- unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
- MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
-
- EmitByte(SrcNum, OS);
-
- const unsigned SrcOps[3][2] = {
- {R600Operands::SRC0, R600Operands::SRC0_SEL},
- {R600Operands::SRC1, R600Operands::SRC1_SEL},
- {R600Operands::SRC2, R600Operands::SRC2_SEL}
- };
- for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
- unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
- unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
- EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
- }
-
- Emit(InstWord01, OS);
- return;
-}
-
-void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
- raw_ostream &OS) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- union {
- float f;
- uint32_t i;
- } Value;
- Value.i = 0;
- // Emit the source select (2 bytes). For GPRs, this is the register index.
- // For other potential instruction operands, (e.g. constant registers) the
- // value of the source select is defined in the r600isa docs.
- if (MO.isReg()) {
- unsigned reg = MO.getReg();
- EmitTwoBytes(getHWReg(reg), OS);
- if (reg == AMDGPU::ALU_LITERAL_X) {
- unsigned ImmOpIndex = MI.getNumOperands() - 1;
- MCOperand ImmOp = MI.getOperand(ImmOpIndex);
- if (ImmOp.isFPImm()) {
- Value.f = ImmOp.getFPImm();
+ if (TextureType == TEXTURE_1D_ARRAY ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) {
+ if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
+ Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
+ CoordType[ELEMENT_Y] = 0;
} else {
- assert(ImmOp.isImm());
- Value.i = ImmOp.getImm();
+ CoordType[ELEMENT_Z] = 0;
+ SrcSelect[ELEMENT_Z] = ELEMENT_Y;
}
+ } else if (TextureType == TEXTURE_2D_ARRAY ||
+ TextureType == TEXTURE_SHADOW2D_ARRAY) {
+ CoordType[ELEMENT_Z] = 0;
}
- } else {
- // XXX: Handle other operand types.
- EmitTwoBytes(0, OS);
- }
-
- // Emit the source channel (1 byte)
- if (MO.isReg()) {
- EmitByte(getHWRegChan(MO.getReg()), OS);
- } else {
- EmitByte(0, OS);
- }
-
- // XXX: Emit isNegated (1 byte)
- if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
- && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
- (MO.isReg() &&
- (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
- EmitByte(1, OS);
- } else {
- EmitByte(0, OS);
- }
-
- // Emit isAbsolute (1 byte)
- if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
- EmitByte(1, OS);
- } else {
- EmitByte(0, OS);
- }
-
- // XXX: Emit relative addressing mode (1 byte)
- EmitByte(0, OS);
-
- // Emit kc_bank, This will be adjusted later by r600_asm
- EmitByte(0, OS);
- // Emit the literal value, if applicable (4 bytes).
- Emit(Value.i, OS);
-}
-
-void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
- unsigned SelOpIdx, raw_ostream &OS) const {
- const MCOperand &RegMO = MI.getOperand(RegOpIdx);
- const MCOperand &SelMO = MI.getOperand(SelOpIdx);
-
- union {
- float f;
- uint32_t i;
- } InlineConstant;
- InlineConstant.i = 0;
- // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0
- // and select is 0 (GPR index is encoded in the instr encoding. For constants
- // type is 1 and select is the original const select passed from the driver.
- unsigned Reg = RegMO.getReg();
- if (Reg == AMDGPU::ALU_CONST) {
- EmitByte(1, OS);
- uint32_t Sel = SelMO.getImm();
- Emit(Sel, OS);
- } else {
- EmitByte(0, OS);
- Emit((uint32_t)0, OS);
- }
-
- if (Reg == AMDGPU::ALU_LITERAL_X) {
- unsigned ImmOpIndex = MI.getNumOperands() - 1;
- MCOperand ImmOp = MI.getOperand(ImmOpIndex);
- if (ImmOp.isFPImm()) {
- InlineConstant.f = ImmOp.getFPImm();
- } else {
- assert(ImmOp.isImm());
- InlineConstant.i = ImmOp.getImm();
+ if ((TextureType == TEXTURE_SHADOW1D ||
+ TextureType == TEXTURE_SHADOW2D ||
+ TextureType == TEXTURE_SHADOWRECT ||
+ TextureType == TEXTURE_SHADOW1D_ARRAY) &&
+ Opcode != AMDGPU::TEX_SAMPLE_C_L &&
+ Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
+ SrcSelect[ELEMENT_W] = ELEMENT_Z;
}
- }
-
- // Emit the literal value, if applicable (4 bytes).
- Emit(InlineConstant.i, OS);
-}
-
-void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
-
- // Emit instruction type
- EmitByte(INSTR_FC, OS);
- // Emit SRC
- unsigned NumOperands = MI.getNumOperands();
- if (NumOperands > 0) {
- assert(NumOperands == 1);
- EmitSrc(MI, 0, OS);
+ uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
+ CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
+ CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
+ uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
+ SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
+ SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
+ Offsets[2] << 10;
+
+ Emit(Word01, OS);
+ Emit(Word2, OS);
+ Emit((u_int32_t) 0, OS);
} else {
- EmitNullBytes(SRC_BYTE_COUNT, OS);
- }
-
- // Emit FC Instruction
- enum FCInstr instr;
- switch (MI.getOpcode()) {
- case AMDGPU::PREDICATED_BREAK:
- instr = FC_BREAK_PREDICATE;
- break;
- case AMDGPU::CONTINUE:
- instr = FC_CONTINUE;
- break;
- case AMDGPU::IF_PREDICATE_SET:
- instr = FC_IF_PREDICATE;
- break;
- case AMDGPU::ELSE:
- instr = FC_ELSE;
- break;
- case AMDGPU::ENDIF:
- instr = FC_ENDIF;
- break;
- case AMDGPU::ENDLOOP:
- instr = FC_ENDLOOP;
- break;
- case AMDGPU::WHILELOOP:
- instr = FC_BGNLOOP;
- break;
- default:
- abort();
- break;
- }
- EmitByte(instr, OS);
-}
-
-void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
- raw_ostream &OS) const {
-
- for (unsigned int i = 0; i < ByteCount; i++) {
- EmitByte(0, OS);
+ uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+ if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
+ ((Desc.TSFlags & R600_InstFlag::OP1) ||
+ Desc.TSFlags & R600_InstFlag::OP2)) {
+ uint64_t ISAOpCode = Inst & (0x3FFULL << 39);
+ Inst &= ~(0x3FFULL << 39);
+ Inst |= ISAOpCode << 1;
+ }
+ Emit(Inst, OS);
}
}
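For reference, here is a minimal standalone sketch of the opcode-field move applied in the FeatureR600ALUInst branch above; moveR600AluOpcode is an illustrative helper, not the emitter's code, and only shows the 10-bit field at bit 39 being shifted up by one position.

  #include <cassert>
  #include <cstdint>

  // Relocate the 10-bit opcode field from bit 39 to bit 40, leaving all
  // other bits of the encoded instruction untouched.
  static uint64_t moveR600AluOpcode(uint64_t Inst) {
    const uint64_t Field = 0x3FFULL << 39;
    const uint64_t Op = Inst & Field;
    Inst &= ~Field;
    return Inst | (Op << 1);
  }

  int main() {
    const uint64_t In  = (0x155ULL << 39) | 0xABCDULL;
    const uint64_t Out = moveR600AluOpcode(In);
    assert(((Out >> 40) & 0x3FF) == 0x155);   // opcode now lives at bit 40
    assert((Out & 0xFFFFULL) == 0xABCDULL);   // low bits are untouched
    return 0;
  }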
@@ -488,12 +194,6 @@ void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
OS.write((uint8_t) Byte & 0xff);
}
-void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
- raw_ostream &OS) const {
- OS.write((uint8_t) (Bytes & 0xff));
- OS.write((uint8_t) ((Bytes >> 8) & 0xff));
-}
-
void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
for (unsigned i = 0; i < 4; i++) {
OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
@@ -531,55 +231,4 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
}
}
-//===----------------------------------------------------------------------===//
-// Encoding helper functions
-//===----------------------------------------------------------------------===//
-
-bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
- switch(opcode) {
- default: return false;
- case AMDGPU::PREDICATED_BREAK:
- case AMDGPU::CONTINUE:
- case AMDGPU::IF_PREDICATE_SET:
- case AMDGPU::ELSE:
- case AMDGPU::ENDIF:
- case AMDGPU::ENDLOOP:
- case AMDGPU::WHILELOOP:
- return true;
- }
-}
-
-bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
- switch(opcode) {
- default: return false;
- case AMDGPU::TEX_LD:
- case AMDGPU::TEX_GET_TEXTURE_RESINFO:
- case AMDGPU::TEX_SAMPLE:
- case AMDGPU::TEX_SAMPLE_C:
- case AMDGPU::TEX_SAMPLE_L:
- case AMDGPU::TEX_SAMPLE_C_L:
- case AMDGPU::TEX_SAMPLE_LB:
- case AMDGPU::TEX_SAMPLE_C_LB:
- case AMDGPU::TEX_SAMPLE_G:
- case AMDGPU::TEX_SAMPLE_C_G:
- case AMDGPU::TEX_GET_GRADIENTS_H:
- case AMDGPU::TEX_GET_GRADIENTS_V:
- case AMDGPU::TEX_SET_GRADIENTS_H:
- case AMDGPU::TEX_SET_GRADIENTS_V:
- return true;
- }
-}
-
-bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
- unsigned Flag) const {
- const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
- unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
- if (FlagIndex == 0) {
- return false;
- }
- assert(MI.getOperand(FlagIndex).isImm());
- return !!((MI.getOperand(FlagIndex).getImm() >>
- (NUM_MO_FLAGS * Operand)) & Flag);
-}
-
#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/R600/Processors.td b/contrib/llvm/lib/Target/R600/Processors.td
index 868810c..0cbe919 100644
--- a/contrib/llvm/lib/Target/R600/Processors.td
+++ b/contrib/llvm/lib/Target/R600/Processors.td
@@ -1,4 +1,4 @@
-//===-- Processors.td - TODO: Add brief description -------===//
+//===-- Processors.td - R600 Processor definitions ------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,25 +6,43 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// AMDIL processors supported.
-//
-//===----------------------------------------------------------------------===//
class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
: Processor<Name, itin, Features>;
-def : Proc<"", R600_EG_Itin, [FeatureR600ALUInst]>;
-def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
-def : Proc<"rv710", R600_EG_Itin, []>;
-def : Proc<"rv730", R600_EG_Itin, []>;
-def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
-def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
-
+def : Proc<"", R600_VLIW5_Itin,
+ [FeatureR600ALUInst, FeatureVertexCache]>;
+def : Proc<"r600", R600_VLIW5_Itin,
+ [FeatureR600ALUInst , FeatureVertexCache]>;
+def : Proc<"rs880", R600_VLIW5_Itin,
+ [FeatureR600ALUInst]>;
+def : Proc<"rv670", R600_VLIW5_Itin,
+ [FeatureR600ALUInst, FeatureFP64, FeatureVertexCache]>;
+def : Proc<"rv710", R600_VLIW5_Itin,
+ [FeatureVertexCache]>;
+def : Proc<"rv730", R600_VLIW5_Itin,
+ [FeatureVertexCache]>;
+def : Proc<"rv770", R600_VLIW5_Itin,
+ [FeatureFP64, FeatureVertexCache]>;
+def : Proc<"cedar", R600_VLIW5_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"redwood", R600_VLIW5_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"sumo", R600_VLIW5_Itin,
+ [FeatureByteAddress, FeatureImages]>;
+def : Proc<"juniper", R600_VLIW5_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"cypress", R600_VLIW5_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureFP64, FeatureVertexCache]>;
+def : Proc<"barts", R600_VLIW5_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"turks", R600_VLIW5_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"caicos", R600_VLIW5_Itin,
+ [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cayman", R600_VLIW4_Itin,
+ [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"SI", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"tahiti", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"pitcairn", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"verde", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"oland", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"hainan", SI_Itin, [Feature64BitPtr, FeatureFP64]>;
diff --git a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 3a6c7ea..ffe3414 100644
--- a/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/contrib/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -30,35 +30,27 @@ namespace llvm {
class R600ControlFlowFinalizer : public MachineFunctionPass {
private:
+ typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
+
+ enum ControlFlowInstruction {
+ CF_TC,
+ CF_VC,
+ CF_CALL_FS,
+ CF_WHILE_LOOP,
+ CF_END_LOOP,
+ CF_LOOP_BREAK,
+ CF_LOOP_CONTINUE,
+ CF_JUMP,
+ CF_ELSE,
+ CF_POP,
+ CF_END
+ };
+
static char ID;
const R600InstrInfo *TII;
+ const R600RegisterInfo &TRI;
unsigned MaxFetchInst;
-
- bool isFetch(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- case AMDGPU::TEX_VTX_CONSTBUF:
- case AMDGPU::TEX_VTX_TEXBUF:
- case AMDGPU::TEX_LD:
- case AMDGPU::TEX_GET_TEXTURE_RESINFO:
- case AMDGPU::TEX_GET_GRADIENTS_H:
- case AMDGPU::TEX_GET_GRADIENTS_V:
- case AMDGPU::TEX_SET_GRADIENTS_H:
- case AMDGPU::TEX_SET_GRADIENTS_V:
- case AMDGPU::TEX_SAMPLE:
- case AMDGPU::TEX_SAMPLE_C:
- case AMDGPU::TEX_SAMPLE_L:
- case AMDGPU::TEX_SAMPLE_C_L:
- case AMDGPU::TEX_SAMPLE_LB:
- case AMDGPU::TEX_SAMPLE_C_LB:
- case AMDGPU::TEX_SAMPLE_G:
- case AMDGPU::TEX_SAMPLE_C_G:
- case AMDGPU::TXD:
- case AMDGPU::TXD_SHADOW:
- return true;
- default:
- return false;
- }
- }
+ const AMDGPUSubtarget &ST;
bool IsTrivialInst(MachineInstr *MI) const {
switch (MI->getOpcode()) {
@@ -70,26 +62,226 @@ private:
}
}
- MachineBasicBlock::iterator
- MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned CfAddress) const {
+ const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
+ unsigned Opcode = 0;
+ bool isEg = (ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX);
+ switch (CFI) {
+ case CF_TC:
+ Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
+ break;
+ case CF_VC:
+ Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
+ break;
+ case CF_CALL_FS:
+ Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
+ break;
+ case CF_WHILE_LOOP:
+ Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
+ break;
+ case CF_END_LOOP:
+ Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
+ break;
+ case CF_LOOP_BREAK:
+ Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
+ break;
+ case CF_LOOP_CONTINUE:
+ Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
+ break;
+ case CF_JUMP:
+ Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
+ break;
+ case CF_ELSE:
+ Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
+ break;
+ case CF_POP:
+ Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
+ break;
+ case CF_END:
+ if (ST.device()->getDeviceFlag() == OCL_DEVICE_CAYMAN) {
+ Opcode = AMDGPU::CF_END_CM;
+ break;
+ }
+ Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
+ break;
+ }
+ assert (Opcode && "No opcode selected");
+ return TII->get(Opcode);
+ }
+
+ bool isCompatibleWithClause(const MachineInstr *MI,
+ std::set<unsigned> &DstRegs, std::set<unsigned> &SrcRegs) const {
+ unsigned DstMI, SrcMI;
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+ const MachineOperand &MO = *I;
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef())
+ DstMI = MO.getReg();
+ if (MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (AMDGPU::R600_Reg128RegClass.contains(Reg))
+ SrcMI = Reg;
+ else
+ SrcMI = TRI.getMatchingSuperReg(Reg,
+ TRI.getSubRegFromChannel(TRI.getHWRegChan(Reg)),
+ &AMDGPU::R600_Reg128RegClass);
+ }
+ }
+ if ((DstRegs.find(SrcMI) == DstRegs.end()) &&
+ (SrcRegs.find(DstMI) == SrcRegs.end())) {
+ SrcRegs.insert(SrcMI);
+ DstRegs.insert(DstMI);
+ return true;
+ } else
+ return false;
+ }
+
+ ClauseFile
+ MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
+ const {
MachineBasicBlock::iterator ClauseHead = I;
+ std::vector<MachineInstr *> ClauseContent;
unsigned AluInstCount = 0;
+ bool IsTex = TII->usesTextureCache(ClauseHead);
+ std::set<unsigned> DstRegs, SrcRegs;
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
if (IsTrivialInst(I))
continue;
- if (!isFetch(I))
+ if (AluInstCount > MaxFetchInst)
+ break;
+ if ((IsTex && !TII->usesTextureCache(I)) ||
+ (!IsTex && !TII->usesVertexCache(I)))
+ break;
+ if (!isCompatibleWithClause(I, DstRegs, SrcRegs))
break;
AluInstCount ++;
- if (AluInstCount > MaxFetchInst)
+ ClauseContent.push_back(I);
+ }
+ MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
+ getHWInstrDesc(IsTex?CF_TC:CF_VC))
+ .addImm(0) // ADDR
+ .addImm(AluInstCount - 1); // COUNT
+ return ClauseFile(MIb, ClauseContent);
+ }
+
+ void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
+ unsigned LiteralRegs[] = {
+ AMDGPU::ALU_LITERAL_X,
+ AMDGPU::ALU_LITERAL_Y,
+ AMDGPU::ALU_LITERAL_Z,
+ AMDGPU::ALU_LITERAL_W
+ };
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.getReg() != AMDGPU::ALU_LITERAL_X)
+ continue;
+ unsigned ImmIdx = TII->getOperandIdx(MI->getOpcode(), R600Operands::IMM);
+ int64_t Imm = MI->getOperand(ImmIdx).getImm();
+ std::vector<int64_t>::iterator It =
+ std::find(Lits.begin(), Lits.end(), Imm);
+ if (It != Lits.end()) {
+ unsigned Index = It - Lits.begin();
+ MO.setReg(LiteralRegs[Index]);
+ } else {
+ assert(Lits.size() < 4 && "Too many literals in Instruction Group");
+ MO.setReg(LiteralRegs[Lits.size()]);
+ Lits.push_back(Imm);
+ }
+ }
+ }
+
+ MachineBasicBlock::iterator insertLiterals(
+ MachineBasicBlock::iterator InsertPos,
+ const std::vector<unsigned> &Literals) const {
+ MachineBasicBlock *MBB = InsertPos->getParent();
+ for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
+ unsigned LiteralPair0 = Literals[i];
+ unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
+ InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
+ TII->get(AMDGPU::LITERALS))
+ .addImm(LiteralPair0)
+ .addImm(LiteralPair1);
+ }
+ return InsertPos;
+ }
+
+ ClauseFile
+ MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
+ const {
+ MachineBasicBlock::iterator ClauseHead = I;
+ std::vector<MachineInstr *> ClauseContent;
+ I++;
+ for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
+ if (IsTrivialInst(I)) {
+ ++I;
+ continue;
+ }
+ if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
break;
+ std::vector<int64_t> Literals;
+ if (I->isBundle()) {
+ MachineInstr *DeleteMI = I;
+ MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
+ while (++BI != E && BI->isBundledWithPred()) {
+ BI->unbundleFromPred();
+ for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = BI->getOperand(i);
+ if (MO.isReg() && MO.isInternalRead())
+ MO.setIsInternalRead(false);
+ }
+ getLiteral(BI, Literals);
+ ClauseContent.push_back(BI);
+ }
+ I = BI;
+ DeleteMI->eraseFromParent();
+ } else {
+ getLiteral(I, Literals);
+ ClauseContent.push_back(I);
+ I++;
+ }
+ for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
+ unsigned literal0 = Literals[i];
+ unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
+ MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
+ TII->get(AMDGPU::LITERALS))
+ .addImm(literal0)
+ .addImm(literal2);
+ ClauseContent.push_back(MILit);
+ }
}
- BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
- TII->get(AMDGPU::CF_TC))
- .addImm(CfAddress) // ADDR
- .addImm(AluInstCount); // COUNT
- return I;
+ ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
+ return ClauseFile(ClauseHead, ClauseContent);
}
+
+ void
+ EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
+ unsigned &CfCount) {
+ CounterPropagateAddr(Clause.first, CfCount);
+ MachineBasicBlock *BB = Clause.first->getParent();
+ BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
+ .addImm(CfCount);
+ for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
+ BB->splice(InsertPos, BB, Clause.second[i]);
+ }
+ CfCount += 2 * Clause.second.size();
+ }
+
+ void
+ EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
+ unsigned &CfCount) {
+ CounterPropagateAddr(Clause.first, CfCount);
+ MachineBasicBlock *BB = Clause.first->getParent();
+ BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
+ .addImm(CfCount);
+ for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
+ BB->splice(InsertPos, BB, Clause.second[i]);
+ }
+ CfCount += Clause.second.size();
+ }
+
void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
}
@@ -102,9 +294,27 @@ private:
}
}
+ unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const {
+ switch (ST.device()->getGeneration()) {
+ case AMDGPUDeviceInfo::HD4XXX:
+ if (hasPush)
+ StackSubEntry += 2;
+ break;
+ case AMDGPUDeviceInfo::HD5XXX:
+ if (hasPush)
+ StackSubEntry ++;
+ case AMDGPUDeviceInfo::HD6XXX:
+ StackSubEntry += 2;
+ break;
+ }
+ return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4
+ }
+
public:
R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
- TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
+ TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
+ TRI(TII->getRegisterInfo()),
+ ST(tm.getSubtarget<AMDGPUSubtarget>()) {
const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
MaxFetchInst = 8;
@@ -115,6 +325,7 @@ public:
virtual bool runOnMachineFunction(MachineFunction &MF) {
unsigned MaxStack = 0;
unsigned CurrentStack = 0;
+ bool HasPush = false;
for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
++MB) {
MachineBasicBlock &MBB = *MB;
@@ -124,14 +335,16 @@ public:
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
if (MFI->ShaderType == 1) {
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
- TII->get(AMDGPU::CF_CALL_FS));
+ getHWInstrDesc(CF_CALL_FS));
CfCount++;
+ MaxStack = 1;
}
+ std::vector<ClauseFile> FetchClauses, AluClauses;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) {
- if (isFetch(I)) {
+ if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
DEBUG(dbgs() << CfCount << ":"; I->dump(););
- I = MakeFetchClause(MBB, I, 0);
+ FetchClauses.push_back(MakeFetchClause(MBB, I));
CfCount++;
continue;
}
@@ -142,20 +355,25 @@ public:
case AMDGPU::CF_ALU_PUSH_BEFORE:
CurrentStack++;
MaxStack = std::max(MaxStack, CurrentStack);
+ HasPush = true;
case AMDGPU::CF_ALU:
+ I = MI;
+ AluClauses.push_back(MakeALUClause(MBB, I));
case AMDGPU::EG_ExportBuf:
case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportBuf:
case AMDGPU::R600_ExportSwz:
+ case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
break;
case AMDGPU::WHILELOOP: {
- CurrentStack++;
+ CurrentStack+=4;
MaxStack = std::max(MaxStack, CurrentStack);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- TII->get(AMDGPU::WHILE_LOOP))
- .addImm(2);
+ getHWInstrDesc(CF_WHILE_LOOP))
+ .addImm(1);
std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
std::set<MachineInstr *>());
Pair.second.insert(MIb);
@@ -165,12 +383,12 @@ public:
break;
}
case AMDGPU::ENDLOOP: {
- CurrentStack--;
+ CurrentStack-=4;
std::pair<unsigned, std::set<MachineInstr *> > Pair =
LoopStack.back();
LoopStack.pop_back();
CounterPropagateAddr(Pair.second, CfCount);
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
.addImm(Pair.first + 1);
MI->eraseFromParent();
CfCount++;
@@ -178,7 +396,7 @@ public:
}
case AMDGPU::IF_PREDICATE_SET: {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- TII->get(AMDGPU::CF_JUMP))
+ getHWInstrDesc(CF_JUMP))
.addImm(0)
.addImm(0);
IfThenElseStack.push_back(MIb);
@@ -192,7 +410,7 @@ public:
IfThenElseStack.pop_back();
CounterPropagateAddr(JumpInst, CfCount);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- TII->get(AMDGPU::CF_ELSE))
+ getHWInstrDesc(CF_ELSE))
.addImm(0)
.addImm(1);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
@@ -207,9 +425,10 @@ public:
IfThenElseStack.pop_back();
CounterPropagateAddr(IfOrElseInst, CfCount + 1);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- TII->get(AMDGPU::POP))
+ getHWInstrDesc(CF_POP))
.addImm(CfCount + 1)
.addImm(1);
+ (void)MIb;
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
MI->eraseFromParent();
CfCount++;
@@ -218,13 +437,13 @@ public:
case AMDGPU::PREDICATED_BREAK: {
CurrentStack--;
CfCount += 3;
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
.addImm(CfCount)
.addImm(1);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- TII->get(AMDGPU::LOOP_BREAK))
+ getHWInstrDesc(CF_LOOP_BREAK))
.addImm(0);
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
.addImm(CfCount)
.addImm(1);
LoopStack.back().second.insert(MIb);
@@ -233,20 +452,31 @@ public:
}
case AMDGPU::CONTINUE: {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- TII->get(AMDGPU::CF_CONTINUE))
+ getHWInstrDesc(CF_LOOP_CONTINUE))
.addImm(0);
LoopStack.back().second.insert(MIb);
MI->eraseFromParent();
CfCount++;
break;
}
+ case AMDGPU::RETURN: {
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
+ CfCount++;
+ MI->eraseFromParent();
+ if (CfCount % 2) {
+ BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
+ CfCount++;
+ }
+ for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
+ EmitFetchClause(I, FetchClauses[i], CfCount);
+ for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
+ EmitALUClause(I, AluClauses[i], CfCount);
+ }
default:
break;
}
}
- BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
- TII->get(AMDGPU::STACK_SIZE))
- .addImm(MaxStack);
+ MFI->StackSize = getHWStackSize(MaxStack, HasPush);
}
return false;
@@ -265,4 +495,3 @@ char R600ControlFlowFinalizer::ID = 0;
llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
return new R600ControlFlowFinalizer(TM);
}
-
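For readers following the CfCount bookkeeping in the pass above: control-flow instructions and relocated ALU-clause instructions each occupy one 64-bit CF word, fetch-clause instructions occupy two, and a PAD is emitted after CF_END whenever the count is odd so the appended clauses start on an even word. A minimal sketch of that accounting, assuming only what the patch shows; the helper below is illustrative and is not part of the commit:

// Illustrative sketch, not code from the patch: total 64-bit CF words for a
// finalized program, mirroring the RETURN/EmitFetchClause/EmitALUClause paths.
static unsigned cfProgramWords(unsigned CfInsts, unsigned FetchInsts,
                               unsigned AluInsts) {
  unsigned Words = CfInsts;      // one word per CF instruction, CF_END included
  if (Words % 2)
    ++Words;                     // PAD keeps the clause bodies even-aligned
  Words += 2 * FetchInsts;       // VTX/TEX fetch instructions are 128-bit
  Words += AluInsts;             // ALU instructions are 64-bit
  return Words;
}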
diff --git a/contrib/llvm/lib/Target/R600/R600Defines.h b/contrib/llvm/lib/Target/R600/R600Defines.h
index 16cfcf5..36bfb18 100644
--- a/contrib/llvm/lib/Target/R600/R600Defines.h
+++ b/contrib/llvm/lib/Target/R600/R600Defines.h
@@ -39,7 +39,9 @@ namespace R600_InstFlag {
//FlagOperand bits 7, 8
NATIVE_OPERANDS = (1 << 9),
OP1 = (1 << 10),
- OP2 = (1 << 11)
+ OP2 = (1 << 11),
+ VTX_INST = (1 << 12),
+ TEX_INST = (1 << 13)
};
}
@@ -52,6 +54,9 @@ namespace R600_InstFlag {
#define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT)
#define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK)
+#define IS_VTX(desc) ((desc).TSFlags & R600_InstFlag::VTX_INST)
+#define IS_TEX(desc) ((desc).TSFlags & R600_InstFlag::TEX_INST)
+
namespace R600Operands {
enum Ops {
DST,
@@ -78,6 +83,7 @@ namespace R600Operands {
LAST,
PRED_SEL,
IMM,
+ BANK_SWIZZLE,
COUNT
};
@@ -85,13 +91,39 @@ namespace R600Operands {
// W C S S S S S S S S S S S
// R O D L S R R R R S R R R R S R R R L P
// D U I M R A R C C C C R C C C C R C C C A R I
-// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M
-// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M
- {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
- {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
- {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
+// S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M B
+// T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M S
+ {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12,13},
+ {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19,20},
+ {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17,18}
};
}
+//===----------------------------------------------------------------------===//
+// Config register definitions
+//===----------------------------------------------------------------------===//
+
+#define R_02880C_DB_SHADER_CONTROL 0x02880C
+#define S_02880C_KILL_ENABLE(x) (((x) & 0x1) << 6)
+
+// These fields are the same for all shader types and families.
+#define S_NUM_GPRS(x) (((x) & 0xFF) << 0)
+#define S_STACK_SIZE(x) (((x) & 0xFF) << 8)
+//===----------------------------------------------------------------------===//
+// R600, R700 Registers
+//===----------------------------------------------------------------------===//
+
+#define R_028850_SQ_PGM_RESOURCES_PS 0x028850
+#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
+
+//===----------------------------------------------------------------------===//
+// Evergreen, Northern Islands Registers
+//===----------------------------------------------------------------------===//
+
+#define R_028844_SQ_PGM_RESOURCES_PS 0x028844
+#define R_028860_SQ_PGM_RESOURCES_VS 0x028860
+#define R_028878_SQ_PGM_RESOURCES_GS 0x028878
+#define R_0288D4_SQ_PGM_RESOURCES_LS 0x0288d4
+
#endif // R600DEFINES_H_
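The config-register macros added to R600Defines.h build register values by shift-and-mask. A small usage sketch, assuming a caller that already knows the GPR count and stack entry count; the helper name is hypothetical and only the macros come from the patch:

// Hypothetical helper: pack an SQ_PGM_RESOURCES_* value from the fields above
// (assumes the S_NUM_GPRS / S_STACK_SIZE macros from R600Defines.h).
static unsigned encodeSQPgmResources(unsigned NumGPRs, unsigned StackEntries) {
  return S_NUM_GPRS(NumGPRs)          // bits 7:0
       | S_STACK_SIZE(StackEntries);  // bits 15:8
}
// encodeSQPgmResources(12, 2) == 0x020C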
diff --git a/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp b/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp
index 53e6e51..7252235 100644
--- a/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/R600ISelLowering.cpp
@@ -43,11 +43,25 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::AND, MVT::v4i32, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+ setOperationAction(ISD::MUL, MVT::v2i32, Expand);
+ setOperationAction(ISD::MUL, MVT::v4i32, Expand);
+ setOperationAction(ISD::OR, MVT::v4i32, Expand);
+ setOperationAction(ISD::OR, MVT::v2i32, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
+ setOperationAction(ISD::SHL, MVT::v4i32, Expand);
+ setOperationAction(ISD::SHL, MVT::v2i32, Expand);
+ setOperationAction(ISD::SRL, MVT::v4i32, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i32, Expand);
+ setOperationAction(ISD::SRA, MVT::v4i32, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i32, Expand);
+ setOperationAction(ISD::SUB, MVT::v4i32, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
setOperationAction(ISD::UREM, MVT::v4i32, Expand);
setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
+ setOperationAction(ISD::XOR, MVT::v4i32, Expand);
+ setOperationAction(ISD::XOR, MVT::v2i32, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
@@ -70,6 +84,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v2i32, Expand);
+
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
@@ -93,6 +110,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setTargetDAGCombine(ISD::SELECT_CC);
setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
setSchedulingPreference(Sched::VLIW);
}
diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
index b232188..37150c4 100644
--- a/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/R600InstrInfo.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "R600InstrInfo.h"
+#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
@@ -29,7 +30,8 @@ using namespace llvm;
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
: AMDGPUInstrInfo(tm),
- RI(tm, *this)
+ RI(tm, *this),
+ ST(tm.getSubtarget<AMDGPUSubtarget>())
{ }
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
@@ -139,6 +141,33 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
(TargetFlags & R600_InstFlag::OP3));
}
+bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
+ return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY);
+}
+
+bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
+ return isTransOnly(MI->getOpcode());
+}
+
+bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
+ return ST.hasVertexCache() && IS_VTX(get(Opcode));
+}
+
+bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
+ const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
+ return MFI->ShaderType != ShaderType::COMPUTE && usesVertexCache(MI->getOpcode());
+}
+
+bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
+ return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
+}
+
+bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
+ const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
+ return (MFI->ShaderType == ShaderType::COMPUTE && usesVertexCache(MI->getOpcode())) ||
+ usesTextureCache(MI->getOpcode());
+}
+
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
const {
@@ -183,10 +212,19 @@ R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
if (SrcIdx < 0)
break;
- if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+ unsigned Reg = MI->getOperand(SrcIdx).getReg();
+ if (Reg == AMDGPU::ALU_CONST) {
unsigned Const = MI->getOperand(
getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
Consts.push_back(Const);
+ continue;
+ }
+ if (AMDGPU::R600_KC0RegClass.contains(Reg) ||
+ AMDGPU::R600_KC1RegClass.contains(Reg)) {
+ unsigned Index = RI.getEncodingValue(Reg) & 0xff;
+ unsigned Chan = RI.getHWRegChan(Reg);
+ Consts.push_back((Index << 2) | Chan);
+ continue;
}
}
}
@@ -684,7 +722,8 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
//scheduling to the backend, we can change the default to 0.
MIB.addImm(1) // $last
.addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
- .addImm(0); // $literal
+ .addImm(0) // $literal
+ .addImm(0); // $bank_swizzle
return MIB;
}
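In the canBundle() change above, kcache operands are folded into the same constant-read accounting as ALU_CONST operands by forming a key from the kcache register index and the register channel. A short sketch of that key; the function is illustrative, while the encoding itself is taken from the patch:

// Illustrative: key a kcache constant for the per-group read-limit check.
// Low two bits select the channel (X=0..W=3); the upper bits hold the index.
static unsigned kcacheConstKey(unsigned Index, unsigned Chan) {
  return (Index << 2) | Chan;
}
// e.g. index 5, channel Z -> (5 << 2) | 2 == 22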
diff --git a/contrib/llvm/lib/Target/R600/R600InstrInfo.h b/contrib/llvm/lib/Target/R600/R600InstrInfo.h
index dbae900..babe4b8 100644
--- a/contrib/llvm/lib/Target/R600/R600InstrInfo.h
+++ b/contrib/llvm/lib/Target/R600/R600InstrInfo.h
@@ -33,6 +33,7 @@ namespace llvm {
class R600InstrInfo : public AMDGPUInstrInfo {
private:
const R600RegisterInfo RI;
+ const AMDGPUSubtarget &ST;
int getBranchInstr(const MachineOperand &op) const;
@@ -53,6 +54,14 @@ namespace llvm {
/// \returns true if this \p Opcode represents an ALU instruction.
bool isALUInstr(unsigned Opcode) const;
+ bool isTransOnly(unsigned Opcode) const;
+ bool isTransOnly(const MachineInstr *MI) const;
+
+ bool usesVertexCache(unsigned Opcode) const;
+ bool usesVertexCache(const MachineInstr *MI) const;
+ bool usesTextureCache(unsigned Opcode) const;
+ bool usesTextureCache(const MachineInstr *MI) const;
+
bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
bool canBundle(const std::vector<MachineInstr *> &) const;
diff --git a/contrib/llvm/lib/Target/R600/R600Instructions.td b/contrib/llvm/lib/Target/R600/R600Instructions.td
index 663b41a..8f47523 100644
--- a/contrib/llvm/lib/Target/R600/R600Instructions.td
+++ b/contrib/llvm/lib/Target/R600/R600Instructions.td
@@ -13,11 +13,12 @@
include "R600Intrinsics.td"
-class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
+class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
InstrItinClass itin>
: AMDGPUInst <outs, ins, asm, pattern> {
field bits<64> Inst;
+ bit TransOnly = 0;
bit Trig = 0;
bit Op3 = 0;
bit isVector = 0;
@@ -25,9 +26,9 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
bit Op1 = 0;
bit Op2 = 0;
bit HasNativeOperands = 0;
+ bit VTXInst = 0;
+ bit TEXInst = 0;
- bits<11> op_code = inst;
- //let Inst = inst;
let Namespace = "AMDGPU";
let OutOperandList = outs;
let InOperandList = ins;
@@ -35,6 +36,7 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
let Pattern = pattern;
let Itinerary = itin;
+ let TSFlags{0} = TransOnly;
let TSFlags{4} = Trig;
let TSFlags{5} = Op3;
@@ -45,11 +47,12 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
let TSFlags{9} = HasNativeOperands;
let TSFlags{10} = Op1;
let TSFlags{11} = Op2;
+ let TSFlags{12} = VTXInst;
+ let TSFlags{13} = TEXInst;
}
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
- AMDGPUInst <outs, ins, asm, pattern> {
- field bits<64> Inst;
+ InstR600 <outs, ins, asm, pattern, NullALU> {
let Namespace = "AMDGPU";
}
@@ -74,6 +77,9 @@ class InstFlag<string PM = "printOperand", int Default = 0>
def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
let PrintMethod = "printSel";
}
+def BANK_SWIZZLE : OperandWithDefaultOps <i32, (ops (i32 0))> {
+ let PrintMethod = "printBankSwizzle";
+}
def LITERAL : InstFlag<"printLiteral">;
@@ -137,7 +143,7 @@ class R600ALU_Word1 {
field bits<32> Word1;
bits<11> dst;
- bits<3> bank_swizzle = 0;
+ bits<3> bank_swizzle;
bits<1> dst_rel;
bits<1> clamp;
@@ -346,15 +352,15 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
// and R600InstrInfo::getOperandIdx().
class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
InstrItinClass itin = AnyALU> :
- InstR600 <0,
- (outs R600_Reg32:$dst),
+ InstR600 <(outs R600_Reg32:$dst),
(ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
- LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
+ BANK_SWIZZLE:$bank_swizzle),
!strconcat(" ", opName,
- "$clamp $dst$write$dst_rel$omod, "
+ "$last$clamp $dst$write$dst_rel$omod, "
"$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
- "$literal $pred_sel$last"),
+ "$pred_sel $bank_swizzle"),
pattern,
itin>,
R600ALU_Word0,
@@ -385,18 +391,18 @@ class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
InstrItinClass itin = AnyALU> :
- InstR600 <inst,
- (outs R600_Reg32:$dst),
+ InstR600 <(outs R600_Reg32:$dst),
(ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
- LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
+ BANK_SWIZZLE:$bank_swizzle),
!strconcat(" ", opName,
- "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
+ "$last$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
"$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
"$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
- "$literal $pred_sel$last"),
+ "$pred_sel $bank_swizzle"),
pattern,
itin>,
R600ALU_Word0,
@@ -423,18 +429,19 @@ class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
// R600InstrInfo::getOperandIdx().
class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
InstrItinClass itin = AnyALU> :
- InstR600 <0,
- (outs R600_Reg32:$dst),
+ InstR600 <(outs R600_Reg32:$dst),
(ins REL:$dst_rel, CLAMP:$clamp,
R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
- LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
- !strconcat(" ", opName, "$clamp $dst$dst_rel, "
+ LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
+ BANK_SWIZZLE:$bank_swizzle),
+ !strconcat(" ", opName, "$last$clamp $dst$dst_rel, "
"$src0_neg$src0$src0_rel, "
"$src1_neg$src1$src1_rel, "
"$src2_neg$src2$src2_rel, "
- "$literal $pred_sel$last"),
+ "$pred_sel"
+ "$bank_swizzle"),
pattern,
itin>,
R600ALU_Word0,
@@ -450,8 +457,7 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
InstrItinClass itin = VecALU> :
- InstR600 <inst,
- (outs R600_Reg32:$dst),
+ InstR600 <(outs R600_Reg32:$dst),
ins,
asm,
pattern,
@@ -459,8 +465,7 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
InstrItinClass itin = AnyALU> :
- InstR600 <inst,
- (outs R600_Reg128:$DST_GPR),
+ InstR600 <(outs R600_Reg128:$DST_GPR),
(ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget),
!strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
pattern,
@@ -481,11 +486,14 @@ class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
let FETCH_WHOLE_QUAD = 0;
let ALT_CONST = 0;
let SAMPLER_INDEX_MODE = 0;
+ let RESOURCE_INDEX_MODE = 0;
let COORD_TYPE_X = 0;
let COORD_TYPE_Y = 0;
let COORD_TYPE_Z = 0;
let COORD_TYPE_W = 0;
+
+ let TEXInst = 1;
}
} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
@@ -738,7 +746,9 @@ multiclass SteamOutputExportPattern<Instruction ExportInst,
4095, imm:$mask, buf3inst, 0)>;
}
-let usesCustomInserter = 1 in {
+// Export Instructions should not be duplicated by TailDuplication pass
+// (which assumes that duplicable instructions are affected by exec mask)
+let usesCustomInserter = 1, isNotDuplicable = 1 in {
class ExportSwzInst : InstR600ISA<(
outs),
@@ -805,12 +815,15 @@ class CF_ALU_WORD1 {
let Word1{31} = BARRIER;
}
+def KCACHE : InstFlag<"printKCache">;
+
class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
-(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, i32imm:$KCACHE_MODE0, i32imm:$KCACHE_MODE1,
-i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
+(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
+KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
+i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
+i32imm:$COUNT),
!strconcat(OpName, " $COUNT, @$ADDR, "
-"KC0[CB$KCACHE_BANK0:$KCACHE_ADDR0-$KCACHE_ADDR0+32]"
-", KC1[CB$KCACHE_BANK1:$KCACHE_ADDR1-$KCACHE_ADDR1+32]"),
+"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
field bits<64> Inst;
@@ -823,109 +836,139 @@ i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
let Inst{63-32} = Word1;
}
-class CF_WORD0 {
+class CF_WORD0_R600 {
field bits<32> Word0;
- bits<24> ADDR;
- bits<3> JUMPTABLE_SEL;
+ bits<32> ADDR;
- let Word0{23-0} = ADDR;
- let Word0{26-24} = JUMPTABLE_SEL;
+ let Word0 = ADDR;
}
-class CF_WORD1 {
+class CF_WORD1_R600 {
field bits<32> Word1;
bits<3> POP_COUNT;
bits<5> CF_CONST;
bits<2> COND;
- bits<6> COUNT;
+ bits<3> COUNT;
+ bits<6> CALL_COUNT;
+ bits<1> COUNT_3;
+ bits<1> END_OF_PROGRAM;
bits<1> VALID_PIXEL_MODE;
- bits<8> CF_INST;
+ bits<7> CF_INST;
+ bits<1> WHOLE_QUAD_MODE;
bits<1> BARRIER;
let Word1{2-0} = POP_COUNT;
let Word1{7-3} = CF_CONST;
let Word1{9-8} = COND;
- let Word1{15-10} = COUNT;
- let Word1{20} = VALID_PIXEL_MODE;
- let Word1{29-22} = CF_INST;
+ let Word1{12-10} = COUNT;
+ let Word1{18-13} = CALL_COUNT;
+ let Word1{19} = COUNT_3;
+ let Word1{21} = END_OF_PROGRAM;
+ let Word1{22} = VALID_PIXEL_MODE;
+ let Word1{29-23} = CF_INST;
+ let Word1{30} = WHOLE_QUAD_MODE;
let Word1{31} = BARRIER;
}
-class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
-ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 {
+class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
field bits<64> Inst;
let CF_INST = inst;
let BARRIER = 1;
- let JUMPTABLE_SEL = 0;
let CF_CONST = 0;
let VALID_PIXEL_MODE = 0;
let COND = 0;
+ let CALL_COUNT = 0;
+ let COUNT_3 = 0;
+ let END_OF_PROGRAM = 0;
+ let WHOLE_QUAD_MODE = 0;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
}
-def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT),
-"TEX $COUNT @$ADDR"> {
- let POP_COUNT = 0;
-}
-
-def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT),
-"VTX $COUNT @$ADDR"> {
- let POP_COUNT = 0;
-}
+class CF_WORD0_EG {
+ field bits<32> Word0;
-def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
- let POP_COUNT = 0;
- let COUNT = 0;
-}
+ bits<24> ADDR;
+ bits<3> JUMPTABLE_SEL;
-def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
- let POP_COUNT = 0;
- let COUNT = 0;
+ let Word0{23-0} = ADDR;
+ let Word0{26-24} = JUMPTABLE_SEL;
}
-def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
- let POP_COUNT = 0;
- let COUNT = 0;
-}
+class CF_WORD1_EG {
+ field bits<32> Word1;
-def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
- let POP_COUNT = 0;
- let COUNT = 0;
-}
+ bits<3> POP_COUNT;
+ bits<5> CF_CONST;
+ bits<2> COND;
+ bits<6> COUNT;
+ bits<1> VALID_PIXEL_MODE;
+ bits<1> END_OF_PROGRAM;
+ bits<8> CF_INST;
+ bits<1> BARRIER;
-def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> {
- let COUNT = 0;
+ let Word1{2-0} = POP_COUNT;
+ let Word1{7-3} = CF_CONST;
+ let Word1{9-8} = COND;
+ let Word1{15-10} = COUNT;
+ let Word1{20} = VALID_PIXEL_MODE;
+ let Word1{21} = END_OF_PROGRAM;
+ let Word1{29-22} = CF_INST;
+ let Word1{31} = BARRIER;
}
-def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> {
- let COUNT = 0;
-}
+class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
+ field bits<64> Inst;
-def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> {
- let ADDR = 0;
- let COUNT = 0;
- let POP_COUNT = 0;
-}
+ let CF_INST = inst;
+ let BARRIER = 1;
+ let JUMPTABLE_SEL = 0;
+ let CF_CONST = 0;
+ let VALID_PIXEL_MODE = 0;
+ let COND = 0;
+ let END_OF_PROGRAM = 0;
-def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> {
- let COUNT = 0;
+ let Inst{31-0} = Word0;
+ let Inst{63-32} = Word1;
}
def CF_ALU : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
-def STACK_SIZE : AMDGPUInst <(outs),
-(ins i32imm:$num), "nstack $num", [] > {
+def FETCH_CLAUSE : AMDGPUInst <(outs),
+(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
field bits<8> Inst;
bits<8> num;
let Inst = num;
}
+def ALU_CLAUSE : AMDGPUInst <(outs),
+(ins i32imm:$addr), "ALU clause starting at $addr:", [] > {
+ field bits<8> Inst;
+ bits<8> num;
+ let Inst = num;
+}
+
+def LITERALS : AMDGPUInst <(outs),
+(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > {
+ field bits<64> Inst;
+ bits<32> literal1;
+ bits<32> literal2;
+
+ let Inst{31-0} = literal1;
+ let Inst{63-32} = literal2;
+}
+
+def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
+ field bits<64> Inst;
+}
+
let Predicates = [isR600toCayman] in {
//===----------------------------------------------------------------------===//
@@ -944,58 +987,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
def SETE : R600_2OP <
0x08, "SETE",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
- COND_EQ))]
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))]
>;
def SGT : R600_2OP <
0x09, "SETGT",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
- COND_GT))]
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))]
>;
def SGE : R600_2OP <
0xA, "SETGE",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
- COND_GE))]
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))]
>;
def SNE : R600_2OP <
0xB, "SETNE",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
- COND_NE))]
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))]
>;
def SETE_DX10 : R600_2OP <
0xC, "SETE_DX10",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
- COND_EQ))]
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))]
>;
def SETGT_DX10 : R600_2OP <
0xD, "SETGT_DX10",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
- COND_GT))]
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))]
>;
def SETGE_DX10 : R600_2OP <
0xE, "SETGE_DX10",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
- COND_GE))]
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))]
>;
def SETNE_DX10 : R600_2OP <
0xF, "SETNE_DX10",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
- COND_NE))]
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))]
>;
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
@@ -1053,38 +1080,32 @@ def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;
def SETE_INT : R600_2OP <
0x3A, "SETE_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
+ [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))]
>;
def SETGT_INT : R600_2OP <
0x3B, "SETGT_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
+ [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))]
>;
def SETGE_INT : R600_2OP <
0x3C, "SETGE_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
+ [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))]
>;
def SETNE_INT : R600_2OP <
0x3D, "SETNE_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
+ [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))]
>;
def SETGT_UINT : R600_2OP <
0x3E, "SETGT_UINT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
+ [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))]
>;
def SETGE_UINT : R600_2OP <
0x3F, "SETGE_UINT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
+ [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))]
>;
def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
@@ -1094,26 +1115,17 @@ def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
def CNDE_INT : R600_3OP <
0x1C, "CNDE_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), 0,
- (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
- COND_EQ))]
+ [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))]
>;
def CNDGE_INT : R600_3OP <
0x1E, "CNDGE_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), 0,
- (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
- COND_GE))]
+ [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))]
>;
def CNDGT_INT : R600_3OP <
0x1D, "CNDGT_INT",
- [(set (i32 R600_Reg32:$dst),
- (selectcc (i32 R600_Reg32:$src0), 0,
- (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
- COND_GT))]
+ [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))]
>;
//===----------------------------------------------------------------------===//
@@ -1122,7 +1134,7 @@ def CNDGT_INT : R600_3OP <
def TEX_LD : R600_TEX <
0x03, "TEX_LD",
- [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR,
+ [(set v4f32:$DST_GPR, (int_AMDGPU_txf v4f32:$SRC_GPR,
imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID,
imm:$SAMPLER_ID, imm:$textureTarget))]
> {
@@ -1135,19 +1147,19 @@ let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X,
def TEX_GET_TEXTURE_RESINFO : R600_TEX <
0x04, "TEX_GET_TEXTURE_RESINFO",
- [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR,
+ [(set v4f32:$DST_GPR, (int_AMDGPU_txq v4f32:$SRC_GPR,
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_GET_GRADIENTS_H : R600_TEX <
0x07, "TEX_GET_GRADIENTS_H",
- [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR,
+ [(set v4f32:$DST_GPR, (int_AMDGPU_ddx v4f32:$SRC_GPR,
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_GET_GRADIENTS_V : R600_TEX <
0x08, "TEX_GET_GRADIENTS_V",
- [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR,
+ [(set v4f32:$DST_GPR, (int_AMDGPU_ddy v4f32:$SRC_GPR,
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
@@ -1163,37 +1175,37 @@ def TEX_SET_GRADIENTS_V : R600_TEX <
def TEX_SAMPLE : R600_TEX <
0x10, "TEX_SAMPLE",
- [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+ [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR,
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_SAMPLE_C : R600_TEX <
0x18, "TEX_SAMPLE_C",
- [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+ [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR,
imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
>;
def TEX_SAMPLE_L : R600_TEX <
0x11, "TEX_SAMPLE_L",
- [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+ [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR,
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_SAMPLE_C_L : R600_TEX <
0x19, "TEX_SAMPLE_C_L",
- [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+ [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR,
imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
>;
def TEX_SAMPLE_LB : R600_TEX <
0x12, "TEX_SAMPLE_LB",
- [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+ [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR,
imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
>;
def TEX_SAMPLE_C_LB : R600_TEX <
0x1A, "TEX_SAMPLE_C_LB",
- [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+ [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR,
imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
>;
@@ -1223,32 +1235,22 @@ class MULADD_Common <bits<5> inst> : R600_3OP <
class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
inst, "MULADD_IEEE",
- [(set (f32 R600_Reg32:$dst),
- (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
+ [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
>;
class CNDE_Common <bits<5> inst> : R600_3OP <
inst, "CNDE",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
- (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
- COND_EQ))]
+ [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))]
>;
class CNDGT_Common <bits<5> inst> : R600_3OP <
inst, "CNDGT",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
- (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
- COND_GT))]
+ [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))]
>;
class CNDGE_Common <bits<5> inst> : R600_3OP <
inst, "CNDGE",
- [(set R600_Reg32:$dst,
- (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
- (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
- COND_GE))]
+ [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]
>;
multiclass DOT4_Common <bits<11> inst> {
@@ -1256,7 +1258,7 @@ multiclass DOT4_Common <bits<11> inst> {
def _pseudo : R600_REDUCTION <inst,
(ins R600_Reg128:$src0, R600_Reg128:$src1),
"DOT4 $dst $src0, $src1",
- [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
+ [(set f32:$dst, (int_AMDGPU_dp4 v4f32:$src0, v4f32:$src1))]
>;
def _real : R600_2OP <inst, "DOT4", []>;
@@ -1266,11 +1268,10 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
multiclass CUBE_Common <bits<11> inst> {
def _pseudo : InstR600 <
- inst,
(outs R600_Reg128:$dst),
(ins R600_Reg128:$src),
"CUBE $dst $src",
- [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
+ [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src))],
VecALU
> {
let isPseudo = 1;
@@ -1282,23 +1283,38 @@ multiclass CUBE_Common <bits<11> inst> {
class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
inst, "EXP_IEEE", fexp2
->;
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "FLT_TO_INT", fp_to_sint
->;
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "INT_TO_FLT", sint_to_fp
->;
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "FLT_TO_UINT", fp_to_uint
->;
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "UINT_TO_FLT", uint_to_fp
->;
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
inst, "LOG_CLAMPED", []
@@ -1306,50 +1322,84 @@ class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
inst, "LOG_IEEE", flog2
->;
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
inst, "MULHI_INT", mulhs
->;
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
inst, "MULHI", mulhu
->;
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
inst, "MULLO_INT", mul
->;
-class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []>;
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
+class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
inst, "RECIP_CLAMPED", []
->;
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))]
->;
+ inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "RECIP_UINT", AMDGPUurecip
->;
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
->;
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
inst, "RECIPSQRT_IEEE", []
->;
+> {
+ let TransOnly = 1;
+ let Itinerary = TransALU;
+}
class SIN_Common <bits<11> inst> : R600_1OP <
inst, "SIN", []>{
let Trig = 1;
+ let TransOnly = 1;
+ let Itinerary = TransALU;
}
class COS_Common <bits<11> inst> : R600_1OP <
inst, "COS", []> {
let Trig = 1;
+ let TransOnly = 1;
+ let Itinerary = TransALU;
}
//===----------------------------------------------------------------------===//
@@ -1358,19 +1408,20 @@ class COS_Common <bits<11> inst> : R600_1OP <
multiclass DIV_Common <InstR600 recip_ieee> {
def : Pat<
- (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
- (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+ (int_AMDGPU_div f32:$src0, f32:$src1),
+ (MUL_IEEE $src0, (recip_ieee $src1))
>;
def : Pat<
- (fdiv R600_Reg32:$src0, R600_Reg32:$src1),
- (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+ (fdiv f32:$src0, f32:$src1),
+ (MUL_IEEE $src0, (recip_ieee $src1))
>;
}
-class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
- (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
- (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
+class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee>
+ : Pat <
+ (int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w),
+ (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
>;
//===----------------------------------------------------------------------===//
@@ -1410,14 +1461,13 @@ let Predicates = [isR600] in {
def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
- def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL, R600_Reg32>;
+ def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
- def : Pat<(fsqrt R600_Reg32:$src),
- (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>;
+ def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
def R600_ExportSwz : ExportSwzInst {
- let Word1{20-17} = 1; // BURST_COUNT
+ let Word1{20-17} = 0; // BURST_COUNT
let Word1{21} = eop;
let Word1{22} = 1; // VALID_PIXEL_MODE
let Word1{30-23} = inst;
@@ -1426,25 +1476,77 @@ let Predicates = [isR600] in {
defm : ExportPattern<R600_ExportSwz, 39>;
def R600_ExportBuf : ExportBufInst {
- let Word1{20-17} = 1; // BURST_COUNT
+ let Word1{20-17} = 0; // BURST_COUNT
let Word1{21} = eop;
let Word1{22} = 1; // VALID_PIXEL_MODE
let Word1{30-23} = inst;
let Word1{31} = 1; // BARRIER
}
defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
+
+ def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+ "TEX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+ }
+ def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+ "VTX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+ }
+ def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
+ "LOOP_START_DX10 @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
+ "LOOP_BREAK @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
+ "CONTINUE @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "JUMP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "ELSE @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
+ let ADDR = 0;
+ let COUNT = 0;
+ let POP_COUNT = 0;
+ }
+ def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "POP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
+ let COUNT = 0;
+ let POP_COUNT = 0;
+ let ADDR = 0;
+ let END_OF_PROGRAM = 1;
+ }
+
}
// Helper pattern for normalizing inputs to trigonometric instructions for R700+
// cards.
class COS_PAT <InstR600 trig> : Pat<
- (fcos R600_Reg32:$src),
- (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+ (fcos f32:$src),
+ (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
>;
class SIN_PAT <InstR600 trig> : Pat<
- (fsin R600_Reg32:$src),
- (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+ (fsin f32:$src),
+ (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
>;
//===----------------------------------------------------------------------===//
@@ -1482,11 +1584,10 @@ def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;
-def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL, R600_Reg32>;
+def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
def : SIN_PAT <SIN_eg>;
def : COS_PAT <COS_eg>;
-def : Pat<(fsqrt R600_Reg32:$src),
- (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
+def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
} // End Predicates = [isEG]
//===----------------------------------------------------------------------===//
@@ -1510,15 +1611,17 @@ let Predicates = [isEGorCayman] in {
// (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16
// (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24
def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
- [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
- R600_Reg32:$src1,
- R600_Reg32:$src2))],
+ [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1,
+ i32:$src2))],
VecALU
>;
+ def : BFEPattern <BFE_UINT_eg>;
+
+ def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
+ defm : BFIPatterns <BFI_INT_eg>;
def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
- [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
- R600_Reg32:$src2))],
+ [(set i32:$dst, (AMDGPUbitalign i32:$src0, i32:$src1, i32:$src2))],
VecALU
>;
@@ -1563,14 +1666,15 @@ let hasSideEffects = 1 in {
// XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
// which do not need to be truncated since the fp values are 0.0f or 1.0f.
// We should look into handling these cases separately.
- def : Pat<(fp_to_sint R600_Reg32:$src0),
- (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>;
+ def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;
+
+ def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
- def : Pat<(fp_to_uint R600_Reg32:$src0),
- (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
+ // SHA-256 Patterns
+ def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
def EG_ExportSwz : ExportSwzInst {
- let Word1{19-16} = 1; // BURST_COUNT
+ let Word1{19-16} = 0; // BURST_COUNT
let Word1{20} = 1; // VALID_PIXEL_MODE
let Word1{21} = eop;
let Word1{29-22} = inst;
@@ -1580,7 +1684,7 @@ let hasSideEffects = 1 in {
defm : ExportPattern<EG_ExportSwz, 83>;
def EG_ExportBuf : ExportBufInst {
- let Word1{19-16} = 1; // BURST_COUNT
+ let Word1{19-16} = 0; // BURST_COUNT
let Word1{20} = 1; // VALID_PIXEL_MODE
let Word1{21} = eop;
let Word1{29-22} = inst;
@@ -1589,6 +1693,57 @@ let hasSideEffects = 1 in {
}
defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
+ def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+ "TEX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+ }
+ def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+ "VTX $COUNT @$ADDR"> {
+ let POP_COUNT = 0;
+ }
+ def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
+ "LOOP_START_DX10 @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
+ "LOOP_BREAK @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
+ "CONTINUE @$ADDR"> {
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
+ def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "JUMP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "ELSE @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
+ let ADDR = 0;
+ let COUNT = 0;
+ let POP_COUNT = 0;
+ }
+ def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+ "POP @$ADDR POP:$POP_COUNT"> {
+ let COUNT = 0;
+ }
+ def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> {
+ let COUNT = 0;
+ let POP_COUNT = 0;
+ let ADDR = 0;
+ let END_OF_PROGRAM = 1;
+ }
+
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
@@ -1618,14 +1773,14 @@ class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
(ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
0x1, "RAT_WRITE_CACHELESS_32_eg",
- [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]
+ [(global_store i32:$rw_gpr, i32:$index_gpr)]
>;
//128-bit store
def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
0xf, "RAT_WRITE_CACHELESS_128",
- [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)]
+ [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
>;
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
@@ -1679,6 +1834,8 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
+
+ let VTXInst = 1;
}
class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
@@ -1748,19 +1905,19 @@ class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
//===----------------------------------------------------------------------===//
def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
- [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))]
+ [(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))]
>;
def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
- [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))]
+ [(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))]
>;
def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
- [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
+ [(set i32:$dst, (load_param ADDRVTX_READ:$ptr))]
>;
def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
- [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))]
+ [(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))]
>;
//===----------------------------------------------------------------------===//
@@ -1769,17 +1926,17 @@ def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
// 8-bit reads
def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
- [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))]
+ [(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))]
>;
// 32-bit reads
def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
- [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
+ [(set i32:$dst, (global_load ADDRVTX_READ:$ptr))]
>;
// 128-bit reads
def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
- [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
+ [(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))]
>;
//===----------------------------------------------------------------------===//
@@ -1788,7 +1945,7 @@ def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
//===----------------------------------------------------------------------===//
def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
- [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
+ [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))]
>;
}
@@ -1818,22 +1975,27 @@ def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1
-def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL, R600_Reg32>;
+def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
def : SIN_PAT <SIN_cm>;
def : COS_PAT <COS_cm>;
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
// RECIP_UINT emulation for Cayman
+// The multiplication scales from [0,1] to the unsigned integer range
def : Pat <
- (AMDGPUurecip R600_Reg32:$src0),
- (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
- (MOV_IMM_I32 0x4f800000)))
+ (AMDGPUurecip i32:$src0),
+ (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
+ (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
>;
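
For reference, a minimal C++ sketch of the arithmetic the AMDGPUurecip pattern above expresses, assuming CONST.FP_UINT_MAX_PLUS_1 resolves to 0x4f800000 (4294967296.0f); this is an illustration of the emulation, not code from the patch:

    #include <cstdint>

    // Convert to float, take the IEEE reciprocal, scale by 2^32, convert back.
    uint32_t emulatedURecip(uint32_t Src) {
      float F = static_cast<float>(Src);     // UINT_TO_FLT_eg
      float R = 1.0f / F;                    // RECIP_IEEE_cm
      float Scaled = R * 4294967296.0f;      // MUL_IEEE with CONST.FP_UINT_MAX_PLUS_1
      return static_cast<uint32_t>(Scaled);  // FLT_TO_UINT_eg
    }
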
+ def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
+ let ADDR = 0;
+ let POP_COUNT = 0;
+ let COUNT = 0;
+ }
-def : Pat<(fsqrt R600_Reg32:$src),
- (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;
+def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
} // End isCayman
@@ -1855,21 +2017,21 @@ def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src),
let isPseudo = 1 in {
def PRED_X : InstR600 <
- 0, (outs R600_Predicate_Bit:$dst),
+ (outs R600_Predicate_Bit:$dst),
(ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
"", [], NullALU> {
let FlagOperandIdx = 3;
}
let isTerminator = 1, isBranch = 1 in {
-def JUMP_COND : InstR600 <0x10,
+def JUMP_COND : InstR600 <
(outs),
(ins brtarget:$target, R600_Predicate_Bit:$p),
"JUMP $target ($p)",
[], AnyALU
>;
-def JUMP : InstR600 <0x10,
+def JUMP : InstR600 <
(outs),
(ins brtarget:$target),
"JUMP $target",
@@ -1896,20 +2058,28 @@ def MASK_WRITE : AMDGPUShaderInst <
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
-def TXD: AMDGPUShaderInst <
+def TXD: InstR600 <
(outs R600_Reg128:$dst),
- (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
+ i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
"TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
- [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
->;
+ [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
+ imm:$resourceId, imm:$samplerId, imm:$textureTarget))],
+ NullALU > {
+ let TEXInst = 1;
+}
-def TXD_SHADOW: AMDGPUShaderInst <
+def TXD_SHADOW: InstR600 <
(outs R600_Reg128:$dst),
- (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+ (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
+ i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
"TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
- [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
->;
-
+ [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
+ imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))],
+ NullALU
+> {
+ let TEXInst = 1;
+}
} // End isPseudo = 1
} // End usesCustomInserter = 1
@@ -1946,7 +2116,7 @@ def CONST_COPY : Instruction {
def TEX_VTX_CONSTBUF :
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
- [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
+ [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
VTX_WORD1_GPR, VTX_WORD0 {
let VC_INST = 0;
@@ -1995,11 +2165,12 @@ def TEX_VTX_CONSTBUF :
// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
+ let VTXInst = 1;
}
def TEX_VTX_TEXBUF:
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
- [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
+ [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
VTX_WORD1_GPR, VTX_WORD0 {
let VC_INST = 0;
@@ -2048,6 +2219,7 @@ let Inst{63-32} = Word1;
// VTX_WORD3 (Padding)
//
// Inst{127-96} = 0;
+ let VTXInst = 1;
}
@@ -2124,9 +2296,8 @@ let isTerminator=1 in {
// CND*_INT Patterns for f32 True / False values
class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
- (selectcc (i32 R600_Reg32:$src0), 0, (f32 R600_Reg32:$src1),
- R600_Reg32:$src2, cc),
- (cnd R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
+ (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
+ (cnd $src0, $src1, $src2)
>;
def : CND_INT_f32 <CNDE_INT, SETEQ>;
@@ -2135,9 +2306,8 @@ def : CND_INT_f32 <CNDGE_INT, SETGE>;
//CNDGE_INT extra pattern
def : Pat <
- (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1),
- (i32 R600_Reg32:$src2), COND_GT),
- (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
+ (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_GT),
+ (CNDGE_INT $src0, $src1, $src2)
>;
// KIL Patterns
@@ -2147,56 +2317,56 @@ def KILP : Pat <
>;
def KIL : Pat <
- (int_AMDGPU_kill R600_Reg32:$src0),
- (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
+ (int_AMDGPU_kill f32:$src0),
+ (MASK_WRITE (KILLGT (f32 ZERO), $src0))
>;
// SGT Reverse args
def : Pat <
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
- (SGT R600_Reg32:$src1, R600_Reg32:$src0)
+ (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT),
+ (SGT $src1, $src0)
>;
// SGE Reverse args
def : Pat <
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
- (SGE R600_Reg32:$src1, R600_Reg32:$src0)
+ (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE),
+ (SGE $src1, $src0)
>;
// SETGT_DX10 reverse args
def : Pat <
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),
- (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+ (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT),
+ (SETGT_DX10 $src1, $src0)
>;
// SETGE_DX10 reverse args
def : Pat <
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),
- (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+ (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE),
+ (SETGE_DX10 $src1, $src0)
>;
// SETGT_INT reverse args
def : Pat <
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
- (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
+ (selectcc i32:$src0, i32:$src1, -1, 0, SETLT),
+ (SETGT_INT $src1, $src0)
>;
// SETGE_INT reverse args
def : Pat <
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
- (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
+ (selectcc i32:$src0, i32:$src1, -1, 0, SETLE),
+ (SETGE_INT $src1, $src0)
>;
// SETGT_UINT reverse args
def : Pat <
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
- (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+ (selectcc i32:$src0, i32:$src1, -1, 0, SETULT),
+ (SETGT_UINT $src1, $src0)
>;
// SETGE_UINT reverse args
def : Pat <
- (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
- (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+ (selectcc i32:$src0, i32:$src1, -1, 0, SETULE),
+ (SETGE_UINT $src1, $src0)
>;
// The next two patterns are special cases for handling 'true if ordered' and
@@ -2209,50 +2379,50 @@ def : Pat <
//SETE - 'true if ordered'
def : Pat <
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
- (SETE R600_Reg32:$src0, R600_Reg32:$src1)
+ (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO),
+ (SETE $src0, $src1)
>;
//SETE_DX10 - 'true if ordered'
def : Pat <
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),
- (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+ (selectcc f32:$src0, f32:$src1, -1, 0, SETO),
+ (SETE_DX10 $src0, $src1)
>;
//SNE - 'true if unordered'
def : Pat <
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
- (SNE R600_Reg32:$src0, R600_Reg32:$src1)
+ (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO),
+ (SNE $src0, $src1)
>;
//SETNE_DX10 - 'true if unordered'
def : Pat <
- (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),
- (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+ (selectcc f32:$src0, f32:$src1, -1, 0, SETUO),
+ (SETNE_DX10 $src0, $src1)
>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;
+def : Extract_Element <f32, v4f32, 0, sub0>;
+def : Extract_Element <f32, v4f32, 1, sub1>;
+def : Extract_Element <f32, v4f32, 2, sub2>;
+def : Extract_Element <f32, v4f32, 3, sub3>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;
+def : Insert_Element <f32, v4f32, 0, sub0>;
+def : Insert_Element <f32, v4f32, 1, sub1>;
+def : Insert_Element <f32, v4f32, 2, sub2>;
+def : Insert_Element <f32, v4f32, 3, sub3>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;
+def : Extract_Element <i32, v4i32, 0, sub0>;
+def : Extract_Element <i32, v4i32, 1, sub1>;
+def : Extract_Element <i32, v4i32, 2, sub2>;
+def : Extract_Element <i32, v4i32, 3, sub3>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
+def : Insert_Element <i32, v4i32, 0, sub0>;
+def : Insert_Element <i32, v4i32, 1, sub1>;
+def : Insert_Element <i32, v4i32, 2, sub2>;
+def : Insert_Element <i32, v4i32, 3, sub3>;
-def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
-def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
+def : Vector4_Build <v4f32, f32>;
+def : Vector4_Build <v4i32, i32>;
// bitconvert patterns
diff --git a/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h b/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h
index 99c1f91..70fddbb 100644
--- a/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/R600/R600MachineFunctionInfo.h
@@ -25,6 +25,7 @@ public:
R600MachineFunctionInfo(const MachineFunction &MF);
SmallVector<unsigned, 4> LiveOuts;
std::vector<unsigned> IndirectRegs;
+ unsigned StackSize;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/R600/R600Packetizer.cpp b/contrib/llvm/lib/Target/R600/R600Packetizer.cpp
new file mode 100644
index 0000000..cd7b7d0
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/R600Packetizer.cpp
@@ -0,0 +1,459 @@
+//===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass implements instruction packetization for R600. It unsets the
+/// isLast bit of instructions inside a bundle and substitutes src registers
+/// with the PreviousVector when applicable.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600PACKETIZER_CPP
+#define R600PACKETIZER_CPP
+
+#define DEBUG_TYPE "packets"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "AMDGPU.h"
+#include "R600InstrInfo.h"
+
+namespace llvm {
+
+class R600Packetizer : public MachineFunctionPass {
+
+public:
+ static char ID;
+ R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const {
+ return "R600 Packetizer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+char R600Packetizer::ID = 0;
+
+class R600PacketizerList : public VLIWPacketizerList {
+
+private:
+ const R600InstrInfo *TII;
+ const R600RegisterInfo &TRI;
+
+ enum BankSwizzle {
+ ALU_VEC_012 = 0,
+ ALU_VEC_021,
+ ALU_VEC_120,
+ ALU_VEC_102,
+ ALU_VEC_201,
+ ALU_VEC_210
+ };
+
+ unsigned getSlot(const MachineInstr *MI) const {
+ return TRI.getHWRegChan(MI->getOperand(0).getReg());
+ }
+
+ /// \returns the register to PV chan mapping for the bundle/single instruction
+ /// that immediately precedes I.
+ DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
+ const {
+ DenseMap<unsigned, unsigned> Result;
+ I--;
+ if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
+ return Result;
+ MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
+ if (I->isBundle())
+ BI++;
+ do {
+ if (TII->isPredicated(BI))
+ continue;
+ if (TII->isTransOnly(BI))
+ continue;
+ int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600Operands::WRITE);
+ if (OperandIdx < 0)
+ continue;
+ if (BI->getOperand(OperandIdx).getImm() == 0)
+ continue;
+ unsigned Dst = BI->getOperand(0).getReg();
+ if (BI->getOpcode() == AMDGPU::DOT4_r600_real) {
+ Result[Dst] = AMDGPU::PV_X;
+ continue;
+ }
+ unsigned PVReg = 0;
+ switch (TRI.getHWRegChan(Dst)) {
+ case 0:
+ PVReg = AMDGPU::PV_X;
+ break;
+ case 1:
+ PVReg = AMDGPU::PV_Y;
+ break;
+ case 2:
+ PVReg = AMDGPU::PV_Z;
+ break;
+ case 3:
+ PVReg = AMDGPU::PV_W;
+ break;
+ default:
+ llvm_unreachable("Invalid Chan");
+ }
+ Result[Dst] = PVReg;
+ } while ((++BI)->isBundledWithPred());
+ return Result;
+ }
+
+ void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs)
+ const {
+ R600Operands::Ops Ops[] = {
+ R600Operands::SRC0,
+ R600Operands::SRC1,
+ R600Operands::SRC2
+ };
+ for (unsigned i = 0; i < 3; i++) {
+ int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
+ if (OperandIdx < 0)
+ continue;
+ unsigned Src = MI->getOperand(OperandIdx).getReg();
+ const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
+ if (It != PVs.end())
+ MI->getOperand(OperandIdx).setReg(It->second);
+ }
+ }
+public:
+ // Ctor.
+ R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+ MachineDominatorTree &MDT)
+ : VLIWPacketizerList(MF, MLI, MDT, true),
+ TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
+ TRI(TII->getRegisterInfo()) { }
+
+ // initPacketizerState - initialize some internal flags.
+ void initPacketizerState() { }
+
+ // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+ bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
+ return false;
+ }
+
+ // isSoloInstruction - return true if instruction MI cannot be packetized
+ // with any other instruction, which means that MI itself is a packet.
+ bool isSoloInstruction(MachineInstr *MI) {
+ if (TII->isVector(*MI))
+ return true;
+ if (!TII->isALUInstr(MI->getOpcode()))
+ return true;
+ if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY)
+ return true;
+ if (TII->isTransOnly(MI))
+ return true;
+ return false;
+ }
+
+ // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+ // together.
+ bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+ MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
+ if (getSlot(MII) <= getSlot(MIJ))
+ return false;
+ // Do MII and MIJ share the same pred_sel?
+ int OpI = TII->getOperandIdx(MII->getOpcode(), R600Operands::PRED_SEL),
+ OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600Operands::PRED_SEL);
+ unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
+ PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
+ if (PredI != PredJ)
+ return false;
+ if (SUJ->isSucc(SUI)) {
+ for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
+ const SDep &Dep = SUJ->Succs[i];
+ if (Dep.getSUnit() != SUI)
+ continue;
+ if (Dep.getKind() == SDep::Anti)
+ continue;
+ if (Dep.getKind() == SDep::Output)
+ if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
+ continue;
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
+ // and SUJ.
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {return false;}
+
+ void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
+ unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600Operands::LAST);
+ MI->getOperand(LastOp).setImm(Bit);
+ }
+
+ MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
+ CurrentPacketMIs.push_back(MI);
+ bool FitsConstLimits = TII->canBundle(CurrentPacketMIs);
+ DEBUG(
+ if (!FitsConstLimits) {
+ dbgs() << "Couldn't pack :\n";
+ MI->dump();
+ dbgs() << "with the following packets :\n";
+ for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
+ CurrentPacketMIs[i]->dump();
+ dbgs() << "\n";
+ }
+ dbgs() << "because of Consts read limitations\n";
+ });
+ const DenseMap<unsigned, unsigned> &PV =
+ getPreviousVector(CurrentPacketMIs.front());
+ bool FitsReadPortLimits = fitsReadPortLimitation(CurrentPacketMIs, PV);
+ DEBUG(
+ if (!FitsReadPortLimits) {
+ dbgs() << "Couldn't pack :\n";
+ MI->dump();
+ dbgs() << "with the following packets :\n";
+ for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
+ CurrentPacketMIs[i]->dump();
+ dbgs() << "\n";
+ }
+ dbgs() << "because of Read port limitations\n";
+ });
+ bool isBundlable = FitsConstLimits && FitsReadPortLimits;
+ CurrentPacketMIs.pop_back();
+ if (!isBundlable) {
+ endPacket(MI->getParent(), MI);
+ substitutePV(MI, getPreviousVector(MI));
+ return VLIWPacketizerList::addToPacket(MI);
+ }
+ if (!CurrentPacketMIs.empty())
+ setIsLastBit(CurrentPacketMIs.back(), 0);
+ substitutePV(MI, PV);
+ return VLIWPacketizerList::addToPacket(MI);
+ }
+private:
+ std::vector<std::pair<int, unsigned> >
+ ExtractSrcs(const MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV)
+ const {
+ R600Operands::Ops Ops[] = {
+ R600Operands::SRC0,
+ R600Operands::SRC1,
+ R600Operands::SRC2
+ };
+ std::vector<std::pair<int, unsigned> > Result;
+ for (unsigned i = 0; i < 3; i++) {
+ int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
+ if (OperandIdx < 0){
+ Result.push_back(std::pair<int, unsigned>(-1,0));
+ continue;
+ }
+ unsigned Src = MI->getOperand(OperandIdx).getReg();
+ if (PV.find(Src) != PV.end()) {
+ Result.push_back(std::pair<int, unsigned>(-1,0));
+ continue;
+ }
+ unsigned Reg = TRI.getEncodingValue(Src) & 0xff;
+ if (Reg > 127) {
+ Result.push_back(std::pair<int, unsigned>(-1,0));
+ continue;
+ }
+ unsigned Chan = TRI.getHWRegChan(Src);
+ Result.push_back(std::pair<int, unsigned>(Reg, Chan));
+ }
+ return Result;
+ }
+
+ std::vector<std::pair<int, unsigned> >
+ Swizzle(std::vector<std::pair<int, unsigned> > Src,
+ BankSwizzle Swz) const {
+ switch (Swz) {
+ case ALU_VEC_012:
+ break;
+ case ALU_VEC_021:
+ std::swap(Src[1], Src[2]);
+ break;
+ case ALU_VEC_102:
+ std::swap(Src[0], Src[1]);
+ break;
+ case ALU_VEC_120:
+ std::swap(Src[0], Src[1]);
+ std::swap(Src[0], Src[2]);
+ break;
+ case ALU_VEC_201:
+ std::swap(Src[0], Src[2]);
+ std::swap(Src[0], Src[1]);
+ break;
+ case ALU_VEC_210:
+ std::swap(Src[0], Src[2]);
+ break;
+ }
+ return Src;
+ }
+
+ bool isLegal(const std::vector<MachineInstr *> &IG,
+ const std::vector<BankSwizzle> &Swz,
+ const DenseMap<unsigned, unsigned> &PV) const {
+ assert (Swz.size() == IG.size());
+ int Vector[4][3];
+ memset(Vector, -1, sizeof(Vector));
+ for (unsigned i = 0, e = IG.size(); i < e; i++) {
+ const std::vector<std::pair<int, unsigned> > &Srcs =
+ Swizzle(ExtractSrcs(IG[i], PV), Swz[i]);
+ for (unsigned j = 0; j < 3; j++) {
+ const std::pair<int, unsigned> &Src = Srcs[j];
+ if (Src.first < 0)
+ continue;
+ if (Vector[Src.second][j] < 0)
+ Vector[Src.second][j] = Src.first;
+ if (Vector[Src.second][j] != Src.first)
+ return false;
+ }
+ }
+ return true;
+ }
+
+ bool recursiveFitsFPLimitation(
+ std::vector<MachineInstr *> IG,
+ const DenseMap<unsigned, unsigned> &PV,
+ std::vector<BankSwizzle> &SwzCandidate,
+ std::vector<MachineInstr *> CurrentlyChecked)
+ const {
+ if (!isLegal(CurrentlyChecked, SwzCandidate, PV))
+ return false;
+ if (IG.size() == CurrentlyChecked.size()) {
+ return true;
+ }
+ BankSwizzle AvailableSwizzle[] = {
+ ALU_VEC_012,
+ ALU_VEC_021,
+ ALU_VEC_120,
+ ALU_VEC_102,
+ ALU_VEC_201,
+ ALU_VEC_210
+ };
+ CurrentlyChecked.push_back(IG[CurrentlyChecked.size()]);
+ for (unsigned i = 0; i < 6; i++) {
+ SwzCandidate.push_back(AvailableSwizzle[i]);
+ if (recursiveFitsFPLimitation(IG, PV, SwzCandidate, CurrentlyChecked))
+ return true;
+ SwzCandidate.pop_back();
+ }
+ return false;
+ }
+
+ bool fitsReadPortLimitation(
+ std::vector<MachineInstr *> IG,
+ const DenseMap<unsigned, unsigned> &PV)
+ const {
+ // TODO: support shared src0 - src1 operand
+ std::vector<BankSwizzle> SwzCandidate;
+ bool Result = recursiveFitsFPLimitation(IG, PV, SwzCandidate,
+ std::vector<MachineInstr *>());
+ if (!Result)
+ return false;
+ for (unsigned i = 0, e = IG.size(); i < e; i++) {
+ MachineInstr *MI = IG[i];
+ unsigned Op = TII->getOperandIdx(MI->getOpcode(),
+ R600Operands::BANK_SWIZZLE);
+ MI->getOperand(Op).setImm(SwzCandidate[i]);
+ }
+ return true;
+ }
+};
+
+bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+
+ // Instantiate the packetizer.
+ R600PacketizerList Packetizer(Fn, MLI, MDT);
+
+ // DFA state table should not be empty.
+ assert(Packetizer.getResourceTracker() && "Empty DFA table!");
+
+ //
+ // Loop over all basic blocks and remove KILL pseudo-instructions
+ // These instructions confuse the dependence analysis. Consider:
+ // D0 = ... (Insn 0)
+ // R0 = KILL R0, D0 (Insn 1)
+ // R0 = ... (Insn 2)
+ // Here, Insn 1 will result in the dependence graph not emitting an output
+ // dependence between Insn 0 and Insn 2. This can lead to incorrect
+ // packetization
+ //
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ MachineBasicBlock::iterator End = MBB->end();
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != End) {
+ if (MI->isKill()) {
+ MachineBasicBlock::iterator DeleteMI = MI;
+ ++MI;
+ MBB->erase(DeleteMI);
+ End = MBB->end();
+ continue;
+ }
+ ++MI;
+ }
+ }
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ // Find scheduling regions and schedule / packetize each region.
+ unsigned RemainingCount = MBB->size();
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin();) {
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ MachineBasicBlock::iterator I = RegionEnd;
+ for(;I != MBB->begin(); --I, --RemainingCount) {
+ if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn))
+ break;
+ }
+ I = MBB->begin();
+
+ // Skip empty scheduling regions.
+ if (I == RegionEnd) {
+ RegionEnd = llvm::prior(RegionEnd);
+ --RemainingCount;
+ continue;
+ }
+ // Skip regions with one instruction.
+ if (I == llvm::prior(RegionEnd)) {
+ RegionEnd = llvm::prior(RegionEnd);
+ continue;
+ }
+
+ Packetizer.PacketizeMIs(MBB, I, RegionEnd);
+ RegionEnd = I;
+ }
+ }
+
+ return true;
+
+}
+
+}
+
+llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) {
+ return new R600Packetizer(tm);
+}
+
+#endif // R600PACKETIZER_CPP
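
A hedged restatement of the read-port rule that isLegal() and fitsReadPortLimitation() enforce in the packetizer above: within one packet, each source slot (SRC0..SRC2) may read at most one distinct register index from each channel bank (X/Y/Z/W). The helper below is illustrative only; the names and container types are not the pass's API.

    #include <cstring>
    #include <utility>
    #include <vector>

    // Each instruction contributes up to three (register index, channel) reads;
    // index -1 marks an unused port. Two different registers on the same
    // (bank, port) pair make the packet illegal.
    bool readPortsFit(
        const std::vector<std::vector<std::pair<int, unsigned> > > &Srcs) {
      int Vector[4][3];
      std::memset(Vector, -1, sizeof(Vector));
      for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
        for (unsigned Port = 0; Port < 3; ++Port) {
          int Reg = Srcs[i][Port].first;
          unsigned Bank = Srcs[i][Port].second;
          if (Reg < 0)
            continue;
          if (Vector[Bank][Port] < 0)
            Vector[Bank][Port] = Reg;
          else if (Vector[Bank][Port] != Reg)
            return false;
        }
      }
      return true;
    }
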
diff --git a/contrib/llvm/lib/Target/R600/R600RegisterInfo.td b/contrib/llvm/lib/Target/R600/R600RegisterInfo.td
index 03f4976..bfc546b 100644
--- a/contrib/llvm/lib/Target/R600/R600RegisterInfo.td
+++ b/contrib/llvm/lib/Target/R600/R600RegisterInfo.td
@@ -88,8 +88,14 @@ def NEG_ONE : R600Reg<"-1.0", 249>;
def ONE_INT : R600Reg<"1", 250>;
def HALF : R600Reg<"0.5", 252>;
def NEG_HALF : R600Reg<"-0.5", 252>;
-def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
-def PV_X : R600Reg<"pv.x", 254>;
+def ALU_LITERAL_X : R600RegWithChan<"literal.x", 253, "X">;
+def ALU_LITERAL_Y : R600RegWithChan<"literal.y", 253, "Y">;
+def ALU_LITERAL_Z : R600RegWithChan<"literal.z", 253, "Z">;
+def ALU_LITERAL_W : R600RegWithChan<"literal.w", 253, "W">;
+def PV_X : R600RegWithChan<"PV.x", 254, "X">;
+def PV_Y : R600RegWithChan<"PV.y", 254, "Y">;
+def PV_Z : R600RegWithChan<"PV.z", 254, "Z">;
+def PV_W : R600RegWithChan<"PV.w", 254, "W">;
def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
diff --git a/contrib/llvm/lib/Target/R600/R600Schedule.td b/contrib/llvm/lib/Target/R600/R600Schedule.td
index 7ede181..78a460a 100644
--- a/contrib/llvm/lib/Target/R600/R600Schedule.td
+++ b/contrib/llvm/lib/Target/R600/R600Schedule.td
@@ -24,7 +24,7 @@ def AnyALU : InstrItinClass;
def VecALU : InstrItinClass;
def TransALU : InstrItinClass;
-def R600_EG_Itin : ProcessorItineraries <
+def R600_VLIW5_Itin : ProcessorItineraries <
[ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
[],
[
@@ -34,3 +34,14 @@ def R600_EG_Itin : ProcessorItineraries <
InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
]
>;
+
+def R600_VLIW4_Itin : ProcessorItineraries <
+ [ALU_X, ALU_Y, ALU_Z, ALU_W, ALU_NULL],
+ [],
+ [
+ InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
+ InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
+ InstrItinData<TransALU, [InstrStage<1, [ALU_NULL]>]>,
+ InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
+ ]
+>;
diff --git a/contrib/llvm/lib/Target/R600/SIDefines.h b/contrib/llvm/lib/Target/R600/SIDefines.h
new file mode 100644
index 0000000..716b093
--- /dev/null
+++ b/contrib/llvm/lib/Target/R600/SIDefines.h
@@ -0,0 +1,22 @@
+//===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef SIDEFINES_H_
+#define SIDEFINES_H_
+
+#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
+#define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128
+#define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228
+#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
+#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
+#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
+#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
+
+#endif // SIDEFINES_H_
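
As a usage illustration of the field helpers above (the field values 8 and 2 are made up; only the masking and shifting comes from the header):

    #include <cstdint>

    #define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
    #define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)

    // Pack two fields of a SPI_SHADER_PGM_RSRC1_PS value: 8 stays in bits 5..0,
    // 2 is shifted into bits 9..6, giving 0x08 | 0x80 == 0x88.
    uint32_t RsrcPS = S_00B028_VGPRS(8) | S_00B028_SGPRS(2);
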
diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
index 6f0c307..6bd82a5 100644
--- a/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -49,6 +49,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
+ addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
@@ -70,6 +71,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i64, Custom);
+
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
@@ -234,6 +239,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
}
return SDValue();
}
@@ -332,6 +338,32 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
return Chain;
}
+#define RSRC_DATA_FORMAT 0xf00000000000
+
+SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Value = Op.getOperand(1);
+ SDValue VirtualAddress = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+
+ if (StoreNode->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) {
+ return SDValue();
+ }
+
+ SDValue SrcSrc = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
+ DAG.getConstant(0, MVT::i64),
+ DAG.getConstant(RSRC_DATA_FORMAT, MVT::i64));
+
+ SDValue Ops[2];
+ Ops[0] = DAG.getNode(AMDGPUISD::BUFFER_STORE, DL, MVT::Other, Chain,
+ Value, SrcSrc, VirtualAddress);
+ Ops[1] = Chain;
+
+ return DAG.getMergeValues(Ops, 2, DL);
+
+}
+
SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -424,9 +456,12 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
float F;
} Imm;
- if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N))
+ if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
+ if (Node->getZExtValue() >> 32) {
+ return -1;
+ }
Imm.I = Node->getSExtValue();
- else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
+ } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
Imm.F = Node->getValueAPF().convertToFloat();
else
return -1; // It isn't an immediate
@@ -534,8 +569,9 @@ void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
Operand = SDValue(Node, 0);
}
-SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
- SelectionDAG &DAG) const {
+/// \brief Try to fold the Node's operands into the Node
+SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
+ SelectionDAG &DAG) const {
// Original encoding (either e32 or e64)
int Opcode = Node->getMachineOpcode();
@@ -666,5 +702,116 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
// Create a complete new instruction
return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(),
- Node->getVTList(), Ops.data(), Ops.size());
+ Node->getVTList(), Ops);
+}
+
+/// \brief Helper function for adjustWritemask
+unsigned SubIdx2Lane(unsigned Idx) {
+ switch (Idx) {
+ default: return 0;
+ case AMDGPU::sub0: return 0;
+ case AMDGPU::sub1: return 1;
+ case AMDGPU::sub2: return 2;
+ case AMDGPU::sub3: return 3;
+ }
+}
+
+/// \brief Adjust the writemask of MIMG instructions
+void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
+ SelectionDAG &DAG) const {
+ SDNode *Users[4] = { };
+ unsigned Writemask = 0, Lane = 0;
+
+ // Try to figure out the used register components
+ for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
+ I != E; ++I) {
+
+ // Abort if we can't understand the usage
+ if (!I->isMachineOpcode() ||
+ I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
+ return;
+
+ Lane = SubIdx2Lane(I->getConstantOperandVal(1));
+
+ // Abort if we have more than one user per component
+ if (Users[Lane])
+ return;
+
+ Users[Lane] = *I;
+ Writemask |= 1 << Lane;
+ }
+
+ // Abort if all components are used
+ if (Writemask == 0xf)
+ return;
+
+ // Adjust the writemask in the node
+ std::vector<SDValue> Ops;
+ Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32));
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(Node->getOperand(i));
+ Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+
+ // If we only got one lane, replace it with a copy
+ if (Writemask == (1U << Lane)) {
+ SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
+ SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ DebugLoc(), Users[Lane]->getValueType(0),
+ SDValue(Node, 0), RC);
+ DAG.ReplaceAllUsesWith(Users[Lane], Copy);
+ return;
+ }
+
+ // Update the users of the node with the new indices
+ for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
+
+ SDNode *User = Users[i];
+ if (!User)
+ continue;
+
+ SDValue Op = DAG.getTargetConstant(Idx, MVT::i32);
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Op);
+
+ switch (Idx) {
+ default: break;
+ case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
+ case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
+ case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
+ }
+ }
+}
+
+/// \brief Fold the instructions after selecting them
+SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
+ SelectionDAG &DAG) const {
+
+ if (AMDGPU::isMIMG(Node->getMachineOpcode()) != -1)
+ adjustWritemask(Node, DAG);
+
+ return foldOperands(Node, DAG);
+}
+
+/// \brief Assign the register class depending on the number of
+/// bits set in the writemask
+void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const {
+ if (AMDGPU::isMIMG(MI->getOpcode()) == -1)
+ return;
+
+ unsigned VReg = MI->getOperand(0).getReg();
+ unsigned Writemask = MI->getOperand(1).getImm();
+ unsigned BitsSet = 0;
+ for (unsigned i = 0; i < 4; ++i)
+ BitsSet += Writemask & (1 << i) ? 1 : 0;
+
+ const TargetRegisterClass *RC;
+ switch (BitsSet) {
+ default: return;
+ case 1: RC = &AMDGPU::VReg_32RegClass; break;
+ case 2: RC = &AMDGPU::VReg_64RegClass; break;
+ case 3: RC = &AMDGPU::VReg_96RegClass; break;
+ }
+
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MRI.setRegClass(VReg, RC);
}
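
The register-class adjustment above reduces to a population count over the 4-bit MIMG writemask; a small sketch with an illustrative helper name (not part of the patch):

    // Count the enabled components; e.g. a writemask of 0x5 (components x and z)
    // has two bits set, so AdjustInstrPostInstrSelection picks VReg_64.
    unsigned componentsEnabled(unsigned Writemask) {
      unsigned BitsSet = 0;
      for (unsigned i = 0; i < 4; ++i)
        BitsSet += (Writemask >> i) & 1;
      return BitsSet;
    }
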
diff --git a/contrib/llvm/lib/Target/R600/SIISelLowering.h b/contrib/llvm/lib/Target/R600/SIISelLowering.h
index 5ad2f40..de637be 100644
--- a/contrib/llvm/lib/Target/R600/SIISelLowering.h
+++ b/contrib/llvm/lib/Target/R600/SIISelLowering.h
@@ -24,6 +24,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
const SIInstrInfo * TII;
const TargetRegisterInfo * TRI;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
@@ -33,6 +34,9 @@ class SITargetLowering : public AMDGPUTargetLowering {
void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
unsigned RegClass, bool &ScalarSlotUsed) const;
+ SDNode *foldOperands(MachineSDNode *N, SelectionDAG &DAG) const;
+ void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
+
public:
SITargetLowering(TargetMachine &tm);
@@ -49,6 +53,8 @@ public:
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
+ virtual void AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const;
int32_t analyzeImmediate(const SDNode *N) const;
};
diff --git a/contrib/llvm/lib/Target/R600/SIInstrFormats.td b/contrib/llvm/lib/Target/R600/SIInstrFormats.td
index 3891ddb..f737ddd 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrFormats.td
+++ b/contrib/llvm/lib/Target/R600/SIInstrFormats.td
@@ -284,33 +284,33 @@ let Uses = [EXEC] in {
class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64<outs, ins, asm, pattern> {
- bits<8> VDATA;
- bits<12> OFFSET;
- bits<1> OFFEN;
- bits<1> IDXEN;
- bits<1> GLC;
- bits<1> ADDR64;
- bits<1> LDS;
- bits<8> VADDR;
- bits<7> SRSRC;
- bits<1> SLC;
- bits<1> TFE;
- bits<8> SOFFSET;
-
- let Inst{11-0} = OFFSET;
- let Inst{12} = OFFEN;
- let Inst{13} = IDXEN;
- let Inst{14} = GLC;
- let Inst{15} = ADDR64;
- let Inst{16} = LDS;
+ bits<12> offset;
+ bits<1> offen;
+ bits<1> idxen;
+ bits<1> glc;
+ bits<1> addr64;
+ bits<1> lds;
+ bits<8> vaddr;
+ bits<8> vdata;
+ bits<7> srsrc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
+
+ let Inst{11-0} = offset;
+ let Inst{12} = offen;
+ let Inst{13} = idxen;
+ let Inst{14} = glc;
+ let Inst{15} = addr64;
+ let Inst{16} = lds;
let Inst{24-18} = op;
let Inst{31-26} = 0x38; //encoding
- let Inst{39-32} = VADDR;
- let Inst{47-40} = VDATA;
- let Inst{52-48} = SRSRC{6-2};
- let Inst{54} = SLC;
- let Inst{55} = TFE;
- let Inst{63-56} = SOFFSET;
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{54} = slc;
+ let Inst{55} = tfe;
+ let Inst{63-56} = soffset;
let VM_CNT = 1;
let EXP_CNT = 1;
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
index 0bfcef5..9a04c60 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.cpp
@@ -58,6 +58,10 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
};
+ const int16_t Sub0_2[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
+ };
+
const int16_t Sub0_1[] = {
AMDGPU::sub0, AMDGPU::sub1, 0
};
@@ -125,6 +129,11 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_1;
+ } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
+ assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
+ Opcode = AMDGPU::V_MOV_B32_e32;
+ SubIndices = Sub0_2;
+
} else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
AMDGPU::SReg_128RegClass.contains(SrcReg));
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.h b/contrib/llvm/lib/Target/R600/SIInstrInfo.h
index d4e60e5..87eff4d 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.h
@@ -80,6 +80,7 @@ namespace AMDGPU {
int getVOPe64(uint16_t Opcode);
int getCommuteRev(uint16_t Opcode);
int getCommuteOrig(uint16_t Opcode);
+ int isMIMG(uint16_t Opcode);
} // End namespace AMDGPU
diff --git a/contrib/llvm/lib/Target/R600/SIInstrInfo.td b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
index 617f0b8..c8aecb7 100644
--- a/contrib/llvm/lib/Target/R600/SIInstrInfo.td
+++ b/contrib/llvm/lib/Target/R600/SIInstrInfo.td
@@ -26,6 +26,10 @@ def HI32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32);
}]>;
+def SIbuffer_store : SDNode<"AMDGPUISD::BUFFER_STORE",
+ SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+ [SDNPHasChain, SDNPMayStore]>;
+
def IMM8bitDWORD : ImmLeaf <
i32, [{
return (Imm & ~0x3FC) == 0;
@@ -255,14 +259,14 @@ multiclass VOPC_64 <bits<8> op, string opName,
class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
op, (outs VReg_32:$dst),
(ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
- i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+ InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
>, VOP <opName>;
class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
op, (outs VReg_64:$dst),
(ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2,
- i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+ InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
>, VOP <opName>;
@@ -285,17 +289,39 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
op,
- (outs regClass:$dst),
+ (outs regClass:$vdata),
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
i1imm:$lds, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc,
i1imm:$tfe, SSrc_32:$soffset),
- asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, "
+ asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, "
#"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset",
[]> {
let mayLoad = 1;
let mayStore = 0;
}
+class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
+ ValueType VT> :
+ MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr),
+ name#" $vdata, $srsrc + $vaddr",
+ [(SIbuffer_store (VT vdataClass:$vdata), (i128 SReg_128:$srsrc),
+ (i64 VReg_64:$vaddr))]> {
+
+ let mayLoad = 0;
+ let mayStore = 1;
+
+ // Encoding
+ let offset = 0;
+ let offen = 0;
+ let idxen = 0;
+ let glc = 0;
+ let addr64 = 1;
+ let lds = 0;
+ let slc = 0;
+ let tfe = 0;
+ let soffset = 128; // ZERO
+}
+
class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs regClass:$dst),
@@ -309,7 +335,22 @@ class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF
let mayStore = 0;
}
-class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
+class MIMG_NoSampler_Helper <bits<7> op, string asm> : MIMG <
+ op,
+ (outs VReg_128:$vdata),
+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr,
+ SReg_256:$srsrc),
+ asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
+ #" $tfe, $lwe, $slc, $vaddr, $srsrc",
+ []> {
+ let SSAMP = 0;
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasPostISelHook = 1;
+}
+
+class MIMG_Sampler_Helper <bits<7> op, string asm> : MIMG <
op,
(outs VReg_128:$vdata),
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
@@ -320,6 +361,7 @@ class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
[]> {
let mayLoad = 1;
let mayStore = 0;
+ let hasPostISelHook = 1;
}
//===----------------------------------------------------------------------===//
@@ -353,4 +395,13 @@ def getCommuteOrig : InstrMapping {
let ValueCols = [["1"]];
}
+// Test if the supplied opcode is an MIMG instruction
+def isMIMG : InstrMapping {
+ let FilterClass = "MIMG";
+ let RowFields = ["Inst"];
+ let ColFields = ["Size"];
+ let KeyCol = ["8"];
+ let ValueCols = [["8"]];
+}
+
include "SIInstructions.td"
diff --git a/contrib/llvm/lib/Target/R600/SIInstructions.td b/contrib/llvm/lib/Target/R600/SIInstructions.td
index 4f734f9..0d50c5d 100644
--- a/contrib/llvm/lib/Target/R600/SIInstructions.td
+++ b/contrib/llvm/lib/Target/R600/SIInstructions.td
@@ -108,7 +108,7 @@ VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
def S_CMPK_EQ_I32 : SOPK <
0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
"S_CMPK_EQ_I32",
- [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))]
+ [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
>;
*/
@@ -408,8 +408,14 @@ def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2",
def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
-//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
-//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>;
+
+def BUFFER_STORE_DWORD : MUBUF_Store_Helper <
+ 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32
+>;
+
+def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
+ 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, i64
+>;
//def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
@@ -489,7 +495,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
-//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>;
+def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">;
//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
@@ -498,7 +504,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
-//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>;
+def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">;
//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
@@ -516,20 +522,20 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
-def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">;
+def IMAGE_SAMPLE : MIMG_Sampler_Helper <0x00000020, "IMAGE_SAMPLE">;
//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
-def IMAGE_SAMPLE_D : MIMG_Load_Helper <0x00000022, "IMAGE_SAMPLE_D">;
+def IMAGE_SAMPLE_D : MIMG_Sampler_Helper <0x00000022, "IMAGE_SAMPLE_D">;
//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
-def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">;
-def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">;
+def IMAGE_SAMPLE_L : MIMG_Sampler_Helper <0x00000024, "IMAGE_SAMPLE_L">;
+def IMAGE_SAMPLE_B : MIMG_Sampler_Helper <0x00000025, "IMAGE_SAMPLE_B">;
//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
-def IMAGE_SAMPLE_C : MIMG_Load_Helper <0x00000028, "IMAGE_SAMPLE_C">;
+def IMAGE_SAMPLE_C : MIMG_Sampler_Helper <0x00000028, "IMAGE_SAMPLE_C">;
//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
//def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>;
//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
-def IMAGE_SAMPLE_C_L : MIMG_Load_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">;
-def IMAGE_SAMPLE_C_B : MIMG_Load_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">;
+def IMAGE_SAMPLE_C_L : MIMG_Sampler_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">;
+def IMAGE_SAMPLE_C_B : MIMG_Sampler_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">;
//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
@@ -594,12 +600,14 @@ defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
- [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))]
+ [(set f32:$dst, (sint_to_fp i32:$src0))]
+>;
+defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32",
+ [(set f32:$dst, (uint_to_fp i32:$src0))]
>;
-//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
-//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
- [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))]
+ [(set i32:$dst, (fp_to_sint f32:$src0))]
>;
defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
@@ -616,35 +624,37 @@ defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
- [(set VReg_32:$dst, (AMDGPUfract VSrc_32:$src0))]
+ [(set f32:$dst, (AMDGPUfract f32:$src0))]
+>;
+defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32",
+ [(set f32:$dst, (int_AMDGPU_trunc f32:$src0))]
>;
-defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
- [(set VReg_32:$dst, (fceil VSrc_32:$src0))]
+ [(set f32:$dst, (fceil f32:$src0))]
>;
defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
- [(set VReg_32:$dst, (frint VSrc_32:$src0))]
+ [(set f32:$dst, (frint f32:$src0))]
>;
defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
- [(set VReg_32:$dst, (ffloor VSrc_32:$src0))]
+ [(set f32:$dst, (ffloor f32:$src0))]
>;
defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
- [(set VReg_32:$dst, (fexp2 VSrc_32:$src0))]
+ [(set f32:$dst, (fexp2 f32:$src0))]
>;
defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
- [(set VReg_32:$dst, (flog2 VSrc_32:$src0))]
+ [(set f32:$dst, (flog2 f32:$src0))]
>;
defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
- [(set VReg_32:$dst, (fdiv FP_ONE, VSrc_32:$src0))]
+ [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
>;
defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
defm V_RSQ_LEGACY_F32 : VOP1_32 <
0x0000002d, "V_RSQ_LEGACY_F32",
- [(set VReg_32:$dst, (int_AMDGPU_rsq VSrc_32:$src0))]
+ [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))]
>;
defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
@@ -787,14 +797,13 @@ def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
(ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2,
InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
"V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg",
- [(set (i32 VReg_32:$dst), (select (i1 SSrc_64:$src2),
- VSrc_32:$src1, VSrc_32:$src0))]
+ [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))]
>;
//f32 pattern for V_CNDMASK_B32_e64
def : Pat <
- (f32 (select (i1 SSrc_64:$src2), VSrc_32:$src1, VSrc_32:$src0)),
- (V_CNDMASK_B32_e64 VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2)
+ (f32 (select i1:$src2, f32:$src1, f32:$src0)),
+ (V_CNDMASK_B32_e64 $src0, $src1, $src2)
>;
defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
@@ -802,11 +811,11 @@ defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
- [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))]
+ [(set f32:$dst, (fadd f32:$src0, f32:$src1))]
>;
defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
- [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))]
+ [(set f32:$dst, (fsub f32:$src0, f32:$src1))]
>;
defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
} // End isCommutable = 1
@@ -817,11 +826,11 @@ let isCommutable = 1 in {
defm V_MUL_LEGACY_F32 : VOP2_32 <
0x00000007, "V_MUL_LEGACY_F32",
- [(set VReg_32:$dst, (int_AMDGPU_mul VSrc_32:$src0, VReg_32:$src1))]
+ [(set f32:$dst, (int_AMDGPU_mul f32:$src0, f32:$src1))]
>;
defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
- [(set VReg_32:$dst, (fmul VSrc_32:$src0, VReg_32:$src1))]
+ [(set f32:$dst, (fmul f32:$src0, f32:$src1))]
>;
} // End isCommutable = 1
@@ -834,43 +843,51 @@ defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
let isCommutable = 1 in {
defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
- [(set VReg_32:$dst, (AMDGPUfmin VSrc_32:$src0, VReg_32:$src1))]
+ [(set f32:$dst, (AMDGPUfmin f32:$src0, f32:$src1))]
>;
defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
- [(set VReg_32:$dst, (AMDGPUfmax VSrc_32:$src0, VReg_32:$src1))]
+ [(set f32:$dst, (AMDGPUfmax f32:$src0, f32:$src1))]
>;
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
-defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
-defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
-defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
-defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
+defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
+ [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
+>;
+defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
+ [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
+>;
+defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32",
+ [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
+>;
+defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32",
+ [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
+>;
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
- [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+ [(set i32:$dst, (srl i32:$src0, i32:$src1))]
>;
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
- [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))]
+ [(set i32:$dst, (sra i32:$src0, i32:$src1))]
>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
- [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+ [(set i32:$dst, (shl i32:$src0, i32:$src1))]
>;
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
- [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))]
+ [(set i32:$dst, (and i32:$src0, i32:$src1))]
>;
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
- [(set VReg_32:$dst, (or VSrc_32:$src0, VReg_32:$src1))]
+ [(set i32:$dst, (or i32:$src0, i32:$src1))]
>;
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
- [(set VReg_32:$dst, (xor VSrc_32:$src0, VReg_32:$src1))]
+ [(set i32:$dst, (xor i32:$src0, i32:$src1))]
>;
} // End isCommutable = 1
@@ -885,11 +902,11 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
- [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
+ [(set i32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
>;
defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
- [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
+ [(set i32:$dst, (sub i32:$src0, i32:$src1))]
>;
defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
@@ -905,7 +922,7 @@ defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
- [(set VReg_32:$dst, (int_SI_packf16 VSrc_32:$src0, VReg_32:$src1))]
+ [(set i32:$dst, (int_SI_packf16 f32:$src0, f32:$src1))]
>;
////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
@@ -942,6 +959,7 @@ def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
+defm : BFIPatterns <V_BFI_B32>;
def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
@@ -983,18 +1001,18 @@ def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
} // isCommutable = 1
def : Pat <
- (mul VSrc_32:$src0, VReg_32:$src1),
- (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+ (mul i32:$src0, i32:$src1),
+ (V_MUL_LO_I32 $src0, $src1, (i32 0))
>;
def : Pat <
- (mulhu VSrc_32:$src0, VReg_32:$src1),
- (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+ (mulhu i32:$src0, i32:$src1),
+ (V_MUL_HI_U32 $src0, $src1, (i32 0))
>;
def : Pat <
- (mulhs VSrc_32:$src0, VReg_32:$src1),
- (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+ (mulhs i32:$src0, i32:$src1),
+ (V_MUL_HI_I32 $src0, $src1, (i32 0))
>;
def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
@@ -1019,34 +1037,27 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
def S_CSELECT_B32 : SOP2 <
0x0000000a, (outs SReg_32:$dst),
(ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
- [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc),
- SReg_32:$src0, SReg_32:$src1))]
+ []
>;
def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
-// f32 pattern for S_CSELECT_B32
-def : Pat <
- (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)),
- (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
->;
-
def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
- [(set SReg_64:$dst, (i64 (and SSrc_64:$src0, SSrc_64:$src1)))]
+ [(set i64:$dst, (and i64:$src0, i64:$src1))]
>;
def : Pat <
- (i1 (and SSrc_64:$src0, SSrc_64:$src1)),
- (S_AND_B64 SSrc_64:$src0, SSrc_64:$src1)
+ (i1 (and i1:$src0, i1:$src1)),
+ (S_AND_B64 $src0, $src1)
>;
def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
def : Pat <
- (i1 (or SSrc_64:$src0, SSrc_64:$src1)),
- (S_OR_B64 SSrc_64:$src0, SSrc_64:$src1)
+ (i1 (or i1:$src0, i1:$src1)),
+ (S_OR_B64 $src0, $src1)
>;
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
@@ -1097,14 +1108,14 @@ def SI_IF : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$vcc, brtarget:$target),
"SI_IF $dst, $vcc, $target",
- [(set SReg_64:$dst, (int_SI_if SReg_64:$vcc, bb:$target))]
+ [(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
>;
def SI_ELSE : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src, brtarget:$target),
"SI_ELSE $dst, $src, $target",
- [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> {
+ [(set i64:$dst, (int_SI_else i64:$src, bb:$target))]> {
let Constraints = "$src = $dst";
}
@@ -1113,7 +1124,7 @@ def SI_LOOP : InstSI <
(outs),
(ins SReg_64:$saved, brtarget:$target),
"SI_LOOP $saved, $target",
- [(int_SI_loop SReg_64:$saved, bb:$target)]
+ [(int_SI_loop i64:$saved, bb:$target)]
>;
} // end isBranch = 1, isTerminator = 1
@@ -1122,35 +1133,35 @@ def SI_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src),
"SI_ELSE $dst, $src",
- [(set SReg_64:$dst, (int_SI_break SReg_64:$src))]
+ [(set i64:$dst, (int_SI_break i64:$src))]
>;
def SI_IF_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$vcc, SReg_64:$src),
"SI_IF_BREAK $dst, $vcc, $src",
- [(set SReg_64:$dst, (int_SI_if_break SReg_64:$vcc, SReg_64:$src))]
+ [(set i64:$dst, (int_SI_if_break i1:$vcc, i64:$src))]
>;
def SI_ELSE_BREAK : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src0, SReg_64:$src1),
"SI_ELSE_BREAK $dst, $src0, $src1",
- [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))]
+ [(set i64:$dst, (int_SI_else_break i64:$src0, i64:$src1))]
>;
def SI_END_CF : InstSI <
(outs),
(ins SReg_64:$saved),
"SI_END_CF $saved",
- [(int_SI_end_cf SReg_64:$saved)]
+ [(int_SI_end_cf i64:$saved)]
>;
def SI_KILL : InstSI <
(outs),
(ins VReg_32:$src),
"SI_KIL $src",
- [(int_AMDGPU_kill VReg_32:$src)]
+ [(int_AMDGPU_kill f32:$src)]
>;
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
@@ -1184,8 +1195,8 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
} // end IsCodeGenOnly, isPseudo
def : Pat<
- (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
- (V_CNDMASK_B32_e64 VReg_32:$src2, VReg_32:$src1, (V_CMP_GT_F32_e64 0, VReg_32:$src0))
+ (int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2),
+ (V_CNDMASK_B32_e64 $src2, $src1, (V_CMP_GT_F32_e64 0, $src0))
>;
def : Pat <
@@ -1195,93 +1206,110 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
- (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
- VReg_32:$buf_idx_vgpr),
+ (int_SI_vs_load_input v16i8:$tlst, IMM12bit:$attr_offset,
+ i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
- VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
- 0, 0, 0)
+ $buf_idx_vgpr, $tlst, 0, 0, 0)
>;
/* int_SI_export */
def : Pat <
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
- VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+ f32:$src0, f32:$src1, f32:$src2, f32:$src3),
(EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
- VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
+ $src0, $src1, $src2, $src3)
>;
+/********** ======================= **********/
+/********** Image sampling patterns **********/
+/********** ======================= **********/
/* int_SI_sample for simple 1D texture lookup */
def : Pat <
- (int_SI_sample imm:$writemask, VReg_32:$addr,
- SReg_256:$rsrc, SReg_128:$sampler, imm),
- (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr,
- SReg_256:$rsrc, SReg_128:$sampler)
+ (int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
+ (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
- ValueType addr_type> : Pat <
- (name imm:$writemask, (addr_type addr_class:$addr),
- SReg_256:$rsrc, SReg_128:$sampler, imm),
- (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
- SReg_256:$rsrc, SReg_128:$sampler)
+class SamplePattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
+ (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
- ValueType addr_type> : Pat <
- (name imm:$writemask, (addr_type addr_class:$addr),
- SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT),
- (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr,
- SReg_256:$rsrc, SReg_128:$sampler)
+class SampleRectPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_RECT),
+ (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
- ValueType addr_type> : Pat <
- (name imm:$writemask, (addr_type addr_class:$addr),
- SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY),
- (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
- SReg_256:$rsrc, SReg_128:$sampler)
+class SampleArrayPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_ARRAY),
+ (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleShadowPattern<Intrinsic name, MIMG opcode,
- RegisterClass addr_class, ValueType addr_type> : Pat <
- (name imm:$writemask, (addr_type addr_class:$addr),
- SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW),
- (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
- SReg_256:$rsrc, SReg_128:$sampler)
+ ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW),
+ (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
- RegisterClass addr_class, ValueType addr_type> : Pat <
- (name imm:$writemask, (addr_type addr_class:$addr),
- SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY),
- (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
- SReg_256:$rsrc, SReg_128:$sampler)
+ ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW_ARRAY),
+ (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
/* int_SI_sample* for texture lookups consuming more address parameters */
-multiclass SamplePatterns<RegisterClass addr_class, ValueType addr_type> {
- def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
- def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
- def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
- def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
- def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
-
- def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
- def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
- def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
- def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
-
- def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
- def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
- def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
- def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
+multiclass SamplePatterns<ValueType addr_type> {
+ def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
+ def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
+ def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
+ def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
+
+ def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
+ def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
+ def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
+
+ def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
+ def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
+ def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
+ def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
+}
+
+defm : SamplePatterns<v2i32>;
+defm : SamplePatterns<v4i32>;
+defm : SamplePatterns<v8i32>;
+defm : SamplePatterns<v16i32>;
+
+/* int_SI_imageload for texture fetches consuming varying address parameters */
+class ImageLoadPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
+ (name addr_type:$addr, v32i8:$rsrc, imm),
+ (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
+>;
+
+class ImageLoadArrayPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
+ (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY),
+ (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
+>;
+
+multiclass ImageLoadPatterns<ValueType addr_type> {
+ def : ImageLoadPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>;
+ def : ImageLoadArrayPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>;
}
-defm : SamplePatterns<VReg_64, v2i32>;
-defm : SamplePatterns<VReg_128, v4i32>;
-defm : SamplePatterns<VReg_256, v8i32>;
-defm : SamplePatterns<VReg_512, v16i32>;
+defm : ImageLoadPatterns<v2i32>;
+defm : ImageLoadPatterns<v4i32>;
+
+/* Image resource information */
+def : Pat <
+ (int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm),
+ (IMAGE_GET_RESINFO 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
+>;
+
+def : Pat <
+ (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY),
+ (IMAGE_GET_RESINFO 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
+>;
/********** ============================================ **********/
/********** Extraction, Insertion, Building and Casting **********/
@@ -1289,77 +1317,77 @@ defm : SamplePatterns<VReg_512, v16i32>;
foreach Index = 0-2 in {
def Extract_Element_v2i32_#Index : Extract_Element <
- i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v2i32_#Index : Insert_Element <
- i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v2f32_#Index : Extract_Element <
- f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v2f32_#Index : Insert_Element <
- f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+ f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
>;
}
foreach Index = 0-3 in {
def Extract_Element_v4i32_#Index : Extract_Element <
- i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v4i32_#Index : Insert_Element <
- i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v4f32_#Index : Extract_Element <
- f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v4f32_#Index : Insert_Element <
- f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+ f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
>;
}
foreach Index = 0-7 in {
def Extract_Element_v8i32_#Index : Extract_Element <
- i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v8i32_#Index : Insert_Element <
- i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v8f32_#Index : Extract_Element <
- f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v8f32_#Index : Insert_Element <
- f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+ f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
>;
}
foreach Index = 0-15 in {
def Extract_Element_v16i32_#Index : Extract_Element <
- i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v16i32_#Index : Insert_Element <
- i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v16f32_#Index : Extract_Element <
- f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Insert_Element_v16f32_#Index : Insert_Element <
- f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+ f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
>;
}
-def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>;
-def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>;
-def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>;
-def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>;
-def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>;
-def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>;
-def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>;
-def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>;
-def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>;
+def : Vector1_Build <v1i32, i32, VReg_32>;
+def : Vector2_Build <v2i32, i32>;
+def : Vector2_Build <v2f32, f32>;
+def : Vector4_Build <v4i32, i32>;
+def : Vector4_Build <v4f32, f32>;
+def : Vector8_Build <v8i32, i32>;
+def : Vector8_Build <v8f32, f32>;
+def : Vector16_Build <v16i32, i32>;
+def : Vector16_Build <v16f32, f32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <i32, f32, VReg_32>;
@@ -1372,20 +1400,20 @@ def : BitConvert <f32, i32, VReg_32>;
/********** =================== **********/
def : Pat <
- (int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
- (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ (int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
+ (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
>;
def : Pat <
- (fabs VReg_32:$src),
- (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ (fabs f32:$src),
+ (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
>;
def : Pat <
- (fneg VReg_32:$src),
- (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+ (fneg f32:$src),
+ (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
>;
@@ -1426,16 +1454,16 @@ def : Pat <
/********** ===================== **********/
def : Pat <
- (int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
- (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params)
+ (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
+ (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, $params)
>;
def : Pat <
- (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij),
- (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG VReg_64:$ij, sub0),
- imm:$attr_chan, imm:$attr, M0Reg:$params),
- (EXTRACT_SUBREG VReg_64:$ij, sub1),
- imm:$attr_chan, imm:$attr, M0Reg:$params)
+ (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij),
+ (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
+ imm:$attr_chan, imm:$attr, i32:$params),
+ (EXTRACT_SUBREG $ij, sub1),
+ imm:$attr_chan, imm:$attr, $params)
>;
/********** ================== **********/
@@ -1443,101 +1471,111 @@ def : Pat <
/********** ================== **********/
/* llvm.AMDGPU.pow */
-def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>;
+def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
def : Pat <
- (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1),
- (V_MUL_LEGACY_F32_e32 VSrc_32:$src0, (V_RCP_LEGACY_F32_e32 VSrc_32:$src1))
+ (int_AMDGPU_div f32:$src0, f32:$src1),
+ (V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
>;
def : Pat<
- (fdiv VSrc_32:$src0, VSrc_32:$src1),
- (V_MUL_F32_e32 VSrc_32:$src0, (V_RCP_F32_e32 VSrc_32:$src1))
+ (fdiv f32:$src0, f32:$src1),
+ (V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1))
>;
def : Pat <
- (fcos VSrc_32:$src0),
- (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+ (fcos f32:$src0),
+ (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
>;
def : Pat <
- (fsin VSrc_32:$src0),
- (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+ (fsin f32:$src0),
+ (V_SIN_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
>;
def : Pat <
- (int_AMDGPU_cube VReg_128:$src),
+ (int_AMDGPU_cube v4f32:$src),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
- (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
- (EXTRACT_SUBREG VReg_128:$src, sub1),
- (EXTRACT_SUBREG VReg_128:$src, sub2),
- 0, 0, 0, 0), sub0),
- (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
- (EXTRACT_SUBREG VReg_128:$src, sub1),
- (EXTRACT_SUBREG VReg_128:$src, sub2),
- 0, 0, 0, 0), sub1),
- (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
- (EXTRACT_SUBREG VReg_128:$src, sub1),
- (EXTRACT_SUBREG VReg_128:$src, sub2),
- 0, 0, 0, 0), sub2),
- (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
- (EXTRACT_SUBREG VReg_128:$src, sub1),
- (EXTRACT_SUBREG VReg_128:$src, sub2),
- 0, 0, 0, 0), sub3)
+ (V_CUBETC_F32 (EXTRACT_SUBREG $src, sub0),
+ (EXTRACT_SUBREG $src, sub1),
+ (EXTRACT_SUBREG $src, sub2)),
+ sub0),
+ (V_CUBESC_F32 (EXTRACT_SUBREG $src, sub0),
+ (EXTRACT_SUBREG $src, sub1),
+ (EXTRACT_SUBREG $src, sub2)),
+ sub1),
+ (V_CUBEMA_F32 (EXTRACT_SUBREG $src, sub0),
+ (EXTRACT_SUBREG $src, sub1),
+ (EXTRACT_SUBREG $src, sub2)),
+ sub2),
+ (V_CUBEID_F32 (EXTRACT_SUBREG $src, sub0),
+ (EXTRACT_SUBREG $src, sub1),
+ (EXTRACT_SUBREG $src, sub2)),
+ sub3)
>;
def : Pat <
- (i32 (sext (i1 SReg_64:$src0))),
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
+ (i32 (sext i1:$src0)),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
>;
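The pattern just above lowers a sign extension of an i1 to a select between 0 and -1: a 1-bit value sign-extends to either all zeros or all ones, which is exactly what V_CNDMASK_B32_e64 with the operands (0, -1, cond) produces. A trivial host-side check of that identity:

#include <cstdint>
#include <cassert>

int main() {
  for (int b = 0; b <= 1; ++b) {
    int32_t sext = -b;               // i1 -> i32 sign extension: 0 -> 0, 1 -> -1
    int32_t cndmask = b ? -1 : 0;    // V_CNDMASK_B32_e64 (i32 0), (i32 -1), cond
    assert(sext == cndmask);
  }
}
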
// 1. Offset as 8bit DWORD immediate
def : Pat <
- (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset),
- (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset)
+ (int_SI_load_const v16i8:$sbase, IMM8bitDWORD:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, IMM8bitDWORD:$offset)
>;
// 2. Offset loaded in an 32bit SGPR
def : Pat <
- (int_SI_load_const SReg_128:$sbase, imm:$offset),
- (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset))
+ (int_SI_load_const v16i8:$sbase, imm:$offset),
+ (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
>;
// 3. Offset in an 32Bit VGPR
def : Pat <
- (int_SI_load_const SReg_128:$sbase, VReg_32:$voff),
- (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
+ (int_SI_load_const v16i8:$sbase, i32:$voff),
+ (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, $voff, $sbase, 0, 0, 0)
+>;
+
+// The multiplication scales from [0,1] to the unsigned integer range
+def : Pat <
+ (AMDGPUurecip i32:$src0),
+ (V_CVT_U32_F32_e32
+ (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
+ (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
>;
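The AMDGPUurecip pattern above computes an approximate 2^32 / x: convert x to float, take the hardware reciprocal (a value in (0, 1] for nonzero x), scale by CONST.FP_UINT_MAX_PLUS_1 (2^32), and convert back to an unsigned integer. A minimal host-side sketch of that arithmetic, assuming V_RCP_IFLAG_F32 behaves like an ordinary reciprocal (the real instruction is an approximation, and the hardware float-to-uint conversion saturates where a plain C++ cast would be undefined):

#include <cstdint>
#include <cstdio>

static uint32_t urecipEmulated(uint32_t x) {
  float inv = 1.0f / static_cast<float>(x);             // V_CVT_F32_U32 + V_RCP_IFLAG_F32
  float scaled = 4294967296.0f * inv;                    // * CONST.FP_UINT_MAX_PLUS_1 (2^32)
  return static_cast<uint32_t>(static_cast<uint64_t>(scaled));  // V_CVT_U32_F32 (no saturation here)
}

int main() {
  // 2^32 / 7 is 613566756.57...; the single-precision approximation lands close by.
  printf("%u\n", urecipEmulated(7));
}
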
/********** ================== **********/
/********** VOP3 Patterns **********/
/********** ================== **********/
-def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)),
- (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
- 0, 0, 0, 0)>;
+def : Pat <
+ (f32 (fadd (fmul f32:$src0, f32:$src1), f32:$src2)),
+ (V_MAD_F32 $src0, $src1, $src2)
+>;
/********** ================== **********/
/********** SMRD Patterns **********/
/********** ================== **********/
multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
+
// 1. Offset as 8bit DWORD immediate
def : Pat <
- (constant_load (SIadd64bit32bit SReg_64:$sbase, IMM8bitDWORD:$offset)),
- (vt (Instr_IMM SReg_64:$sbase, IMM8bitDWORD:$offset))
+ (constant_load (SIadd64bit32bit i64:$sbase, IMM8bitDWORD:$offset)),
+ (vt (Instr_IMM $sbase, IMM8bitDWORD:$offset))
>;
// 2. Offset loaded in an 32bit SGPR
def : Pat <
- (constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)),
- (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset)))
+ (constant_load (SIadd64bit32bit i64:$sbase, imm:$offset)),
+ (vt (Instr_SGPR $sbase, (S_MOV_B32 imm:$offset)))
>;
// 3. No offset at all
def : Pat <
- (constant_load SReg_64:$sbase),
- (vt (Instr_IMM SReg_64:$sbase, 0))
+ (constant_load i64:$sbase),
+ (vt (Instr_IMM $sbase, 0))
>;
}
@@ -1550,45 +1588,37 @@ defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
/********** Indirect adressing **********/
/********** ====================== **********/
-multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt,
- SI_INDIRECT_DST IndDst> {
+multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> {
+
// 1. Extract with offset
def : Pat<
- (vector_extract (vt rc:$vec),
- (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
- ),
- (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off))
+ (vector_extract vt:$vec, (i64 (zext (add i32:$idx, imm:$off)))),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
>;
// 2. Extract without offset
def : Pat<
- (vector_extract (vt rc:$vec),
- (i64 (zext (i32 VReg_32:$idx)))
- ),
- (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0))
+ (vector_extract vt:$vec, (i64 (zext i32:$idx))),
+ (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
>;
// 3. Insert with offset
def : Pat<
- (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
- (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
- ),
- (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val))
+ (vector_insert vt:$vec, f32:$val, (i64 (zext (add i32:$idx, imm:$off)))),
+ (IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val)
>;
// 4. Insert without offset
def : Pat<
- (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
- (i64 (zext (i32 VReg_32:$idx)))
- ),
- (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val))
+ (vector_insert vt:$vec, f32:$val, (i64 (zext i32:$idx))),
+ (IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val)
>;
}
-defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
+defm : SI_INDIRECT_Pattern <v2f32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <v4f32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <v8f32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <v16f32, SI_INDIRECT_DST_V16>;
/********** =============== **********/
/********** Conditions **********/
@@ -1596,12 +1626,18 @@ defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
def : Pat<
(i1 (setcc f32:$src0, f32:$src1, SETO)),
- (V_CMP_O_F32_e64 f32:$src0, f32:$src1)
+ (V_CMP_O_F32_e64 $src0, $src1)
>;
def : Pat<
(i1 (setcc f32:$src0, f32:$src1, SETUO)),
- (V_CMP_U_F32_e64 f32:$src0, f32:$src1)
+ (V_CMP_U_F32_e64 $src0, $src1)
>;
+//============================================================================//
+// Miscellaneous Optimization Patterns
+//============================================================================//
+
+def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e32>;
+
} // End isSI predicate
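The SHA256MaPattern added above rewrites the SHA-256 majority expression (x & z) | (y & (x | z)) into a single bitfield insert, BFI(x ^ y, z, y), assuming V_BFI_B32 computes (src0 & src1) | (~src0 & src2). A small host-side check of that identity (helper names chosen here for illustration):

#include <cstdint>
#include <cassert>
#include <cstdio>

// Bitfield insert as V_BFI_B32 is assumed to behave: take bits of 'a' where the
// mask is set and bits of 'b' where it is clear.
static uint32_t bfi(uint32_t mask, uint32_t a, uint32_t b) {
  return (mask & a) | (~mask & b);
}

// SHA-256 majority function, written the way the matched DAG looks.
static uint32_t maj(uint32_t x, uint32_t y, uint32_t z) {
  return (x & z) | (y & (x | z));
}

int main() {
  uint32_t v[] = {0u, 0xFFFFFFFFu, 0xDEADBEEFu, 0x12345678u, 0x0F0F0F0Fu};
  for (uint32_t x : v)
    for (uint32_t y : v)
      for (uint32_t z : v)
        assert(maj(x, y, z) == bfi(x ^ y, z, y));
  printf("maj/bfi identity holds on the sampled values\n");
}
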
diff --git a/contrib/llvm/lib/Target/R600/SIIntrinsics.td b/contrib/llvm/lib/Target/R600/SIIntrinsics.td
index 0af378e..224cd2f 100644
--- a/contrib/llvm/lib/Target/R600/SIIntrinsics.td
+++ b/contrib/llvm/lib/Target/R600/SIIntrinsics.td
@@ -19,12 +19,16 @@ let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
- class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_sample : Sample;
def int_SI_sampleb : Sample;
def int_SI_samplel : Sample;
+ def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
/* Interpolation Intrinsics */
def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
diff --git a/contrib/llvm/lib/Target/R600/SIRegisterInfo.td b/contrib/llvm/lib/Target/R600/SIRegisterInfo.td
index 4f14931..244d4c00 100644
--- a/contrib/llvm/lib/Target/R600/SIRegisterInfo.td
+++ b/contrib/llvm/lib/Target/R600/SIRegisterInfo.td
@@ -94,6 +94,12 @@ def VGPR_64 : RegisterTuples<[sub0, sub1],
[(add (trunc VGPR_32, 255)),
(add (shl VGPR_32, 1))]>;
+// VGPR 96-bit registers
+def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
+ [(add (trunc VGPR_32, 254)),
+ (add (shl VGPR_32, 1)),
+ (add (shl VGPR_32, 2))]>;
+
// VGPR 128-bit registers
def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
[(add (trunc VGPR_32, 253)),
@@ -151,7 +157,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
(add SGPR_64, VCCReg, EXECReg)
>;
-def SReg_128 : RegisterClass<"AMDGPU", [v16i8], 128, (add SGPR_128)>;
+def SReg_128 : RegisterClass<"AMDGPU", [v16i8, i128], 128, (add SGPR_128)>;
def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
@@ -162,6 +168,10 @@ def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
+def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
+ let Size = 96;
+}
+
def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
new file mode 100644
index 0000000..aac0e8d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
@@ -0,0 +1,62 @@
+//===-- SparcBaseInfo.h - Top level definitions for Sparc ---- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions
+// for the Sparc target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core code gen
+// types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCBASEINFO_H
+#define SPARCBASEINFO_H
+
+namespace llvm {
+
+/// SPII - This namespace holds target specific flags for instruction info.
+namespace SPII {
+
+/// Target Operand Flags. Sparc specific TargetFlags for MachineOperands and
+/// SDNodes.
+enum TOF {
+ MO_NO_FLAG,
+
+ // Extract the low 10 bits of an address.
+ // Assembler: %lo(addr)
+ MO_LO,
+
+ // Extract bits 31-10 of an address. Only for sethi.
+ // Assembler: %hi(addr) or %lm(addr)
+ MO_HI,
+
+ // Extract bits 43-22 of an address. Only for sethi.
+ // Assembler: %h44(addr)
+ MO_H44,
+
+ // Extract bits 21-12 of an address.
+ // Assembler: %m44(addr)
+ MO_M44,
+
+ // Extract bits 11-0 of an address.
+ // Assembler: %l44(addr)
+ MO_L44,
+
+ // Extract bits 63-42 of an address. Only for sethi.
+ // Assembler: %hh(addr)
+ MO_HH,
+
+ // Extract bits 41-32 of an address.
+ // Assembler: %hm(addr)
+ MO_HM
+};
+
+} // end namespace SPII
+} // end namespace llvm
+
+#endif
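The target flags above split an address into fixed bit ranges so that sethi/or/sllx sequences can rebuild it under the abs32, abs44, and abs64 code models described in SparcMCTargetDesc.cpp below. A small sketch (not part of the patch) of that bit arithmetic; the extractor names are chosen here for illustration, and the reassembly in the asserts mirrors the conventional materialization sequences:

#include <cstdint>
#include <cassert>

static uint64_t lo10(uint64_t a) { return a & 0x3FF; }            // %lo:  bits 9-0
static uint64_t hi22(uint64_t a) { return (a >> 10) & 0x3FFFFF; } // %hi:  bits 31-10
static uint64_t l44(uint64_t a)  { return a & 0xFFF; }            // %l44: bits 11-0
static uint64_t m44(uint64_t a)  { return (a >> 12) & 0x3FF; }    // %m44: bits 21-12
static uint64_t h44(uint64_t a)  { return (a >> 22) & 0x3FFFFF; } // %h44: bits 43-22
static uint64_t hm10(uint64_t a) { return (a >> 32) & 0x3FF; }    // %hm:  bits 41-32
static uint64_t hh22(uint64_t a) { return (a >> 42) & 0x3FFFFF; } // %hh:  bits 63-42

int main() {
  uint64_t addr = 0xABC456789ABULL;  // fits in 44 bits (medium code model range)
  // abs32: sethi %hi(addr), %r; or %r, %lo(addr), %r
  assert(((hi22(addr) << 10) | lo10(addr)) == (addr & 0xFFFFFFFFULL));
  // abs44: sethi %h44(addr), %r; or %r, %m44(addr), %r; sllx %r, 12, %r; or %r, %l44(addr), %r
  assert((((h44(addr) << 10 | m44(addr)) << 12) | l44(addr)) == addr);
  // abs64 additionally materializes %hh/%hm for bits 63-32 and combines with the low half.
  assert(((hh22(addr) << 10 | hm10(addr)) << 32 | (hi22(addr) << 10 | lo10(addr))) == addr);
  return 0;
}
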
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 7fdb0c3..1c64e1b 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -50,14 +50,42 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
+// Code models. Some only make sense for 64-bit code.
+//
+// SunCC Reloc CodeModel Constraints
+// abs32 Static Small text+data+bss linked below 2^32 bytes
+// abs44 Static Medium text+data+bss linked below 2^44 bytes
+// abs64 Static Large text smaller than 2^31 bytes
+// pic13 PIC_ Small GOT < 2^13 bytes
+// pic32 PIC_ Medium GOT < 2^32 bytes
+//
+// All code models require that the text segment is smaller than 2GB.
+
static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
+
+ // The default 32-bit code model is abs32/pic32.
+ if (CM == CodeModel::Default)
+ CM = RM == Reloc::PIC_ ? CodeModel::Medium : CodeModel::Small;
+
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
+static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+
+ // The default 64-bit code model is abs44/pic32.
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Medium;
+
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
extern "C" void LLVMInitializeSparcTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget);
@@ -67,7 +95,7 @@ extern "C" void LLVMInitializeSparcTargetMC() {
TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget,
createSparcMCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target,
- createSparcMCCodeGenInfo);
+ createSparcV9MCCodeGenInfo);
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index e14b3cb..108eb90 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -16,6 +16,7 @@
#include "Sparc.h"
#include "SparcInstrInfo.h"
#include "SparcTargetMachine.h"
+#include "MCTargetDesc/SparcBaseInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -72,15 +73,39 @@ namespace {
void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand (opNum);
- bool CloseParen = false;
- if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) {
- O << "%hi(";
- CloseParen = true;
- } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) &&
- !MO.isReg() && !MO.isImm()) {
- O << "%lo(";
- CloseParen = true;
+ unsigned TF = MO.getTargetFlags();
+#ifndef NDEBUG
+ // Verify the target flags.
+ if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) {
+ if (MI->getOpcode() == SP::CALL)
+ assert(TF == SPII::MO_NO_FLAG &&
+ "Cannot handle target flags on call address");
+ else if (MI->getOpcode() == SP::SETHIi)
+ assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH) &&
+ "Invalid target flags for address operand on sethi");
+ else
+ assert((TF == SPII::MO_LO || TF == SPII::MO_M44 || TF == SPII::MO_L44 ||
+ TF == SPII::MO_HM) &&
+ "Invalid target flags for small address operand");
}
+#endif
+
+ bool CloseParen = true;
+ switch (TF) {
+ default:
+ llvm_unreachable("Unknown target flags on operand");
+ case SPII::MO_NO_FLAG:
+ CloseParen = false;
+ break;
+ case SPII::MO_LO: O << "%lo("; break;
+ case SPII::MO_HI: O << "%hi("; break;
+ case SPII::MO_H44: O << "%h44("; break;
+ case SPII::MO_M44: O << "%m44("; break;
+ case SPII::MO_L44: O << "%l44("; break;
+ case SPII::MO_HH: O << "%hh("; break;
+ case SPII::MO_HM: O << "%hm("; break;
+ }
+
switch (MO.getType()) {
case MachineOperand::MO_Register:
O << "%" << StringRef(getRegisterName(MO.getReg())).lower();
@@ -127,14 +152,7 @@ void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
return; // don't print "+0"
O << "+";
- if (MI->getOperand(opNum+1).isGlobal() ||
- MI->getOperand(opNum+1).isCPI()) {
- O << "%lo(";
- printOperand(MI, opNum+1, O);
- O << ")";
- } else {
- printOperand(MI, opNum+1, O);
- }
+ printOperand(MI, opNum+1, O);
}
bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
diff --git a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
index b38ac61..54784e0 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -12,25 +12,9 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Return Value Calling Conventions
+// SPARC v8 32-bit.
//===----------------------------------------------------------------------===//
-// Sparc 32-bit C return-value convention.
-def RetCC_Sparc32 : CallingConv<[
- CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
- CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
- CCIfType<[f64], CCAssignToReg<[D0, D1]>>
-]>;
-
-// Sparc 64-bit C return-value convention.
-def RetCC_Sparc64 : CallingConv<[
- CCIfType<[i32], CCPromoteToType<i64>>,
- CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
- CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
- CCIfType<[f64], CCAssignToReg<[D0, D1]>>
-]>;
-
-// Sparc 32-bit C Calling convention.
def CC_Sparc32 : CallingConv<[
//Custom assign SRet to [sp+64].
CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>,
@@ -43,14 +27,93 @@ def CC_Sparc32 : CallingConv<[
CCAssignToStack<4, 4>
]>;
-// Sparc 64-bit C Calling convention.
+def RetCC_Sparc32 : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+ CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// SPARC v9 64-bit.
+//===----------------------------------------------------------------------===//
+//
+// The 64-bit ABI conceptually assigns all function arguments to a parameter
+// array starting at [%fp+BIAS+128] in the callee's stack frame. All arguments
+// occupy a multiple of 8 bytes in the array. Integer arguments are extended to
+// 64 bits by the caller. Floats are right-aligned in their 8-byte slot, the
+// first 4 bytes in the slot are undefined.
+//
+// The integer registers %i0 to %i5 shadow the first 48 bytes of the parameter
+// array at fixed offsets. Integer arguments are promoted to registers when
+// possible.
+//
+// The floating point registers %f0 to %f31 shadow the first 128 bytes of the
+// parameter array at fixed offsets. Float and double parameters are promoted
+// to these registers when possible.
+//
+// Structs up to 16 bytes in size are passed by value. They are right-aligned
+// in one or two 8-byte slots in the parameter array. Struct members are
+// promoted to both floating point and integer registers when possible. A
+// struct containing two floats would thus be passed in %f0 and %f1, while two
+// float function arguments would occupy 8 bytes each, and be passed in %f1 and
+// %f3.
+//
+// When a struct { int, float } is passed by value, the int goes in the high
+// bits of an integer register while the float goes in a floating point
+// register.
+//
+// The difference is encoded in LLVM IR using the inreg attribute on function
+// arguments:
+//
+// C: void f(float, float);
+// IR: declare void f(float %f1, float %f3)
+//
+// C: void f(struct { float f0, f1; });
+// IR: declare void f(float inreg %f0, float inreg %f1)
+//
+// C: void f(int, float);
+// IR: declare void f(int signext %i0, float %f3)
+//
+// C: void f(struct { int i0high; float f1; });
+// IR: declare void f(i32 inreg %i0high, float inreg %f1)
+//
+// Two ints in a struct are simply coerced to i64:
+//
+// C: void f(struct { int i0high, i0low; });
+// IR: declare void f(i64 %i0.coerced)
+//
+// The frontend and backend divide the task of producing ABI compliant code for
+// C functions. The C frontend will:
+//
+// - Annotate integer arguments with zeroext or signext attributes.
+//
+// - Split structs into one or two 64-bit sized chunks, or 32-bit chunks with
+// inreg attributes.
+//
+// - Pass structs larger than 16 bytes indirectly with an explicit pointer
+// argument. The byval attribute is not used.
+//
+// The backend will:
+//
+// - Assign all arguments to 64-bit aligned stack slots, 32-bits for inreg.
+//
+// - Promote to integer or floating point registers depending on type.
+//
+// Function return values are passed exactly like function arguments, except a
+// struct up to 32 bytes in size can be returned in registers.
+
+// Function arguments AND return values.
def CC_Sparc64 : CallingConv<[
+ // The frontend uses the inreg flag to indicate i32 and float arguments from
+ // structs. These arguments are not promoted to 64 bits, but they can still
+ // be assigned to integer and float registers.
+ CCIfInReg<CCIfType<[i32, f32], CCCustom<"CC_Sparc64_Half">>>,
+
// All integers are promoted to i64 by the caller.
CCIfType<[i32], CCPromoteToType<i64>>,
- // Integer arguments get passed in integer registers if there is space.
- CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
- // FIXME: Floating point arguments.
- // Alternatively, they are assigned to the stack in 8-byte aligned units.
- CCAssignToStack<8, 8>
+ // Custom assignment is required because stack space is reserved for all
+ // arguments whether they are passed in registers or not.
+ CCCustom<"CC_Sparc64_Full">
]>;
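To make the parameter-array shadowing described above concrete, the sketch below maps an argument's slot offset to the register that covers it, mirroring the CC_Sparc64_Full logic added later in this patch. Kind and assign are illustrative names, and register spellings use assembler syntax (%i0-%i5, %d0-%d30, odd %f registers for right-aligned floats):

#include <cstdio>
#include <string>

enum class Kind { I64, F64, F32 };

static std::string assign(Kind k, unsigned offset) {
  if (k == Kind::I64 && offset < 6 * 8)
    return "%i" + std::to_string(offset / 8);            // %i0-%i5 shadow the first 48 bytes
  if (k == Kind::F64 && offset < 16 * 8)
    return "%d" + std::to_string(offset / 8 * 2);        // LLVM D0-D15 == %d0-%d30
  if (k == Kind::F32 && offset < 16 * 8)
    return "%f" + std::to_string(offset / 4 + 1);        // %f1, %f3, %f5, ...
  return "[%fp+BIAS+128+" + std::to_string(offset) + "]"; // out of registers
}

int main() {
  // void f(int a, double b, float c): a, b, c occupy slots 0, 1, 2 of the array.
  printf("a -> %s\n", assign(Kind::I64, 0).c_str());   // %i0
  printf("b -> %s\n", assign(Kind::F64, 8).c_str());   // %d2
  printf("c -> %s\n", assign(Kind::F32, 16).c_str());  // %f5
}
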
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index a0dae6e..7874240 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -37,18 +37,27 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
// Get the number of bytes to allocate from the FrameInfo
int NumBytes = (int) MFI->getStackSize();
- // Emit the correct save instruction based on the number of bytes in
- // the frame. Minimum stack frame size according to V8 ABI is:
- // 16 words for register window spill
- // 1 word for address of returned aggregate-value
- // + 6 words for passing parameters on the stack
- // ----------
- // 23 words * 4 bytes per word = 92 bytes
- NumBytes += 92;
+ if (SubTarget.is64Bit()) {
+ // All 64-bit stack frames must be 16-byte aligned, and must reserve space
+ // for spilling the 16 window registers at %sp+BIAS..%sp+BIAS+128.
+ NumBytes += 128;
+ // Frames with calls must also reserve space for 6 outgoing arguments
+ // whether they are used or not. LowerCall_64 takes care of that.
+ assert(NumBytes % 16 == 0 && "Stack size not 16-byte aligned");
+ } else {
+ // Emit the correct save instruction based on the number of bytes in
+ // the frame. Minimum stack frame size according to V8 ABI is:
+ // 16 words for register window spill
+ // 1 word for address of returned aggregate-value
+ // + 6 words for passing parameters on the stack
+ // ----------
+ // 23 words * 4 bytes per word = 92 bytes
+ NumBytes += 92;
- // Round up to next doubleword boundary -- a double-word boundary
- // is required by the ABI.
- NumBytes = (NumBytes + 7) & ~7;
+ // Round up to next doubleword boundary -- a double-word boundary
+ // is required by the ABI.
+ NumBytes = RoundUpToAlignment(NumBytes, 8);
+ }
NumBytes = -NumBytes;
if (NumBytes >= -4096) {
@@ -70,15 +79,18 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
void SparcFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- MachineInstr &MI = *I;
- DebugLoc dl = MI.getDebugLoc();
- int Size = MI.getOperand(0).getImm();
- if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
- Size = -Size;
- const SparcInstrInfo &TII =
- *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
- if (Size)
- BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+ if (!hasReservedCallFrame(MF)) {
+ MachineInstr &MI = *I;
+ DebugLoc DL = MI.getDebugLoc();
+ int Size = MI.getOperand(0).getImm();
+ if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+ Size = -Size;
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (Size)
+ BuildMI(MBB, I, DL, TII.get(SP::ADDri), SP::O6).addReg(SP::O6)
+ .addImm(Size);
+ }
MBB.erase(I);
}
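For a rough feel of the two prologue paths in the hunk above, the sketch below reproduces the frame-size arithmetic for a hypothetical local area: the v8 path adds the fixed 92 bytes and rounds up to a doubleword, the v9 path adds the 128-byte window register save area and requires the result to be 16-byte aligned. Helper names are illustrative, and RoundUpToAlignment is modelled with plain integer arithmetic:

#include <cassert>

static int frameSizeV8(int locals) {
  int bytes = locals + 92;            // window spill + aggregate ptr + 6 argument words
  return (bytes + 7) & ~7;            // round up to a doubleword boundary
}

static int frameSizeV9(int locals) {
  int bytes = locals + 128;           // 16 x 8-byte window register save area
  assert(bytes % 16 == 0 && "Stack size not 16-byte aligned");
  return bytes;
}

int main() {
  assert(frameSizeV8(40) == 136);     // 40 + 92 = 132, rounded up to 136
  assert(frameSizeV9(48) == 176);     // 48 + 128, already 16-byte aligned
}
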
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
index 464233e..c375662 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -22,10 +22,12 @@ namespace llvm {
class SparcSubtarget;
class SparcFrameLowering : public TargetFrameLowering {
+ const SparcSubtarget &SubTarget;
public:
- explicit SparcFrameLowering(const SparcSubtarget &/*sti*/)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {
- }
+ explicit SparcFrameLowering(const SparcSubtarget &ST)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8),
+ SubTarget(ST) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 5fa545d..a709685 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -73,7 +73,7 @@ SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
SDValue &Base, SDValue &Offset) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI.getPointerTy());
Offset = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -87,7 +87,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
if (FrameIndexSDNode *FIN =
dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
// Constant offset from frame ref.
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
+ TLI.getPointerTy());
} else {
Base = Addr.getOperand(0);
}
@@ -130,7 +131,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
}
R1 = Addr;
- R2 = CurDAG->getRegister(SP::G0, MVT::i32);
+ R2 = CurDAG->getRegister(SP::G0, TLI.getPointerTy());
return true;
}
@@ -146,6 +147,9 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
case ISD::SDIV:
case ISD::UDIV: {
+ // sdivx / udivx handle 64-bit divides.
+ if (N->getValueType(0) == MVT::i64)
+ break;
// FIXME: should use a custom expander to expose the SRA to the dag.
SDValue DivLHS = N->getOperand(0);
SDValue DivRHS = N->getOperand(1);
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 325f134..3863e2c 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -15,6 +15,7 @@
#include "SparcISelLowering.h"
#include "SparcMachineFunctionInfo.h"
#include "SparcTargetMachine.h"
+#include "MCTargetDesc/SparcBaseInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -74,27 +75,118 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
return true;
}
+// Allocate a full-sized argument for the 64-bit ABI.
+static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ assert((LocVT == MVT::f32 || LocVT.getSizeInBits() == 64) &&
+ "Can't handle non-64 bits locations");
+
+ // Stack space is allocated for all arguments starting from [%fp+BIAS+128].
+ unsigned Offset = State.AllocateStack(8, 8);
+ unsigned Reg = 0;
+
+ if (LocVT == MVT::i64 && Offset < 6*8)
+ // Promote integers to %i0-%i5.
+ Reg = SP::I0 + Offset/8;
+ else if (LocVT == MVT::f64 && Offset < 16*8)
+ // Promote doubles to %d0-%d30. (Which LLVM calls D0-D15).
+ Reg = SP::D0 + Offset/8;
+ else if (LocVT == MVT::f32 && Offset < 16*8)
+ // Promote floats to %f1, %f3, ...
+ Reg = SP::F1 + Offset/4;
+
+ // Promote to register when possible, otherwise use the stack slot.
+ if (Reg) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true;
+ }
+
+ // This argument goes on the stack in an 8-byte slot.
+ // When passing floats, LocVT is smaller than 8 bytes. Adjust the offset to
+ // the right-aligned float. The first 4 bytes of the stack slot are undefined.
+ if (LocVT == MVT::f32)
+ Offset += 4;
+
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return true;
+}
+
+// Allocate a half-sized argument for the 64-bit ABI.
+//
+// This is used when passing { float, int } structs by value in registers.
+static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations");
+ unsigned Offset = State.AllocateStack(4, 4);
+
+ if (LocVT == MVT::f32 && Offset < 16*8) {
+ // Promote floats to %f0-%f31.
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, SP::F0 + Offset/4,
+ LocVT, LocInfo));
+ return true;
+ }
+
+ if (LocVT == MVT::i32 && Offset < 6*8) {
+ // Promote integers to %i0-%i5, using half the register.
+ unsigned Reg = SP::I0 + Offset/8;
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::AExt;
+
+ // Set the Custom bit if this i32 goes in the high bits of a register.
+ if (Offset % 8 == 0)
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg,
+ LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true;
+ }
+
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return true;
+}
+
#include "SparcGenCallingConv.inc"
+// The calling conventions in SparcCallingConv.td are described in terms of the
+// callee's register window. This function translates registers to the
+// corresponding caller window %o register.
+static unsigned toCallerWindow(unsigned Reg) {
+ assert(SP::I0 + 7 == SP::I7 && SP::O0 + 7 == SP::O7 && "Unexpected enum");
+ if (Reg >= SP::I0 && Reg <= SP::I7)
+ return Reg - SP::I0 + SP::O0;
+ return Reg;
+}
+
SDValue
SparcTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
+ CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
+ DebugLoc DL, SelectionDAG &DAG) const {
+ if (Subtarget->is64Bit())
+ return LowerReturn_64(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG);
+ return LowerReturn_32(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG);
+}
+SDValue
+SparcTargetLowering::LowerReturn_32(SDValue Chain,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
// CCValAssign - represent the assignment of the return value to locations.
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
DAG.getTarget(), RVLocs, *DAG.getContext());
- // Analize return values.
- CCInfo.AnalyzeReturn(Outs, Subtarget->is64Bit() ?
- RetCC_Sparc64 : RetCC_Sparc32);
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -106,7 +198,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(),
OutVals[i], Flag);
// Guarantee that all emitted copies are stuck together with flags.
@@ -121,8 +213,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
unsigned Reg = SFI->getSRetReturnReg();
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
- SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
- Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
+ Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy()));
RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
@@ -135,7 +227,85 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other,
+ return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other,
+ &RetOps[0], RetOps.size());
+}
+
+// Lower return values for the 64-bit ABI.
+// Return values are passed exactly the same way as function arguments.
+SDValue
+SparcTargetLowering::LowerReturn_64(SDValue Chain,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const {
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+
+ // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, CC_Sparc64);
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // The second operand on the return instruction is the return address offset.
+ // The return address is always %i7+8 with the 64-bit ABI.
+ RetOps.push_back(DAG.getConstant(8, MVT::i32));
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDValue OutVal = OutVals[i];
+
+ // Integer return values must be sign or zero extended by the callee.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::SExt:
+ OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
+ break;
+ case CCValAssign::ZExt:
+ OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
+ break;
+ case CCValAssign::AExt:
+ OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
+ default:
+ break;
+ }
+
+ // The custom bit on an i32 return value indicates that it should be passed
+ // in the high bits of the register.
+ if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
+ OutVal = DAG.getNode(ISD::SHL, DL, MVT::i64, OutVal,
+ DAG.getConstant(32, MVT::i32));
+
+ // The next value may go in the low bits of the same register.
+ // Handle both at once.
+ if (i+1 < RVLocs.size() && RVLocs[i+1].getLocReg() == VA.getLocReg()) {
+ SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, OutVals[i+1]);
+ OutVal = DAG.getNode(ISD::OR, DL, MVT::i64, OutVal, NV);
+ // Skip the next value, it's already done.
+ ++i;
+ }
+ }
+
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other,
&RetOps[0], RetOps.size());
}
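The custom-location handling in LowerReturn_64 above packs an i32 struct member into the high half of a 64-bit register and ORs the zero-extended next member into the low half; the receiving side (LowerFormalArguments_64, later in this patch) recovers the high half with a 32-bit right shift. A short sketch of that packing; packHighLow is an illustrative name:

#include <cstdint>
#include <cassert>

static uint64_t packHighLow(uint32_t hi, uint32_t lo) {
  // SHL by 32, then OR in the zero-extended low value, as in the lowering above.
  return (static_cast<uint64_t>(hi) << 32) | static_cast<uint64_t>(lo);
}

int main() {
  uint64_t packed = packHighLow(0x11111111u, 0x22222222u);
  assert(packed == 0x1111111122222222ULL);
  // SRL by 32 on the receiving side yields the high member again.
  assert(static_cast<uint32_t>(packed >> 32) == 0x11111111u);
  assert(static_cast<uint32_t>(packed) == 0x22222222u);
}
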
@@ -373,6 +543,9 @@ LowerFormalArguments_64(SDValue Chain,
getTargetMachine(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64);
+ // The argument array begins at %fp+BIAS+128, after the register save area.
+ const unsigned ArgArea = 128;
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc()) {
@@ -384,6 +557,11 @@ LowerFormalArguments_64(SDValue Chain,
getRegClassFor(VA.getLocVT()));
SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
+ // Get the high bits for i32 struct elements.
+ if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+ Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
+ DAG.getConstant(32, MVT::i32));
+
// The caller promoted the argument, so insert an Assert?ext SDNode so we
// won't promote the value again in this function.
switch (VA.getLocInfo()) {
@@ -409,13 +587,71 @@ LowerFormalArguments_64(SDValue Chain,
// The registers are exhausted. This argument was passed on the stack.
assert(VA.isMemLoc());
+ // The CC_Sparc64_Full/Half functions compute stack offsets relative to the
+ // beginning of the arguments area at %fp+BIAS+128.
+ unsigned Offset = VA.getLocMemOffset() + ArgArea;
+ unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
+ // Adjust offset for extended arguments, SPARC is big-endian.
+ // The caller will have written the full slot with extended bytes, but we
+ // prefer our own extending loads.
+ if (VA.isExtInLoc())
+ Offset += 8 - ValSize;
+ int FI = MF.getFrameInfo()->CreateFixedObject(ValSize, Offset, true);
+ InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain,
+ DAG.getFrameIndex(FI, getPointerTy()),
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0));
}
+
+ if (!IsVarArg)
+ return Chain;
+
+ // This function takes variable arguments, some of which may have been passed
+ // in registers %i0-%i5. Variable floating point arguments are never passed
+ // in floating point registers. They go on %i0-%i5 or on the stack like
+ // integer arguments.
+ //
+ // The va_start intrinsic needs to know the offset to the first variable
+ // argument.
+ unsigned ArgOffset = CCInfo.getNextStackOffset();
+ SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+ // Skip the 128 bytes of register save area.
+ FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgArea +
+ Subtarget->getStackPointerBias());
+
+ // Save the variable arguments that were passed in registers.
+ // The caller is required to reserve stack space for 6 arguments regardless
+ // of how many arguments were actually passed.
+ SmallVector<SDValue, 8> OutChains;
+ for (; ArgOffset < 6*8; ArgOffset += 8) {
+ unsigned VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass);
+ SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
+ int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true);
+ OutChains.push_back(DAG.getStore(Chain, DL, VArg,
+ DAG.getFrameIndex(FI, getPointerTy()),
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+
+ if (!OutChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ &OutChains[0], OutChains.size());
+
return Chain;
}
SDValue
SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
+ if (Subtarget->is64Bit())
+ return LowerCall_64(CLI, InVals);
+ return LowerCall_32(CLI, InVals);
+}
+
+// Lower a call for the 32-bit ABI.
+SDValue
+SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
DebugLoc &dl = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
@@ -618,11 +854,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- unsigned Reg = RegsToPass[i].first;
- // Remap I0->I7 -> O0->O7.
- if (Reg >= SP::I0 && Reg <= SP::I7)
- Reg = Reg-SP::I0+SP::O0;
-
+ unsigned Reg = toCallerWindow(RegsToPass[i].first);
Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
@@ -644,13 +876,9 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(Callee);
if (hasStructRetAttr)
Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32));
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- unsigned Reg = RegsToPass[i].first;
- if (Reg >= SP::I0 && Reg <= SP::I7)
- Reg = Reg-SP::I0+SP::O0;
-
- Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
- }
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first),
+ RegsToPass[i].second.getValueType()));
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -670,13 +898,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
- unsigned Reg = RVLocs[i].getLocReg();
-
- // Remap I0->I7 -> O0->O7.
- if (Reg >= SP::I0 && Reg <= SP::I7)
- Reg = Reg-SP::I0+SP::O0;
-
- Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+ Chain = DAG.getCopyFromReg(Chain, dl, toCallerWindow(RVLocs[i].getLocReg()),
RVLocs[i].getValVT(), InFlag).getValue(1);
InFlag = Chain.getValue(2);
InVals.push_back(Chain.getValue(0));
@@ -709,6 +931,259 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
return getDataLayout()->getTypeAllocSize(ElementTy);
}
+
+// Fix up floating point arguments in the ... part of a varargs call.
+//
+// The SPARC v9 ABI requires that floating point arguments are treated the same
+// as integers when calling a varargs function. This does not apply to the
+// fixed arguments that are part of the function's prototype.
+//
+// This function post-processes a CCValAssign array created by
+// AnalyzeCallOperands().
+static void fixupVariableFloatArgs(SmallVectorImpl<CCValAssign> &ArgLocs,
+ ArrayRef<ISD::OutputArg> Outs) {
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ const CCValAssign &VA = ArgLocs[i];
+ // FIXME: What about f32 arguments? C promotes them to f64 when calling
+ // varargs functions.
+ if (!VA.isRegLoc() || VA.getLocVT() != MVT::f64)
+ continue;
+ // The fixed arguments to a varargs function still go in FP registers.
+ if (Outs[VA.getValNo()].IsFixed)
+ continue;
+
+ // This floating point argument should be reassigned.
+ CCValAssign NewVA;
+
+ // Determine the offset into the argument array.
+ unsigned Offset = 8 * (VA.getLocReg() - SP::D0);
+ assert(Offset < 16*8 && "Offset out of range, bad register enum?");
+
+ if (Offset < 6*8) {
+ // This argument should go in %i0-%i5.
+ unsigned IReg = SP::I0 + Offset/8;
+ // Full register, just bitconvert into i64.
+ NewVA = CCValAssign::getReg(VA.getValNo(), VA.getValVT(),
+ IReg, MVT::i64, CCValAssign::BCvt);
+ } else {
+ // This needs to go to memory; we're out of integer registers.
+ NewVA = CCValAssign::getMem(VA.getValNo(), VA.getValVT(),
+ Offset, VA.getLocVT(), VA.getLocInfo());
+ }
+ ArgLocs[i] = NewVA;
+ }
+}
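// Illustrative sketch (not part of the change above): the remapping rule that
// fixupVariableFloatArgs implements, with the FP register written as a plain
// index (0 for the first double register), mirroring the
// 8 * (LocReg - SP::D0) computation. All names below are hypothetical.
#include <cassert>

struct VarargSlot {
  bool InIntReg;   // true -> %i0-%i5, false -> the stack argument array
  unsigned Index;  // register index (0-5) or byte offset into the array
};

static VarargSlot remapVarargDouble(unsigned DRegIndex) {
  unsigned Offset = 8 * DRegIndex;  // each argument slot is 8 bytes
  if (Offset < 6 * 8)
    return {true, Offset / 8};      // becomes a bitcast i64 in an integer reg
  return {false, Offset};           // spilled to the argument array
}

int main() {
  assert(remapVarargDouble(2).InIntReg && remapVarargDouble(2).Index == 2);
  assert(!remapVarargDouble(7).InIntReg && remapVarargDouble(7).Index == 56);
  return 0;
}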
+
+// Lower a call for the 64-bit ABI.
+SDValue
+SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc DL = CLI.DL;
+ SDValue Chain = CLI.Chain;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64);
+
+ // Get the size of the outgoing arguments stack space requirement.
+ // The stack offset computed by CC_Sparc64 includes all arguments.
+ // Called functions expect 6 argument words to exist in the stack frame, used
+ // or not.
+ unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset());
+
+ // Keep stack frames 16-byte aligned.
+ ArgsSize = RoundUpToAlignment(ArgsSize, 16);
+
+ // Varargs calls require special treatment.
+ if (CLI.IsVarArg)
+ fixupVariableFloatArgs(ArgLocs, CLI.Outs);
+
+ // Adjust the stack pointer to make room for the arguments.
+ // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
+ // with more than 6 arguments.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+
+ // Collect the set of registers to pass to the function and their values.
+ // This will be emitted as a sequence of CopyToReg nodes glued to the call
+ // instruction.
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+ // Collect chains from all the memory operations that copy arguments to the
+ // stack. They must follow the stack pointer adjustment above and precede the
+ // call instruction itself.
+ SmallVector<SDValue, 8> MemOpChains;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ const CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = CLI.OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown location info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ // The custom bit on an i32 value indicates that it should be passed in
+ // the high bits of the register.
+ if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
+ Arg = DAG.getNode(ISD::SHL, DL, MVT::i64, Arg,
+ DAG.getConstant(32, MVT::i32));
+
+ // The next value may go in the low bits of the same register.
+ // Handle both at once.
+ if (i+1 < ArgLocs.size() && ArgLocs[i+1].isRegLoc() &&
+ ArgLocs[i+1].getLocReg() == VA.getLocReg()) {
+ SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64,
+ CLI.OutVals[i+1]);
+ Arg = DAG.getNode(ISD::OR, DL, MVT::i64, Arg, NV);
+ // Skip the next value, it's already done.
+ ++i;
+ }
+ }
+ RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), Arg));
+ continue;
+ }
+
+ assert(VA.isMemLoc());
+
+ // Create a store off the stack pointer for this argument.
+ SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy());
+ // The argument area starts at %fp+BIAS+128 in the callee frame,
+ // %sp+BIAS+128 in ours.
+ SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() +
+ Subtarget->getStackPointerBias() +
+ 128);
+ PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+ MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+
+ // Emit all stores, make sure they occur before the call.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of CopyToReg nodes glued together with token chain and
+ // glue operands which copy the outgoing args into registers. The InGlue is
+ // necessary since all emitted instructions must be stuck together in order
+ // to pass the live physical registers.
+ SDValue InGlue;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, DL,
+ RegsToPass[i].first, RegsToPass[i].second, InGlue);
+ InGlue = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress node (quite common; every direct call is),
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ SDValue Callee = CLI.Callee;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy());
+ else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
+
+ // Build the operands for the call instruction itself.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Make sure the CopyToReg nodes are glued to the call instruction which
+ // consumes the registers.
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
+
+ // Now the call itself.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
+ InGlue = Chain.getValue(1);
+
+ // Revert the stack pointer immediately after the call.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+ DAG.getIntPtrConstant(0, true), InGlue);
+ InGlue = Chain.getValue(1);
+
+ // Now extract the return values. This is more or less the same as
+ // LowerFormalArguments_64.
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
+ DAG.getTarget(), RVLocs, *DAG.getContext());
+ RVInfo.AnalyzeCallResult(CLI.Ins, CC_Sparc64);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ unsigned Reg = toCallerWindow(VA.getLocReg());
+
+ // When returning an 'inreg { i32, i32 }' struct, two consecutive i32 return
+ // values can reside in the high and low bits of the same register. Reuse the
+ // previous CopyFromReg node to avoid a duplicate copy.
+ SDValue RV;
+ if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
+ if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
+ RV = Chain.getValue(0);
+
+ // But usually we'll create a new CopyFromReg for a different register.
+ if (!RV.getNode()) {
+ RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
+ Chain = RV.getValue(1);
+ InGlue = Chain.getValue(2);
+ }
+
+ // Get the high bits for i32 struct elements.
+ if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+ RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
+ DAG.getConstant(32, MVT::i32));
+
+ // The callee promoted the return value, so insert an Assert?ext SDNode so
+ // we won't promote the value again in this function.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::SExt:
+ RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ case CCValAssign::ZExt:
+ RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ default:
+ break;
+ }
+
+ // Truncate the register down to the return value type.
+ if (VA.isExtInLoc())
+ RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
+
+ InVals.push_back(RV);
+ }
+
+ return Chain;
+}
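// Illustrative sketch (not part of the change above): the outgoing
// argument-area sizing used in LowerCall_64, re-implemented with plain integer
// arithmetic so the example stands alone. The helper name is made up.
#include <algorithm>
#include <cassert>

static unsigned calleeArgsSize(unsigned NextStackOffset) {
  unsigned ArgsSize = std::max(6u * 8u, NextStackOffset);  // at least 6 slots
  return (ArgsSize + 15u) & ~15u;                          // 16-byte aligned
}

int main() {
  assert(calleeArgsSize(0) == 48);   // even a no-argument call reserves 6 slots
  assert(calleeArgsSize(72) == 80);  // 9 slots round up to the next 16 bytes
  return 0;
}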
+
//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
@@ -778,9 +1253,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// Custom legalize GlobalAddress nodes into LO/HI parts.
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
- setOperationAction(ISD::ConstantPool , MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom);
+ setOperationAction(ISD::GlobalTLSAddress, getPointerTy(), Custom);
+ setOperationAction(ISD::ConstantPool, getPointerTy(), Custom);
// Sparc doesn't have sext_inreg, replace them with shl/sra
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -831,7 +1306,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
// FIXME: There are instructions available for ATOMIC_FENCE
// on SparcV8 and later.
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::FSIN , MVT::f64, Expand);
@@ -965,46 +1439,89 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
}
}
+// Convert to a target node and set target flags.
+SDValue SparcTargetLowering::withTargetFlags(SDValue Op, unsigned TF,
+ SelectionDAG &DAG) const {
+ if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
+ return DAG.getTargetGlobalAddress(GA->getGlobal(),
+ GA->getDebugLoc(),
+ GA->getValueType(0),
+ GA->getOffset(), TF);
+
+ if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
+ return DAG.getTargetConstantPool(CP->getConstVal(),
+ CP->getValueType(0),
+ CP->getAlignment(),
+ CP->getOffset(), TF);
+
+ if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
+ return DAG.getTargetExternalSymbol(ES->getSymbol(),
+ ES->getValueType(0), TF);
+
+ llvm_unreachable("Unhandled address SDNode");
+}
+
+// Split Op into high and low parts according to HiTF and LoTF.
+// Return an ADD node combining the parts.
+SDValue SparcTargetLowering::makeHiLoPair(SDValue Op,
+ unsigned HiTF, unsigned LoTF,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue Hi = DAG.getNode(SPISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
+ SDValue Lo = DAG.getNode(SPISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
+ return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
+}
+
+// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
+// or ExternalSymbol SDNode.
+SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = getPointerTy();
+
+ // Handle PIC mode first.
+ if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ // This is the pic32 code model; the GOT is known to be smaller than 4GB.
+ SDValue HiLo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
+ SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, VT);
+ SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo);
+ return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
+ }
+
+ // This is one of the absolute code models.
+ switch(getTargetMachine().getCodeModel()) {
+ default:
+ llvm_unreachable("Unsupported absolute code model");
+ case CodeModel::Small:
+ // abs32.
+ return makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
+ case CodeModel::Medium: {
+ // abs44.
+ SDValue H44 = makeHiLoPair(Op, SPII::MO_H44, SPII::MO_M44, DAG);
+ H44 = DAG.getNode(ISD::SHL, DL, VT, H44, DAG.getConstant(12, MVT::i32));
+ SDValue L44 = withTargetFlags(Op, SPII::MO_L44, DAG);
+ L44 = DAG.getNode(SPISD::Lo, DL, VT, L44);
+ return DAG.getNode(ISD::ADD, DL, VT, H44, L44);
+ }
+ case CodeModel::Large: {
+ // abs64.
+ SDValue Hi = makeHiLoPair(Op, SPII::MO_HH, SPII::MO_HM, DAG);
+ Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, DAG.getConstant(32, MVT::i32));
+ SDValue Lo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
+ return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
+ }
+ }
+}
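// Illustrative sketch (not part of the change above): how the abs44 fields of
// the medium code model recombine. %h44 covers bits 43-22 of the symbol
// address, %m44 bits 21-12 and %l44 bits 11-0, matching the
// ((H44|M44) << 12) + L44 sequence built in makeAddress.
#include <cassert>
#include <cstdint>

static uint64_t abs44(uint64_t Addr) {
  uint64_t H44 = (Addr >> 22) & 0x3fffff;  // 22 bits placed by sethi
  uint64_t M44 = (Addr >> 12) & 0x3ff;     // 10 bits or'd below them
  uint64_t L44 = Addr & 0xfff;             // final 12-bit immediate
  return (((H44 << 10) | M44) << 12) + L44;
}

int main() {
  assert(abs44(0xbadc0ffee1dULL) == 0xbadc0ffee1dULL);  // any 44-bit address
  return 0;
}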
+
SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- // FIXME there isn't really any debug info here
- DebugLoc dl = Op.getDebugLoc();
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
- SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
- SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
-
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
- return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-
- SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
- getPointerTy());
- SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
- SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
- GlobalBase, RelAddr);
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- AbsAddr, MachinePointerInfo(), false, false, false, 0);
+ return makeAddress(Op, DAG);
}
SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
- ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
- // FIXME there isn't really any debug info here
- DebugLoc dl = Op.getDebugLoc();
- const Constant *C = N->getConstVal();
- SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
- SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
- SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
- return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-
- SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
- getPointerTy());
- SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
- SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
- GlobalBase, RelAddr);
- return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- AbsAddr, MachinePointerInfo(), false, false, false, 0);
+ return makeAddress(Op, DAG);
}
static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
@@ -1092,14 +1609,13 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
- DebugLoc dl = Op.getDebugLoc();
+ DebugLoc DL = Op.getDebugLoc();
SDValue Offset =
- DAG.getNode(ISD::ADD, dl, MVT::i32,
- DAG.getRegister(SP::I6, MVT::i32),
- DAG.getConstant(FuncInfo->getVarArgsFrameOffset(),
- MVT::i32));
+ DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(),
+ DAG.getRegister(SP::I6, TLI.getPointerTy()),
+ DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1),
+ return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
MachinePointerInfo(SV), false, false, 0);
}
@@ -1108,33 +1624,22 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
EVT VT = Node->getValueType(0);
SDValue InChain = Node->getOperand(0);
SDValue VAListPtr = Node->getOperand(1);
+ EVT PtrVT = VAListPtr.getValueType();
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
- DebugLoc dl = Node->getDebugLoc();
- SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr,
+ DebugLoc DL = Node->getDebugLoc();
+ SDValue VAList = DAG.getLoad(PtrVT, DL, InChain, VAListPtr,
MachinePointerInfo(SV), false, false, false, 0);
- // Increment the pointer, VAList, to the next vaarg
- SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
- DAG.getConstant(VT.getSizeInBits()/8,
- MVT::i32));
- // Store the incremented VAList to the legalized pointer
- InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
+ // Increment the pointer, VAList, to the next vaarg.
+ SDValue NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+ DAG.getIntPtrConstant(VT.getSizeInBits()/8));
+ // Store the incremented VAList to the legalized pointer.
+ InChain = DAG.getStore(VAList.getValue(1), DL, NextPtr,
VAListPtr, MachinePointerInfo(SV), false, false, 0);
- // Load the actual argument out of the pointer VAList, unless this is an
- // f64 load.
- if (VT != MVT::f64)
- return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
- false, false, false, 0);
-
- // Otherwise, load it as i64, then do a bitconvert.
- SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(),
- false, false, false, 0);
-
- // Bit-Convert the value to f64.
- SDValue Ops[2] = {
- DAG.getNode(ISD::BITCAST, dl, MVT::f64, V),
- V.getValue(1)
- };
- return DAG.getMergeValues(Ops, 2, dl);
+ // Load the actual argument out of the pointer VAList.
+ // We can't count on greater alignment than the word size.
+ return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
+ false, false, false,
+ std::min(PtrVT.getSizeInBits(), VT.getSizeInBits())/8);
}
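// Illustrative sketch (not part of the change above): the alignment passed to
// the va_arg load is capped at the pointer width, so an f64 read through a
// 32-bit va_list is only assumed to be 4-byte aligned. Helper name is made up.
#include <algorithm>
#include <cassert>

static unsigned vaArgLoadAlign(unsigned PtrBits, unsigned ValueBits) {
  return std::min(PtrBits, ValueBits) / 8;
}

int main() {
  assert(vaArgLoadAlign(32, 64) == 4);  // f64 read through a 32-bit va_list
  assert(vaArgLoadAlign(64, 64) == 8);  // the same load with 64-bit pointers
  return 0;
}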
static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
index aa2ef71..fd706be 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -71,6 +71,7 @@ namespace llvm {
getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -95,6 +96,10 @@ namespace llvm {
virtual SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
LowerReturn(SDValue Chain,
@@ -102,11 +107,25 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+ SDValue LowerReturn_32(SDValue Chain,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const;
+ SDValue LowerReturn_64(SDValue Chain,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const;
+ SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
+ SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
+ SelectionDAG &DAG) const;
+ SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
index ca1153b..91805f9 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td
@@ -40,6 +40,9 @@ let Predicates = [Is64Bit] in {
def : Pat<(i64 (zext i32:$val)), (SRLri $val, 0)>;
def : Pat<(i64 (sext i32:$val)), (SRAri $val, 0)>;
+def : Pat<(i64 (and i64:$val, 0xffffffff)), (SRLri $val, 0)>;
+def : Pat<(i64 (sext_inreg i64:$val, i32)), (SRAri $val, 0)>;
+
defm SLLX : F3_S<"sllx", 0b100101, 1, shl, i64, I64Regs>;
defm SRLX : F3_S<"srlx", 0b100110, 1, srl, i64, I64Regs>;
defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, I64Regs>;
@@ -130,7 +133,7 @@ def HM10 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(Val, MVT::i32);
}]>;
def : Pat<(i64 imm:$val),
- (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i64 32)),
+ (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i32 32)),
(ORri (SETHIi (HI22 $val)), (LO10 $val)))>,
Requires<[Is64Bit]>;
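// Illustrative sketch (not part of the change above): the four fields used in
// the i64 immediate pattern. %hh/%hm cover bits 63-42 and 41-32, %hi/%lo bits
// 31-10 and 9-0, and the two 32-bit halves are joined by a 32-bit shift.
#include <cassert>
#include <cstdint>

static uint64_t buildImm64(uint64_t V) {
  uint64_t HH22 = (V >> 42) & 0x3fffff;  // sethi for the high word
  uint64_t HM10 = (V >> 32) & 0x3ff;     // or-immediate for the high word
  uint64_t HI22 = (V >> 10) & 0x3fffff;  // sethi for the low word
  uint64_t LO10 = V & 0x3ff;             // or-immediate for the low word
  uint64_t Hi = (HH22 << 10) | HM10;
  uint64_t Lo = (HI22 << 10) | LO10;
  return (Hi << 32) | Lo;
}

int main() {
  assert(buildImm64(0xdeadbeefcafef00dULL) == 0xdeadbeefcafef00dULL);
  return 0;
}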
@@ -178,6 +181,45 @@ def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (SUBCCri $a, (as_i32imm $b))>;
//===----------------------------------------------------------------------===//
+// 64-bit Integer Multiply and Divide.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+
+def MULXrr : F3_1<2, 0b001001,
+ (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
+ "mulx $rs1, $rs2, $rd",
+ [(set i64:$rd, (mul i64:$rs1, i64:$rs2))]>;
+def MULXri : F3_2<2, 0b001001,
+ (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i),
+ "mulx $rs1, $i, $rd",
+ [(set i64:$rd, (mul i64:$rs1, (i64 simm13:$i)))]>;
+
+// Division can trap.
+let hasSideEffects = 1 in {
+def SDIVXrr : F3_1<2, 0b101101,
+ (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
+ "sdivx $rs1, $rs2, $rd",
+ [(set i64:$rd, (sdiv i64:$rs1, i64:$rs2))]>;
+def SDIVXri : F3_2<2, 0b101101,
+ (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i),
+ "sdivx $rs1, $i, $rd",
+ [(set i64:$rd, (sdiv i64:$rs1, (i64 simm13:$i)))]>;
+
+def UDIVXrr : F3_1<2, 0b001101,
+ (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
+ "udivx $rs1, $rs2, $rd",
+ [(set i64:$rd, (udiv i64:$rs1, i64:$rs2))]>;
+def UDIVXri : F3_2<2, 0b001101,
+ (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i),
+ "udivx $rs1, $i, $rd",
+ [(set i64:$rd, (udiv i64:$rs1, (i64 simm13:$i)))]>;
+} // hasSideEffects = 1
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
// 64-bit Loads and Stores.
//===----------------------------------------------------------------------===//
//
@@ -203,16 +245,22 @@ def LDXri : F3_2<3, 0b001011,
// Extending loads to i64.
def : Pat<(i64 (zextloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>;
def : Pat<(i64 (zextloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>;
+def : Pat<(i64 (extloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>;
+def : Pat<(i64 (extloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>;
def : Pat<(i64 (sextloadi8 ADDRrr:$addr)), (LDSBrr ADDRrr:$addr)>;
def : Pat<(i64 (sextloadi8 ADDRri:$addr)), (LDSBri ADDRri:$addr)>;
def : Pat<(i64 (zextloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>;
def : Pat<(i64 (zextloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>;
+def : Pat<(i64 (extloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>;
+def : Pat<(i64 (extloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>;
def : Pat<(i64 (sextloadi16 ADDRrr:$addr)), (LDSHrr ADDRrr:$addr)>;
def : Pat<(i64 (sextloadi16 ADDRri:$addr)), (LDSHri ADDRri:$addr)>;
def : Pat<(i64 (zextloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>;
def : Pat<(i64 (zextloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>;
+def : Pat<(i64 (extloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>;
+def : Pat<(i64 (extloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>;
// Sign-extending load of i32 into i64 is a new SPARC v9 instruction.
def LDSWrr : F3_1<3, 0b001011,
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
index f101856..e7fde08 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td
@@ -142,10 +142,10 @@ class F3_Si<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
// Define rr and ri shift instructions with patterns.
multiclass F3_S<string OpcStr, bits<6> Op3Val, bit XVal, SDNode OpNode,
ValueType VT, RegisterClass RC> {
- def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, RC:$rs2),
+ def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, IntRegs:$rs2),
!strconcat(OpcStr, " $rs, $rs2, $rd"),
- [(set VT:$rd, (OpNode VT:$rs, VT:$rs2))]>;
- def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, unknown:$shcnt),
+ [(set VT:$rd, (OpNode VT:$rs, i32:$rs2))]>;
+ def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, i32imm:$shcnt),
!strconcat(OpcStr, " $rs, $shcnt, $rd"),
- [(set VT:$rd, (OpNode VT:$rs, (VT imm:$shcnt)))]>;
+ [(set VT:$rd, (OpNode VT:$rs, (i32 imm:$shcnt)))]>;
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 5ff4395..baefb06 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -64,8 +64,7 @@ def HI22 : SDNodeXForm<imm, [{
}]>;
def SETHIimm : PatLeaf<(imm), [{
- return (((unsigned)N->getZExtValue() >> 10) << 10) ==
- (unsigned)N->getZExtValue();
+ return isShiftedUInt<22, 10>(N->getZExtValue());
}], HI22>;
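// Illustrative sketch (not part of the change above): isShiftedUInt<22, 10>
// accepts exactly the immediates sethi can materialize, i.e. 32-bit values
// whose low 10 bits are zero. An equivalent standalone check (made-up name):
#include <cassert>
#include <cstdint>

static bool isSethiImm(uint64_t V) {
  return (V & 0x3ff) == 0 && (V >> 10) < (1u << 22);
}

int main() {
  assert(isSethiImm(0x12345400));       // low 10 bits clear, fits in 32 bits
  assert(!isSethiImm(0x12345401));      // low bits set
  assert(!isSethiImm(0x100000000ULL));  // needs more than 32 bits
  return 0;
}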
// Addressing modes.
@@ -796,10 +795,8 @@ def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>;
// Add reg, lo. This is used when taking the addr of a global/constpool entry.
-def : Pat<(add i32:$r, (SPlo tglobaladdr:$in)),
- (ADDri $r, tglobaladdr:$in)>;
-def : Pat<(add i32:$r, (SPlo tconstpool:$in)),
- (ADDri $r, tconstpool:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (ADDri $r, tglobaladdr:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)), (ADDri $r, tconstpool:$in)>;
// Calls:
def : Pat<(call tglobaladdr:$dst),
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index db9b30e..3af4c61 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -74,8 +74,9 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Addressable stack objects are accessed using neg. offsets from %fp
MachineFunction &MF = *MI.getParent()->getParent();
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MI.getOperand(FIOperandNum + 1).getImm();
+ int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MI.getOperand(FIOperandNum + 1).getImm() +
+ Subtarget.getStackPointerBias();
// Replace frame index with a frame pointer reference.
if (Offset >= -4096 && Offset <= 4095) {
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
index a81931b..b94dd11 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -52,6 +52,12 @@ public:
}
return std::string(p);
}
+
+ /// The 64-bit ABI uses biased stack and frame pointers, so the stack frame
+ /// of the current function is the area from [%sp+BIAS] to [%fp+BIAS].
+ int64_t getStackPointerBias() const {
+ return is64Bit() ? 2047 : 0;
+ }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
new file mode 100644
index 0000000..c7725a1
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -0,0 +1,689 @@
+//===-- SystemZAsmParser.cpp - Parse SystemZ assembly instructions --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+// Return true if Expr is in the range [MinValue, MaxValue].
+static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) {
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) {
+ int64_t Value = CE->getValue();
+ return Value >= MinValue && Value <= MaxValue;
+ }
+ return false;
+}
+
+namespace {
+class SystemZOperand : public MCParsedAsmOperand {
+public:
+ enum RegisterKind {
+ GR32Reg,
+ GR64Reg,
+ GR128Reg,
+ ADDR32Reg,
+ ADDR64Reg,
+ FP32Reg,
+ FP64Reg,
+ FP128Reg
+ };
+
+private:
+ enum OperandKind {
+ KindToken,
+ KindReg,
+ KindAccessReg,
+ KindImm,
+ KindMem
+ };
+
+ OperandKind Kind;
+ SMLoc StartLoc, EndLoc;
+
+ // A string of length Length, starting at Data.
+ struct TokenOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ // LLVM register Num, which has kind Kind.
+ struct RegOp {
+ RegisterKind Kind;
+ unsigned Num;
+ };
+
+ // Base + Disp + Index, where Base and Index are LLVM registers or 0.
+ // RegKind says what type the registers have (ADDR32Reg or ADDR64Reg).
+ struct MemOp {
+ unsigned Base : 8;
+ unsigned Index : 8;
+ unsigned RegKind : 8;
+ unsigned Unused : 8;
+ const MCExpr *Disp;
+ };
+
+ union {
+ TokenOp Token;
+ RegOp Reg;
+ unsigned AccessReg;
+ const MCExpr *Imm;
+ MemOp Mem;
+ };
+
+ SystemZOperand(OperandKind kind, SMLoc startLoc, SMLoc endLoc)
+ : Kind(kind), StartLoc(startLoc), EndLoc(endLoc)
+ {}
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible. Null MCExpr = 0.
+ if (Expr == 0)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+public:
+ // Create particular kinds of operand.
+ static SystemZOperand *createToken(StringRef Str, SMLoc Loc) {
+ SystemZOperand *Op = new SystemZOperand(KindToken, Loc, Loc);
+ Op->Token.Data = Str.data();
+ Op->Token.Length = Str.size();
+ return Op;
+ }
+ static SystemZOperand *createReg(RegisterKind Kind, unsigned Num,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ SystemZOperand *Op = new SystemZOperand(KindReg, StartLoc, EndLoc);
+ Op->Reg.Kind = Kind;
+ Op->Reg.Num = Num;
+ return Op;
+ }
+ static SystemZOperand *createAccessReg(unsigned Num, SMLoc StartLoc,
+ SMLoc EndLoc) {
+ SystemZOperand *Op = new SystemZOperand(KindAccessReg, StartLoc, EndLoc);
+ Op->AccessReg = Num;
+ return Op;
+ }
+ static SystemZOperand *createImm(const MCExpr *Expr, SMLoc StartLoc,
+ SMLoc EndLoc) {
+ SystemZOperand *Op = new SystemZOperand(KindImm, StartLoc, EndLoc);
+ Op->Imm = Expr;
+ return Op;
+ }
+ static SystemZOperand *createMem(RegisterKind RegKind, unsigned Base,
+ const MCExpr *Disp, unsigned Index,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ SystemZOperand *Op = new SystemZOperand(KindMem, StartLoc, EndLoc);
+ Op->Mem.RegKind = RegKind;
+ Op->Mem.Base = Base;
+ Op->Mem.Index = Index;
+ Op->Mem.Disp = Disp;
+ return Op;
+ }
+
+ // Token operands
+ virtual bool isToken() const LLVM_OVERRIDE {
+ return Kind == KindToken;
+ }
+ StringRef getToken() const {
+ assert(Kind == KindToken && "Not a token");
+ return StringRef(Token.Data, Token.Length);
+ }
+
+ // Register operands.
+ virtual bool isReg() const LLVM_OVERRIDE {
+ return Kind == KindReg;
+ }
+ bool isReg(RegisterKind RegKind) const {
+ return Kind == KindReg && Reg.Kind == RegKind;
+ }
+ virtual unsigned getReg() const LLVM_OVERRIDE {
+ assert(Kind == KindReg && "Not a register");
+ return Reg.Num;
+ }
+
+ // Access register operands. Access registers aren't exposed to LLVM
+ // as registers.
+ bool isAccessReg() const {
+ return Kind == KindAccessReg;
+ }
+
+ // Immediate operands.
+ virtual bool isImm() const LLVM_OVERRIDE {
+ return Kind == KindImm;
+ }
+ bool isImm(int64_t MinValue, int64_t MaxValue) const {
+ return Kind == KindImm && inRange(Imm, MinValue, MaxValue);
+ }
+ const MCExpr *getImm() const {
+ assert(Kind == KindImm && "Not an immediate");
+ return Imm;
+ }
+
+ // Memory operands.
+ virtual bool isMem() const LLVM_OVERRIDE {
+ return Kind == KindMem;
+ }
+ bool isMem(RegisterKind RegKind, bool HasIndex) const {
+ return (Kind == KindMem &&
+ Mem.RegKind == RegKind &&
+ (HasIndex || !Mem.Index));
+ }
+ bool isMemDisp12(RegisterKind RegKind, bool HasIndex) const {
+ return isMem(RegKind, HasIndex) && inRange(Mem.Disp, 0, 0xfff);
+ }
+ bool isMemDisp20(RegisterKind RegKind, bool HasIndex) const {
+ return isMem(RegKind, HasIndex) && inRange(Mem.Disp, -524288, 524287);
+ }
+
+ // Override MCParsedAsmOperand.
+ virtual SMLoc getStartLoc() const LLVM_OVERRIDE { return StartLoc; }
+ virtual SMLoc getEndLoc() const LLVM_OVERRIDE { return EndLoc; }
+ virtual void print(raw_ostream &OS) const LLVM_OVERRIDE;
+
+ // Used by the TableGen code to add particular types of operand
+ // to an instruction.
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+ void addAccessRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ assert(Kind == KindAccessReg && "Invalid operand type");
+ Inst.addOperand(MCOperand::CreateImm(AccessReg));
+ }
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands");
+ addExpr(Inst, getImm());
+ }
+ void addBDAddrOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands");
+ assert(Kind == KindMem && Mem.Index == 0 && "Invalid operand type");
+ Inst.addOperand(MCOperand::CreateReg(Mem.Base));
+ addExpr(Inst, Mem.Disp);
+ }
+ void addBDXAddrOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands");
+ assert(Kind == KindMem && "Invalid operand type");
+ Inst.addOperand(MCOperand::CreateReg(Mem.Base));
+ addExpr(Inst, Mem.Disp);
+ Inst.addOperand(MCOperand::CreateReg(Mem.Index));
+ }
+
+ // Used by the TableGen code to check for particular operand types.
+ bool isGR32() const { return isReg(GR32Reg); }
+ bool isGR64() const { return isReg(GR64Reg); }
+ bool isGR128() const { return isReg(GR128Reg); }
+ bool isADDR32() const { return isReg(ADDR32Reg); }
+ bool isADDR64() const { return isReg(ADDR64Reg); }
+ bool isADDR128() const { return false; }
+ bool isFP32() const { return isReg(FP32Reg); }
+ bool isFP64() const { return isReg(FP64Reg); }
+ bool isFP128() const { return isReg(FP128Reg); }
+ bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, false); }
+ bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, false); }
+ bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, false); }
+ bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, false); }
+ bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, true); }
+ bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, true); }
+ bool isU4Imm() const { return isImm(0, 15); }
+ bool isU6Imm() const { return isImm(0, 63); }
+ bool isU8Imm() const { return isImm(0, 255); }
+ bool isS8Imm() const { return isImm(-128, 127); }
+ bool isU16Imm() const { return isImm(0, 65535); }
+ bool isS16Imm() const { return isImm(-32768, 32767); }
+ bool isU32Imm() const { return isImm(0, (1LL << 32) - 1); }
+ bool isS32Imm() const { return isImm(-(1LL << 31), (1LL << 31) - 1); }
+};
+
+// Maps of asm register numbers to LLVM register numbers, with 0 indicating
+// an invalid register. We don't use the register classes directly because
+// they specify the allocation order.
+static const unsigned GR32Regs[] = {
+ SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W,
+ SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W,
+ SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W,
+ SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W
+};
+static const unsigned GR64Regs[] = {
+ SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D,
+ SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D,
+ SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D,
+ SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D
+};
+static const unsigned GR128Regs[] = {
+ SystemZ::R0Q, 0, SystemZ::R2Q, 0,
+ SystemZ::R4Q, 0, SystemZ::R6Q, 0,
+ SystemZ::R8Q, 0, SystemZ::R10Q, 0,
+ SystemZ::R12Q, 0, SystemZ::R14Q, 0
+};
+static const unsigned FP32Regs[] = {
+ SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S,
+ SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S,
+ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
+ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S
+};
+static const unsigned FP64Regs[] = {
+ SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D,
+ SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D,
+ SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D,
+ SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D
+};
+static const unsigned FP128Regs[] = {
+ SystemZ::F0Q, SystemZ::F1Q, 0, 0,
+ SystemZ::F4Q, SystemZ::F5Q, 0, 0,
+ SystemZ::F8Q, SystemZ::F9Q, 0, 0,
+ SystemZ::F12Q, SystemZ::F13Q, 0, 0
+};
+
+class SystemZAsmParser : public MCTargetAsmParser {
+#define GET_ASSEMBLER_HEADER
+#include "SystemZGenAsmMatcher.inc"
+
+private:
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+ struct Register {
+ char Prefix;
+ unsigned Number;
+ SMLoc StartLoc, EndLoc;
+ };
+
+ bool parseRegister(Register &Reg);
+
+ OperandMatchResultTy
+ parseRegister(Register &Reg, char Prefix, const unsigned *Regs,
+ bool IsAddress = false);
+
+ OperandMatchResultTy
+ parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ char Prefix, const unsigned *Regs,
+ SystemZOperand::RegisterKind Kind,
+ bool IsAddress = false);
+
+ OperandMatchResultTy
+ parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ const unsigned *Regs, SystemZOperand::RegisterKind RegKind,
+ bool HasIndex);
+
+ bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic);
+
+public:
+ SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+ : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ MCAsmParserExtension::Initialize(Parser);
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ // Override MCTargetAsmParser.
+ virtual bool ParseDirective(AsmToken DirectiveID) LLVM_OVERRIDE;
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) LLVM_OVERRIDE;
+ virtual bool ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands)
+ LLVM_OVERRIDE;
+ virtual bool
+ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) LLVM_OVERRIDE;
+
+ // Used by the TableGen code to parse particular operand types.
+ OperandMatchResultTy
+ parseGR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::GR32Reg);
+ }
+ OperandMatchResultTy
+ parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::GR64Reg);
+ }
+ OperandMatchResultTy
+ parseGR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseRegister(Operands, 'r', GR128Regs, SystemZOperand::GR128Reg);
+ }
+ OperandMatchResultTy
+ parseADDR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::ADDR32Reg,
+ true);
+ }
+ OperandMatchResultTy
+ parseADDR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::ADDR64Reg,
+ true);
+ }
+ OperandMatchResultTy
+ parseADDR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ llvm_unreachable("Shouldn't be used as an operand");
+ }
+ OperandMatchResultTy
+ parseFP32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseRegister(Operands, 'f', FP32Regs, SystemZOperand::FP32Reg);
+ }
+ OperandMatchResultTy
+ parseFP64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseRegister(Operands, 'f', FP64Regs, SystemZOperand::FP64Reg);
+ }
+ OperandMatchResultTy
+ parseFP128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseRegister(Operands, 'f', FP128Regs, SystemZOperand::FP128Reg);
+ }
+ OperandMatchResultTy
+ parseBDAddr32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseAddress(Operands, GR32Regs, SystemZOperand::ADDR32Reg, false);
+ }
+ OperandMatchResultTy
+ parseBDAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, false);
+ }
+ OperandMatchResultTy
+ parseBDXAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, true);
+ }
+ OperandMatchResultTy
+ parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+};
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
+#define GET_MATCHER_IMPLEMENTATION
+#include "SystemZGenAsmMatcher.inc"
+
+void SystemZOperand::print(raw_ostream &OS) const {
+ llvm_unreachable("Not implemented");
+}
+
+// Parse one register of the form %<prefix><number>.
+bool SystemZAsmParser::parseRegister(Register &Reg) {
+ Reg.StartLoc = Parser.getTok().getLoc();
+
+ // Eat the % prefix.
+ if (Parser.getTok().isNot(AsmToken::Percent))
+ return true;
+ Parser.Lex();
+
+ // Expect a register name.
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return true;
+
+ // Check the prefix.
+ StringRef Name = Parser.getTok().getString();
+ if (Name.size() < 2)
+ return true;
+ Reg.Prefix = Name[0];
+
+ // Treat the rest of the register name as a register number.
+ if (Name.substr(1).getAsInteger(10, Reg.Number))
+ return true;
+
+ Reg.EndLoc = Parser.getTok().getLoc();
+ Parser.Lex();
+ return false;
+}
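// Illustrative sketch (not part of the change above): the register syntax
// accepted by parseRegister is %<prefix><decimal>, e.g. "%r15" or "%f4". A
// standalone check of that shape, with no MCAsmParser involved; the helper
// name is made up.
#include <cassert>
#include <cctype>
#include <string>

static bool looksLikeReg(const std::string &S, char &Prefix, unsigned &Number) {
  if (S.size() < 3 || S[0] != '%' || !std::isalpha((unsigned char)S[1]))
    return false;
  Prefix = S[1];
  Number = 0;
  for (std::string::size_type I = 2; I < S.size(); ++I) {
    if (!std::isdigit((unsigned char)S[I]))
      return false;
    Number = Number * 10 + unsigned(S[I] - '0');
  }
  return true;
}

int main() {
  char P = 0;
  unsigned N = 0;
  assert(looksLikeReg("%r15", P, N) && P == 'r' && N == 15);
  assert(!looksLikeReg("%r", P, N));  // a digit is required after the prefix
  return 0;
}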
+
+// Parse a register with prefix Prefix and convert it to LLVM numbering.
+// Regs maps asm register numbers to LLVM register numbers, with zero
+// entries indicating an invalid register. IsAddress says whether the
+// register appears in an address context.
+SystemZAsmParser::OperandMatchResultTy
+SystemZAsmParser::parseRegister(Register &Reg, char Prefix,
+ const unsigned *Regs, bool IsAddress) {
+ if (parseRegister(Reg))
+ return MatchOperand_NoMatch;
+ if (Reg.Prefix != Prefix || Reg.Number > 15 || Regs[Reg.Number] == 0) {
+ Error(Reg.StartLoc, "invalid register");
+ return MatchOperand_ParseFail;
+ }
+ if (Reg.Number == 0 && IsAddress) {
+ Error(Reg.StartLoc, "%r0 used in an address");
+ return MatchOperand_ParseFail;
+ }
+ Reg.Number = Regs[Reg.Number];
+ return MatchOperand_Success;
+}
+
+// Parse a register and add it to Operands. Prefix is 'r' for GPRs,
+// 'f' for FPRs, etc. Regs maps asm register numbers to LLVM register numbers,
+// with zero entries indicating an invalid register. Kind is the type of
+// register represented by Regs and IsAddress says whether the register is
+// being parsed in an address context, meaning that %r0 evaluates as 0.
+SystemZAsmParser::OperandMatchResultTy
+SystemZAsmParser::parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ char Prefix, const unsigned *Regs,
+ SystemZOperand::RegisterKind Kind,
+ bool IsAddress) {
+ Register Reg;
+ OperandMatchResultTy Result = parseRegister(Reg, Prefix, Regs, IsAddress);
+ if (Result == MatchOperand_Success)
+ Operands.push_back(SystemZOperand::createReg(Kind, Reg.Number,
+ Reg.StartLoc, Reg.EndLoc));
+ return Result;
+}
+
+// Parse a memory operand and add it to Operands. Regs maps asm register
+// numbers to LLVM address registers and RegKind says what kind of address
+// register we're using (ADDR32Reg or ADDR64Reg). HasIndex says whether
+// the address allows index registers.
+SystemZAsmParser::OperandMatchResultTy
+SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ const unsigned *Regs,
+ SystemZOperand::RegisterKind RegKind,
+ bool HasIndex) {
+ SMLoc StartLoc = Parser.getTok().getLoc();
+
+ // Parse the displacement, which must always be present.
+ const MCExpr *Disp;
+ if (getParser().parseExpression(Disp))
+ return MatchOperand_NoMatch;
+
+ // Parse the optional base and index.
+ unsigned Index = 0;
+ unsigned Base = 0;
+ if (getLexer().is(AsmToken::LParen)) {
+ Parser.Lex();
+
+ // Parse the first register.
+ Register Reg;
+ OperandMatchResultTy Result = parseRegister(Reg, 'r', GR64Regs, true);
+ if (Result != MatchOperand_Success)
+ return Result;
+
+ // Check whether there's a second register. If so, the one that we
+ // just parsed was the index.
+ if (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex();
+
+ if (!HasIndex) {
+ Error(Reg.StartLoc, "invalid use of indexed addressing");
+ return MatchOperand_ParseFail;
+ }
+
+ Index = Reg.Number;
+ Result = parseRegister(Reg, 'r', GR64Regs, true);
+ if (Result != MatchOperand_Success)
+ return Result;
+ }
+ Base = Reg.Number;
+
+ // Consume the closing parenthesis.
+ if (getLexer().isNot(AsmToken::RParen))
+ return MatchOperand_NoMatch;
+ Parser.Lex();
+ }
+
+ SMLoc EndLoc =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index,
+ StartLoc, EndLoc));
+ return MatchOperand_Success;
+}
+
+bool SystemZAsmParser::ParseDirective(AsmToken DirectiveID) {
+ return true;
+}
+
+bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ Register Reg;
+ if (parseRegister(Reg))
+ return Error(Reg.StartLoc, "register expected");
+ if (Reg.Prefix == 'r' && Reg.Number < 16)
+ RegNo = GR64Regs[Reg.Number];
+ else if (Reg.Prefix == 'f' && Reg.Number < 16)
+ RegNo = FP64Regs[Reg.Number];
+ else
+ return Error(Reg.StartLoc, "invalid register");
+ StartLoc = Reg.StartLoc;
+ EndLoc = Reg.EndLoc;
+ return false;
+}
+
+bool SystemZAsmParser::
+ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ Operands.push_back(SystemZOperand::createToken(Name, NameLoc));
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ // Read the first operand.
+ if (parseOperand(Operands, Name)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+
+ // Read any subsequent operands.
+ while (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex();
+ if (parseOperand(Operands, Name)) {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+ }
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+ }
+
+ // Consume the EndOfStatement.
+ Parser.Lex();
+ return false;
+}
+
+bool SystemZAsmParser::
+parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ StringRef Mnemonic) {
+ // Check if the current operand has a custom associated parser. If so, try to
+ // custom-parse the operand; otherwise fall back to the general approach.
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ if (ResTy == MatchOperand_Success)
+ return false;
+
+ // If there wasn't a custom match, try the generic matcher below. Otherwise
+ // there was a match but an error occurred, in which case just return that
+ // the operand parsing failed.
+ if (ResTy == MatchOperand_ParseFail)
+ return true;
+
+ // The only other type of operand is an immediate.
+ const MCExpr *Expr;
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ if (getParser().parseExpression(Expr))
+ return true;
+
+ SMLoc EndLoc =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
+ return false;
+}
+
+bool SystemZAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult;
+
+ MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm);
+ switch (MatchResult) {
+ default: break;
+ case Match_Success:
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst);
+ return false;
+
+ case Match_MissingFeature: {
+ assert(ErrorInfo && "Unknown missing feature!");
+ // Special-case the error message for the very common case where only a
+ // single subtarget feature is missing.
+ std::string Msg = "instruction requires:";
+ unsigned Mask = 1;
+ for (unsigned I = 0; I < sizeof(ErrorInfo) * 8 - 1; ++I) {
+ if (ErrorInfo & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg);
+ }
+
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((SystemZOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ case Match_MnemonicFail:
+ return Error(IDLoc, "invalid instruction");
+ }
+
+ llvm_unreachable("Unexpected match type");
+}
+
+SystemZAsmParser::OperandMatchResultTy SystemZAsmParser::
+parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ Register Reg;
+ if (parseRegister(Reg))
+ return MatchOperand_NoMatch;
+ if (Reg.Prefix != 'a' || Reg.Number > 15) {
+ Error(Reg.StartLoc, "invalid register");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(SystemZOperand::createAccessReg(Reg.Number,
+ Reg.StartLoc, Reg.EndLoc));
+ return MatchOperand_Success;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSystemZAsmParser() {
+ RegisterMCAsmParser<SystemZAsmParser> X(TheSystemZTarget);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
new file mode 100644
index 0000000..d73cf49
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -0,0 +1,150 @@
+//===-- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+
+#include "SystemZInstPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#include "SystemZGenAsmWriter.inc"
+
+void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp,
+ unsigned Index, raw_ostream &O) {
+ O << Disp;
+ if (Base) {
+ O << '(';
+ if (Index)
+ O << '%' << getRegisterName(Index) << ',';
+ O << '%' << getRegisterName(Base) << ')';
+ } else
+ assert(!Index && "Shouldn't have an index without a base");
+}
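// Illustrative sketch (not part of the change above): the D(B) and D(X,B)
// address syntax produced by printAddress, re-created with a hypothetical
// string-based helper so the behaviour can be checked in isolation.
#include <cassert>
#include <sstream>
#include <string>

static std::string formatAddress(const std::string &Base, long long Disp,
                                 const std::string &Index) {
  std::ostringstream OS;
  OS << Disp;
  if (!Base.empty()) {
    OS << '(';
    if (!Index.empty())
      OS << '%' << Index << ',';
    OS << '%' << Base << ')';
  }
  return OS.str();
}

int main() {
  assert(formatAddress("r1", 100, "") == "100(%r1)");
  assert(formatAddress("r1", 100, "r2") == "100(%r2,%r1)");
  assert(formatAddress("", -524288, "") == "-524288");  // base of zero
  return 0;
}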
+
+void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) {
+ if (MO.isReg())
+ O << '%' << getRegisterName(MO.getReg());
+ else if (MO.isImm())
+ O << MO.getImm();
+ else if (MO.isExpr())
+ O << *MO.getExpr();
+ else
+ llvm_unreachable("Invalid operand");
+}
+
+void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
+ O << '%' << getRegisterName(RegNo);
+}
+
+void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ int64_t Value = MI->getOperand(OpNum).getImm();
+ assert(isUInt<4>(Value) && "Invalid u4imm argument");
+ O << Value;
+}
+
+void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ int64_t Value = MI->getOperand(OpNum).getImm();
+ assert(isUInt<6>(Value) && "Invalid u6imm argument");
+ O << Value;
+}
+
+void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ int64_t Value = MI->getOperand(OpNum).getImm();
+ assert(isInt<8>(Value) && "Invalid s8imm argument");
+ O << Value;
+}
+
+void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ int64_t Value = MI->getOperand(OpNum).getImm();
+ assert(isUInt<8>(Value) && "Invalid u8imm argument");
+ O << Value;
+}
+
+void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ int64_t Value = MI->getOperand(OpNum).getImm();
+ assert(isInt<16>(Value) && "Invalid s16imm argument");
+ O << Value;
+}
+
+void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ int64_t Value = MI->getOperand(OpNum).getImm();
+ assert(isUInt<16>(Value) && "Invalid u16imm argument");
+ O << Value;
+}
+
+void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ int64_t Value = MI->getOperand(OpNum).getImm();
+ assert(isInt<32>(Value) && "Invalid s32imm argument");
+ O << Value;
+}
+
+void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ int64_t Value = MI->getOperand(OpNum).getImm();
+ assert(isUInt<32>(Value) && "Invalid u32imm argument");
+ O << Value;
+}
+
+void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ uint64_t Value = MI->getOperand(OpNum).getImm();
+ assert(Value < 16 && "Invalid access register number");
+ O << "%a" << (unsigned int)Value;
+}
+
+void SystemZInstPrinter::printCallOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printOperand(MI, OpNum, O);
+ O << "@PLT";
+}
+
+void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printOperand(MI->getOperand(OpNum), O);
+}
+
+void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printAddress(MI->getOperand(OpNum).getReg(),
+ MI->getOperand(OpNum + 1).getImm(), 0, O);
+}
+
+void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printAddress(MI->getOperand(OpNum).getReg(),
+ MI->getOperand(OpNum + 1).getImm(),
+ MI->getOperand(OpNum + 2).getReg(), O);
+}
+
+void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ static const char *const CondNames[] = {
+ "o", "h", "nle", "l", "nhe", "lh", "ne",
+ "e", "nlh", "he", "nl", "le", "nh", "no"
+ };
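+  // The table covers condition-code masks 1 to 14; masks 0 (never) and
+  // 15 (always) are excluded, as the assertion below checks.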
+ uint64_t Imm = MI->getOperand(OpNum).getImm();
+ assert(Imm > 0 && Imm < 15 && "Invalid condition");
+ O << CondNames[Imm - 1];
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
new file mode 100644
index 0000000..b82e79d
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -0,0 +1,68 @@
+//==- SystemZInstPrinter.h - Convert SystemZ MCInst to assembly --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a SystemZ MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEMZINSTPRINTER_H
+#define LLVM_SYSTEMZINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class MCOperand;
+
+class SystemZInstPrinter : public MCInstPrinter {
+public:
+ SystemZInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ // Automatically generated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ // Print an address with the given base, displacement and index.
+ static void printAddress(unsigned Base, int64_t Disp, unsigned Index,
+ raw_ostream &O);
+
+ // Print the given operand.
+ static void printOperand(const MCOperand &MO, raw_ostream &O);
+
+ // Override MCInstPrinter.
+ virtual void printRegName(raw_ostream &O, unsigned RegNo) const
+ LLVM_OVERRIDE;
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot)
+ LLVM_OVERRIDE;
+
+private:
+ // Print various types of operand.
+ void printOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+
+ // Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
+ // This forms part of the instruction name rather than the operand list.
+ void printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
new file mode 100644
index 0000000..e901c6c
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -0,0 +1,151 @@
+//===-- SystemZMCAsmBackend.cpp - SystemZ assembler backend ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "MCTargetDesc/SystemZMCFixups.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectWriter.h"
+
+using namespace llvm;
+
+// Value is a fully-resolved relocation value: Symbol + Addend [- Pivot].
+// Return the bits that should be installed in a relocation field for
+// fixup kind Kind.
+static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
+ if (Kind < FirstTargetFixupKind)
+ return Value;
+
+ switch (unsigned(Kind)) {
+ case SystemZ::FK_390_PC16DBL:
+ case SystemZ::FK_390_PC32DBL:
+ case SystemZ::FK_390_PLT16DBL:
+ case SystemZ::FK_390_PLT32DBL:
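+    // These fixups are halfword-based, so convert the byte offset to
+    // halfwords.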
+ return (int64_t)Value / 2;
+ }
+
+ llvm_unreachable("Unknown fixup kind!");
+}
+
+// If Opcode can be relaxed, return the relaxed form, otherwise return 0.
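+// Each relaxed form is the long (32-bit PC-relative) variant of the
+// original short (16-bit PC-relative) branch.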
+static unsigned getRelaxedOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case SystemZ::BRC: return SystemZ::BRCL;
+ case SystemZ::J: return SystemZ::JG;
+ case SystemZ::BRAS: return SystemZ::BRASL;
+ }
+ return 0;
+}
+
+namespace {
+class SystemZMCAsmBackend : public MCAsmBackend {
+ uint8_t OSABI;
+public:
+ SystemZMCAsmBackend(uint8_t osABI)
+ : OSABI(osABI) {}
+
+  // Override MCAsmBackend.
+ virtual unsigned getNumFixupKinds() const LLVM_OVERRIDE {
+ return SystemZ::NumTargetFixupKinds;
+ }
+ virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const
+ LLVM_OVERRIDE;
+ virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const LLVM_OVERRIDE;
+ virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE;
+ virtual bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *Fragment,
+ const MCAsmLayout &Layout) const
+ LLVM_OVERRIDE;
+ virtual void relaxInstruction(const MCInst &Inst,
+ MCInst &Res) const LLVM_OVERRIDE;
+ virtual bool writeNopData(uint64_t Count,
+ MCObjectWriter *OW) const LLVM_OVERRIDE;
+ virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const
+ LLVM_OVERRIDE {
+ return createSystemZObjectWriter(OS, OSABI);
+ }
+ virtual bool doesSectionRequireSymbols(const MCSection &Section) const
+ LLVM_OVERRIDE {
+ return false;
+ }
+};
+} // end anonymous namespace
+
+const MCFixupKindInfo &
+SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+  static const MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = {
+ { "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_390_PLT16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_390_PLT32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+}
+
+void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value) const {
+ MCFixupKind Kind = Fixup.getKind();
+ unsigned Offset = Fixup.getOffset();
+ unsigned Size = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
+
+ assert(Offset + Size <= DataSize && "Invalid fixup offset!");
+
+ // Big-endian insertion of Size bytes.
+ Value = extractBitsForFixup(Kind, Value);
+ unsigned ShiftValue = (Size * 8) - 8;
+ for (unsigned I = 0; I != Size; ++I) {
+ Data[Offset + I] |= uint8_t(Value >> ShiftValue);
+ ShiftValue -= 8;
+ }
+}
+
+bool SystemZMCAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
+ return getRelaxedOpcode(Inst.getOpcode()) != 0;
+}
+
+bool
+SystemZMCAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *Fragment,
+ const MCAsmLayout &Layout) const {
+ // At the moment we just need to relax 16-bit fields to wider fields.
+ Value = extractBitsForFixup(Fixup.getKind(), Value);
+ return (int16_t)Value != (int64_t)Value;
+}
+
+void SystemZMCAsmBackend::relaxInstruction(const MCInst &Inst,
+ MCInst &Res) const {
+ unsigned Opcode = getRelaxedOpcode(Inst.getOpcode());
+ assert(Opcode && "Unexpected insn to relax");
+ Res = Inst;
+ Res.setOpcode(Opcode);
+}
+
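+// Pad with 0x07 bytes; adjacent pairs of them decode as "bcr 0,%r7",
+// which is a no-op.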
+bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
+ MCObjectWriter *OW) const {
+ for (uint64_t I = 0; I != Count; ++I)
+ OW->Write8(7);
+ return true;
+}
+
+MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, StringRef TT,
+ StringRef CPU) {
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ return new SystemZMCAsmBackend(OSABI);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
new file mode 100644
index 0000000..c96a0d4
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -0,0 +1,38 @@
+//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+
+using namespace llvm;
+
+SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
+ PointerSize = 8;
+ CalleeSaveStackSlotSize = 8;
+ IsLittleEndian = false;
+
+ CommentString = "#";
+ PCSymbol = ".";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective = "\t.weak\t";
+ ZeroDirective = "\t.space\t";
+ Data64bitsDirective = "\t.quad\t";
+ UsesELFSectionDirectiveForBSS = true;
+ SupportsDebugInformation = true;
+ HasLEB128 = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
+
+const MCSection *
+SystemZMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const {
+ return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+ 0, SectionKind::getMetadata());
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
new file mode 100644
index 0000000..bac1bca
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -0,0 +1,31 @@
+//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SystemZTARGETASMINFO_H
+#define SystemZTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class Target;
+class StringRef;
+
+class SystemZMCAsmInfo : public MCAsmInfo {
+public:
+ explicit SystemZMCAsmInfo(const Target &T, StringRef TT);
+
+  // Override MCAsmInfo.
+ virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const
+ LLVM_OVERRIDE;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
new file mode 100644
index 0000000..ea2250f
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -0,0 +1,131 @@
+//===-- SystemZMCCodeEmitter.cpp - Convert SystemZ code to machine code ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "MCTargetDesc/SystemZMCFixups.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
+
+using namespace llvm;
+
+namespace {
+class SystemZMCCodeEmitter : public MCCodeEmitter {
+ const MCInstrInfo &MCII;
+ MCContext &Ctx;
+
+public:
+ SystemZMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), Ctx(ctx) {
+ }
+
+ ~SystemZMCCodeEmitter() {}
+
+  // Override MCCodeEmitter.
+ virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const
+ LLVM_OVERRIDE;
+
+private:
+ // Automatically generated by TableGen.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // Called by the TableGen code to get the binary encoding of operand
+ // MO in MI. Fixups is the list of fixups against MI.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // Operand OpNum of MI needs a PC-relative fixup of kind Kind at
+ // Offset bytes from the start of MI. Add the fixup to Fixups
+ // and return the in-place addend, which since we're a RELA target
+ // is always 0.
+ unsigned getPCRelEncoding(const MCInst &MI, unsigned int OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ unsigned Kind, int64_t Offset) const;
+
+ unsigned getPC16DBLEncoding(const MCInst &MI, unsigned int OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2);
+ }
+ unsigned getPC32DBLEncoding(const MCInst &MI, unsigned int OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2);
+ }
+ unsigned getPLT16DBLEncoding(const MCInst &MI, unsigned int OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT16DBL, 2);
+ }
+ unsigned getPLT32DBLEncoding(const MCInst &MI, unsigned int OpNum,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT32DBL, 2);
+ }
+};
+}
+
+MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &MCSTI,
+ MCContext &Ctx) {
+ return new SystemZMCCodeEmitter(MCII, Ctx);
+}
+
+void SystemZMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
+ unsigned Size = MCII.get(MI.getOpcode()).getSize();
+ // Big-endian insertion of Size bytes.
+ unsigned ShiftValue = (Size * 8) - 8;
+ for (unsigned I = 0; I != Size; ++I) {
+ OS << uint8_t(Bits >> ShiftValue);
+ ShiftValue -= 8;
+ }
+}
+
+unsigned SystemZMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg())
+ return Ctx.getRegisterInfo().getEncodingValue(MO.getReg());
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ llvm_unreachable("Unexpected operand type!");
+}
+
+unsigned
+SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned int OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ unsigned Kind, int64_t Offset) const {
+ const MCOperand &MO = MI.getOperand(OpNum);
+ // For compatibility with the GNU assembler, treat constant operands as
+ // unadjusted PC-relative offsets.
+ if (MO.isImm())
+ return MO.getImm() / 2;
+
+ const MCExpr *Expr = MO.getExpr();
+ if (Offset) {
+ // The operand value is relative to the start of MI, but the fixup
+ // is relative to the operand field itself, which is Offset bytes
+ // into MI. Add Offset to the relocation value to cancel out
+ // this difference.
+ const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx);
+ Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
+ }
+ Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind));
+ return 0;
+}
+
+#include "SystemZGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
new file mode 100644
index 0000000..9c94ebb
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
@@ -0,0 +1,31 @@
+//===-- SystemZMCFixups.h - SystemZ-specific fixup entries ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEMZMCFIXUPS_H
+#define LLVM_SYSTEMZMCFIXUPS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace SystemZ {
+ enum FixupKind {
+ // These correspond directly to R_390_* relocations.
+ FK_390_PC16DBL = FirstTargetFixupKind,
+ FK_390_PC32DBL,
+ FK_390_PLT16DBL,
+ FK_390_PLT32DBL,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
+}
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
new file mode 100644
index 0000000..36e3d83
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -0,0 +1,140 @@
+//===-- SystemZMCObjectWriter.cpp - SystemZ ELF writer --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "MCTargetDesc/SystemZMCFixups.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+
+using namespace llvm;
+
+namespace {
+class SystemZObjectWriter : public MCELFObjectTargetWriter {
+public:
+ SystemZObjectWriter(uint8_t OSABI);
+
+ virtual ~SystemZObjectWriter();
+
+protected:
+ // Override MCELFObjectTargetWriter.
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const LLVM_OVERRIDE;
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const LLVM_OVERRIDE;
+};
+} // end anonymous namespace
+
+SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390,
+ /*HasRelocationAddend=*/ true) {}
+
+SystemZObjectWriter::~SystemZObjectWriter() {
+}
+
+// Return the relocation type for an absolute value of MCFixupKind Kind.
+static unsigned getAbsoluteReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_1: return ELF::R_390_8;
+ case FK_Data_2: return ELF::R_390_16;
+ case FK_Data_4: return ELF::R_390_32;
+ case FK_Data_8: return ELF::R_390_64;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
+// Return the relocation type for a PC-relative value of MCFixupKind Kind.
+static unsigned getPCRelReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_2: return ELF::R_390_PC16;
+ case FK_Data_4: return ELF::R_390_PC32;
+ case FK_Data_8: return ELF::R_390_PC64;
+ case SystemZ::FK_390_PC16DBL: return ELF::R_390_PC16DBL;
+ case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL;
+ case SystemZ::FK_390_PLT16DBL: return ELF::R_390_PLT16DBL;
+ case SystemZ::FK_390_PLT32DBL: return ELF::R_390_PLT32DBL;
+ }
+ llvm_unreachable("Unsupported PC-relative address");
+}
+
+// Return the R_390_TLS_LE* relocation type for MCFixupKind Kind.
+static unsigned getTLSLEReloc(unsigned Kind) {
+ switch (Kind) {
+ case FK_Data_4: return ELF::R_390_TLS_LE32;
+ case FK_Data_8: return ELF::R_390_TLS_LE64;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
+// Return the PLT relocation counterpart of MCFixupKind Kind.
+static unsigned getPLTReloc(unsigned Kind) {
+ switch (Kind) {
+ case SystemZ::FK_390_PC16DBL: return ELF::R_390_PLT16DBL;
+ case SystemZ::FK_390_PC32DBL: return ELF::R_390_PLT32DBL;
+ }
+ llvm_unreachable("Unsupported absolute address");
+}
+
+unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ MCSymbolRefExpr::VariantKind Modifier = (Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None :
+ Target.getSymA()->getKind());
+ unsigned Kind = Fixup.getKind();
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_None:
+ if (IsPCRel)
+ return getPCRelReloc(Kind);
+ return getAbsoluteReloc(Kind);
+
+ case MCSymbolRefExpr::VK_NTPOFF:
+ assert(!IsPCRel && "NTPOFF shouldn't be PC-relative");
+ return getTLSLEReloc(Kind);
+
+ case MCSymbolRefExpr::VK_GOT:
+ if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
+ return ELF::R_390_GOTENT;
+ llvm_unreachable("Only PC-relative GOT accesses are supported for now");
+
+ case MCSymbolRefExpr::VK_PLT:
+    assert(IsPCRel && "@PLT shouldn't be PC-relative");
+ return getPLTReloc(Kind);
+
+ default:
+ llvm_unreachable("Modifier not supported");
+ }
+}
+
+const MCSymbol *SystemZObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ // The addend in a PC-relative R_390_* relocation is always applied to
+ // the PC-relative part of the address. If some kind of indirection
+ // is applied to the symbol first, we can't use an addend there too.
+ if (!Target.isAbsolute() &&
+ Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None &&
+ IsPCRel)
+ return &Target.getSymA()->getSymbol().AliasedSymbol();
+ return NULL;
+}
+
+MCObjectWriter *llvm::createSystemZObjectWriter(raw_ostream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
new file mode 100644
index 0000000..49a7f47
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -0,0 +1,160 @@
+//===-- SystemZMCTargetDesc.cpp - SystemZ target descriptions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCTargetDesc.h"
+#include "InstPrinter/SystemZInstPrinter.h"
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SystemZGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCAsmInfo *createSystemZMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new SystemZMCAsmInfo(T, TT);
+ MachineLocation FPDst(MachineLocation::VirtualFP);
+ MachineLocation FPSrc(SystemZ::R15D, -SystemZMC::CFAOffsetFromInitialSP);
+ MAI->addInitialFrameState(0, FPDst, FPSrc);
+ return MAI;
+}
+
+static MCInstrInfo *createSystemZMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSystemZMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitSystemZMCRegisterInfo(X, SystemZ::R14D);
+ return X;
+}
+
+static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSystemZMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+
+ // Static code is suitable for use in a dynamic executable; there is no
+ // separate DynamicNoPIC model.
+ if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC)
+ RM = Reloc::Static;
+
+ // For SystemZ we define the models as follows:
+ //
+ // Small: BRASL can call any function and will use a stub if necessary.
+ // Locally-binding symbols will always be in range of LARL.
+ //
+ // Medium: BRASL can call any function and will use a stub if necessary.
+ // GOT slots and locally-defined text will always be in range
+ // of LARL, but other symbols might not be.
+ //
+ // Large: Equivalent to Medium for now.
+ //
+ // Kernel: Equivalent to Medium for now.
+ //
+ // This means that any PIC module smaller than 4GB meets the
+ // requirements of Small, so Small seems like the best default there.
+ //
+ // All symbols bind locally in a non-PIC module, so the choice is less
+ // obvious. There are two cases:
+ //
+ // - When creating an executable, PLTs and copy relocations allow
+ // us to treat external symbols as part of the executable.
+ // Any executable smaller than 4GB meets the requirements of Small,
+ // so that seems like the best default.
+ //
+ // - When creating JIT code, stubs will be in range of BRASL if the
+ // image is less than 4GB in size. GOT entries will likewise be
+ // in range of LARL. However, the JIT environment has no equivalent
+ // of copy relocs, so locally-binding data symbols might not be in
+ // the range of LARL. We need the Medium model in that case.
+ if (CM == CodeModel::Default)
+ CM = CodeModel::Small;
+ else if (CM == CodeModel::JITDefault)
+ CM = RM == Reloc::PIC_ ? CodeModel::Small : CodeModel::Medium;
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCInstPrinter *createSystemZMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ return new SystemZInstPrinter(MAI, MII, MRI);
+}
+
+static MCStreamer *createSystemZMCObjectStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx,
+ MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool NoExecStack) {
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+extern "C" void LLVMInitializeSystemZTargetMC() {
+ // Register the MCAsmInfo.
+ TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget,
+ createSystemZMCAsmInfo);
+
+ // Register the MCCodeGenInfo.
+ TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget,
+ createSystemZMCCodeGenInfo);
+
+ // Register the MCCodeEmitter.
+ TargetRegistry::RegisterMCCodeEmitter(TheSystemZTarget,
+ createSystemZMCCodeEmitter);
+
+ // Register the MCInstrInfo.
+ TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
+ createSystemZMCInstrInfo);
+
+ // Register the MCRegisterInfo.
+ TargetRegistry::RegisterMCRegInfo(TheSystemZTarget,
+ createSystemZMCRegisterInfo);
+
+ // Register the MCSubtargetInfo.
+ TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
+ createSystemZMCSubtargetInfo);
+
+ // Register the MCAsmBackend.
+ TargetRegistry::RegisterMCAsmBackend(TheSystemZTarget,
+ createSystemZMCAsmBackend);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(TheSystemZTarget,
+ createSystemZMCInstPrinter);
+
+  // Register the MCObjectStreamer.
+ TargetRegistry::RegisterMCObjectStreamer(TheSystemZTarget,
+ createSystemZMCObjectStreamer);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
new file mode 100644
index 0000000..229912f
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -0,0 +1,62 @@
+//===-- SystemZMCTargetDesc.h - SystemZ target descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMCTARGETDESC_H
+#define SYSTEMZMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class StringRef;
+class Target;
+class raw_ostream;
+
+extern Target TheSystemZTarget;
+
+namespace SystemZMC {
+ // How many bytes are in the ABI-defined, caller-allocated part of
+ // a stack frame.
+ const int64_t CallFrameSize = 160;
+
+ // The offset of the DWARF CFA from the incoming stack pointer.
+ const int64_t CFAOffsetFromInitialSP = CallFrameSize;
+}
+
+MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createSystemZMCAsmBackend(const Target &T, StringRef TT,
+ StringRef CPU);
+
+MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI);
+} // end namespace llvm
+
+// Defines symbolic names for SystemZ registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "SystemZGenRegisterInfo.inc"
+
+// Defines symbolic names for the SystemZ instructions.
+#define GET_INSTRINFO_ENUM
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SystemZGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/README.txt b/contrib/llvm/lib/Target/SystemZ/README.txt
new file mode 100644
index 0000000..d1f56a4
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/README.txt
@@ -0,0 +1,146 @@
+//===---------------------------------------------------------------------===//
+// Random notes about and ideas for the SystemZ backend.
+//===---------------------------------------------------------------------===//
+
+The initial backend is deliberately restricted to z10. We should add support
+for later architectures at some point.
+
+--
+
+SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand() is passed "m" for all
+inline asm memory constraints; it doesn't get to see the original constraint.
+This means that it must conservatively treat all inline asm constraints
+as the most restricted type, "R".
+
+--
+
+If an inline asm ties an i32 "r" result to an i64 input, the input
+will be treated as an i32, leaving the upper bits uninitialised.
+For example:
+
+define void @f4(i32 *%dst) {
+ %val = call i32 asm "blah $0", "=r,0" (i64 103)
+ store i32 %val, i32 *%dst
+ ret void
+}
+
+from CodeGen/SystemZ/asm-09.ll will use LHI rather than LGHI
+to load 103. This seems to be a general target-independent problem.
+
+--
+
+The tuning of the choice between Load Address (LA) and addition in
+SystemZISelDAGToDAG.cpp is suspect. It should be tweaked based on
+performance measurements.
+
+--
+
+There is no scheduling support.
+
+--
+
+We don't use the Branch on Count or Branch on Index families of instructions.
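+
+For example, a loop like (a made-up case, not one of the existing tests):
+
+  void f (int *a, int n)
+  {
+    int i;
+    for (i = 0; i < n; i++)
+      a[i] = 0;
+  }
+
+could use BRCT or BRCTG to decrement and test the induction variable in a
+single instruction.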
+
+--
+
+We don't use the condition code results of anything except comparisons.
+
+Implementing this may need something more finely grained than the z_cmp
+and z_ucmp that we have now. It might (or might not) also be useful to
+have a mask of "don't care" values in conditional branches. For example,
+integer comparisons never set CC to 3, so the bottom bit of the CC mask
+isn't particularly relevant. JNLH and JE are equally good for testing
+equality after an integer comparison, etc.
+
+--
+
+We don't optimize string and block memory operations.
+
+--
+
+We don't take full advantage of builtins like fabsl because the calling
+conventions require f128s to be returned by invisible reference.
+
+--
+
+DAGCombiner can detect integer absolute, but there's not yet an associated
+ISD opcode. We could add one and implement it using Load Positive.
+Negated absolutes could use Load Negative.
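+
+For example, a function like (again a hypothetical case):
+
+  long f (long x)
+  {
+    return x < 0 ? -x : x;
+  }
+
+could then compile to a single LPGR (Load Positive), and the negated form
+"return x < 0 ? x : -x" to LNGR (Load Negative).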
+
+--
+
+DAGCombiner doesn't yet fold truncations of extended loads. Functions like:
+
+ unsigned long f (unsigned long x, unsigned short *y)
+ {
+ return (x << 32) | *y;
+ }
+
+therefore end up as:
+
+ sllg %r2, %r2, 32
+ llgh %r0, 0(%r3)
+ lr %r2, %r0
+ br %r14
+
+but truncating the load would give:
+
+ sllg %r2, %r2, 32
+ lh %r2, 0(%r3)
+ br %r14
+
+--
+
+Functions like:
+
+define i64 @f1(i64 %a) {
+ %and = and i64 %a, 1
+ ret i64 %and
+}
+
+ought to be implemented as:
+
+ lhi %r0, 1
+ ngr %r2, %r0
+ br %r14
+
+but two-address optimisations reverse the order of the AND and force:
+
+ lhi %r0, 1
+ ngr %r0, %r2
+ lgr %r2, %r0
+ br %r14
+
+CodeGen/SystemZ/and-04.ll has several examples of this.
+
+--
+
+Out-of-range displacements are usually handled by loading the full
+address into a register. In many cases it would be better to create
+an anchor point instead. E.g. for:
+
+define void @f4a(i128 *%aptr, i64 %base) {
+ %addr = add i64 %base, 524288
+ %bptr = inttoptr i64 %addr to i128 *
+ %a = load volatile i128 *%aptr
+ %b = load i128 *%bptr
+ %add = add i128 %a, %b
+ store i128 %add, i128 *%aptr
+ ret void
+}
+
+(from CodeGen/SystemZ/int-add-08.ll) we load %base+524288 and %base+524296
+into separate registers, rather than using %base+524288 as a base for both.
+
+--
+
+Dynamic stack allocations round the size to 8 bytes and then allocate
+that rounded amount. It would be simpler to subtract the unrounded
+size from the copy of the stack pointer and then align the result.
+See CodeGen/SystemZ/alloca-01.ll for an example.
+
+--
+
+Atomic loads and stores use the default compare-and-swap based implementation.
+This is probably much too conservative in practice, and the overhead is
+especially bad for 8- and 16-bit accesses.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
new file mode 100644
index 0000000..b811cbe
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
@@ -0,0 +1,77 @@
+//==- SystemZ.h - Top-Level Interface for SystemZ representation -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM SystemZ backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZ_H
+#define SYSTEMZ_H
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/Support/CodeGen.h"
+
+namespace llvm {
+ class SystemZTargetMachine;
+ class FunctionPass;
+
+ namespace SystemZ {
+ // Condition-code mask values.
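+    // These are the 4-bit masks used by branch-on-condition instructions,
+    // with the most significant bit corresponding to CC 0.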
+ const unsigned CCMASK_0 = 1 << 3;
+ const unsigned CCMASK_1 = 1 << 2;
+ const unsigned CCMASK_2 = 1 << 1;
+ const unsigned CCMASK_3 = 1 << 0;
+ const unsigned CCMASK_ANY = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3;
+
+ // Condition-code mask assignments for floating-point comparisons.
+ const unsigned CCMASK_CMP_EQ = CCMASK_0;
+ const unsigned CCMASK_CMP_LT = CCMASK_1;
+ const unsigned CCMASK_CMP_GT = CCMASK_2;
+ const unsigned CCMASK_CMP_UO = CCMASK_3;
+ const unsigned CCMASK_CMP_NE = CCMASK_CMP_LT | CCMASK_CMP_GT;
+ const unsigned CCMASK_CMP_LE = CCMASK_CMP_EQ | CCMASK_CMP_LT;
+ const unsigned CCMASK_CMP_GE = CCMASK_CMP_EQ | CCMASK_CMP_GT;
+ const unsigned CCMASK_CMP_O = CCMASK_ANY ^ CCMASK_CMP_UO;
+
+ // Return true if Val fits an LLILL operand.
+ static inline bool isImmLL(uint64_t Val) {
+ return (Val & ~0x000000000000ffffULL) == 0;
+ }
+
+ // Return true if Val fits an LLILH operand.
+ static inline bool isImmLH(uint64_t Val) {
+ return (Val & ~0x00000000ffff0000ULL) == 0;
+ }
+
+ // Return true if Val fits an LLIHL operand.
+ static inline bool isImmHL(uint64_t Val) {
+      return (Val & ~0x0000ffff00000000ULL) == 0;
+ }
+
+ // Return true if Val fits an LLIHH operand.
+ static inline bool isImmHH(uint64_t Val) {
+ return (Val & ~0xffff000000000000ULL) == 0;
+ }
+
+ // Return true if Val fits an LLILF operand.
+ static inline bool isImmLF(uint64_t Val) {
+ return (Val & ~0x00000000ffffffffULL) == 0;
+ }
+
+ // Return true if Val fits an LLIHF operand.
+ static inline bool isImmHF(uint64_t Val) {
+ return (Val & ~0xffffffff00000000ULL) == 0;
+ }
+ }
+
+ FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+} // end namespace llvm
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
new file mode 100644
index 0000000..e03c32f
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
@@ -0,0 +1,75 @@
+//===-- SystemZ.td - Describe the SystemZ target machine -----*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// SystemZ supported processors
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"z10", []>;
+
+//===----------------------------------------------------------------------===//
+// Register file description
+//===----------------------------------------------------------------------===//
+
+include "SystemZRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling convention description
+//===----------------------------------------------------------------------===//
+
+include "SystemZCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction descriptions
+//===----------------------------------------------------------------------===//
+
+include "SystemZOperators.td"
+include "SystemZOperands.td"
+include "SystemZPatterns.td"
+include "SystemZInstrFormats.td"
+include "SystemZInstrInfo.td"
+include "SystemZInstrFP.td"
+
+def SystemZInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Assembly parser
+//===----------------------------------------------------------------------===//
+
+def SystemZAsmParser : AsmParser {
+ let ShouldEmitMatchRegisterName = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Assembly writer
+//===----------------------------------------------------------------------===//
+
+def SystemZAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-level target declaration
+//===----------------------------------------------------------------------===//
+
+def SystemZ : Target {
+ let InstructionSet = SystemZInstrInfo;
+ let AssemblyParsers = [SystemZAsmParser];
+ let AssemblyWriters = [SystemZAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
new file mode 100644
index 0000000..1e15ab1
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -0,0 +1,113 @@
+//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly printer -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Streams SystemZ assembly language and associated data, in the form of
+// MCInsts and MCExprs respectively.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZAsmPrinter.h"
+#include "InstPrinter/SystemZInstPrinter.h"
+#include "SystemZConstantPoolValue.h"
+#include "SystemZMCInstLower.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SystemZMCInstLower Lower(Mang, MF->getContext(), *this);
+ MCInst LoweredMI;
+ Lower.lower(MI, LoweredMI);
+ OutStreamer.EmitInstruction(LoweredMI);
+}
+
+// Convert a SystemZ-specific constant pool modifier into the associated
+// MCSymbolRefExpr variant kind.
+static MCSymbolRefExpr::VariantKind
+getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) {
+ switch (Modifier) {
+ case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF;
+ }
+ llvm_unreachable("Invalid SystemCPModifier!");
+}
+
+void SystemZAsmPrinter::
+EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+ SystemZConstantPoolValue *ZCPV =
+ static_cast<SystemZConstantPoolValue*>(MCPV);
+
+ const MCExpr *Expr =
+ MCSymbolRefExpr::Create(Mang->getSymbol(ZCPV->getGlobalValue()),
+ getModifierVariantKind(ZCPV->getModifier()),
+ OutContext);
+ uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType());
+
+ OutStreamer.EmitValue(Expr, Size);
+}
+
+bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ if (ExtraCode && *ExtraCode == 'n') {
+ if (!MI->getOperand(OpNo).isImm())
+ return true;
+ OS << -int64_t(MI->getOperand(OpNo).getImm());
+ } else {
+ SystemZMCInstLower Lower(Mang, MF->getContext(), *this);
+ MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo)));
+ SystemZInstPrinter::printOperand(MO, OS);
+ }
+ return false;
+}
+
+bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ SystemZInstPrinter::printAddress(MI->getOperand(OpNo).getReg(),
+ MI->getOperand(OpNo + 1).getImm(),
+ MI->getOperand(OpNo + 2).getReg(), OS);
+ return false;
+}
+
+void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetELF()) {
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const DataLayout *TD = TM.getDataLayout();
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
+ TD->getPointerSize(0), 0);
+ }
+ Stubs.clear();
+ }
+ }
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSystemZAsmPrinter() {
+ RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
new file mode 100644
index 0000000..4b6c51b
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -0,0 +1,52 @@
+//===-- SystemZAsmPrinter.h - SystemZ LLVM assembly printer ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZASMPRINTER_H
+#define SYSTEMZASMPRINTER_H
+
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class MCStreamer;
+class MachineBasicBlock;
+class MachineInstr;
+class Module;
+class raw_ostream;
+
+class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter {
+private:
+ const SystemZSubtarget *Subtarget;
+
+public:
+ SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<SystemZSubtarget>();
+ }
+
+ // Override AsmPrinter.
+ virtual const char *getPassName() const LLVM_OVERRIDE {
+ return "SystemZ Assembly Printer";
+ }
+ virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE;
+ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV)
+ LLVM_OVERRIDE;
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) LLVM_OVERRIDE;
+ virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) LLVM_OVERRIDE;
+ virtual void EmitEndOfAsmFile(Module &M) LLVM_OVERRIDE;
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
new file mode 100644
index 0000000..cc9c84b
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
@@ -0,0 +1,21 @@
+//===-- SystemZCallingConv.cpp - Calling conventions for SystemZ ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZCallingConv.h"
+#include "SystemZRegisterInfo.h"
+
+using namespace llvm;
+
+const unsigned SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = {
+ SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, SystemZ::R5D, SystemZ::R6D
+};
+
+const unsigned SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = {
+ SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D
+};
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h
new file mode 100644
index 0000000..298985e
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@@ -0,0 +1,23 @@
+//===-- SystemZCallingConv.h - Calling conventions for SystemZ --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZCALLINGCONV_H
+#define SYSTEMZCALLINGCONV_H
+
+namespace llvm {
+ namespace SystemZ {
+ const unsigned NumArgGPRs = 5;
+ extern const unsigned ArgGPRs[NumArgGPRs];
+
+ const unsigned NumArgFPRs = 4;
+ extern const unsigned ArgFPRs[NumArgFPRs];
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
new file mode 100644
index 0000000..c2d727f
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -0,0 +1,65 @@
+//=- SystemZCallingConv.td - Calling conventions for SystemZ -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the SystemZ ABI.
+//===----------------------------------------------------------------------===//
+
+class CCIfExtend<CCAction A>
+ : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
+
+//===----------------------------------------------------------------------===//
+// SVR4 return value calling convention
+//===----------------------------------------------------------------------===//
+def RetCC_SystemZ : CallingConv<[
+ // Promote i32 to i64 if it has an explicit extension type.
+ CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
+
+ // ABI-compliant code returns 64-bit integers in R2. Make the other
+ // call-clobbered argument registers available for code that doesn't
+ // care about the ABI. (R6 is an argument register too, but is
+ // call-saved and therefore not suitable for return values.)
+ CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W]>>,
+ CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>,
+
+  // ABI-compliant code returns float and double in F0. Make the
+ // other floating-point argument registers available for code that
+ // doesn't care about the ABI. All floating-point argument registers
+ // are call-clobbered, so we can use all of them here.
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+ CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>
+
+ // ABI-compliant code returns long double by reference, but that conversion
+ // is left to higher-level code. Perhaps we could add an f128 definition
+ // here for code that doesn't care about the ABI?
+]>;
+
+//===----------------------------------------------------------------------===//
+// SVR4 argument calling conventions
+//===----------------------------------------------------------------------===//
+def CC_SystemZ : CallingConv<[
+ // Promote i32 to i64 if it has an explicit extension type.
+ // The convention is that true integer arguments that are smaller
+ // than 64 bits should be marked as extended, but structures that
+ // are smaller than 64 bits shouldn't.
+ CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
+
+ // Force long double values to the stack and pass i64 pointers to them.
+ CCIfType<[f128], CCPassIndirect<i64>>,
+
+ // The first 5 integer arguments are passed in R2-R6. Note that R6
+ // is call-saved.
+ CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W, R6W]>>,
+ CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
+
+ // The first 4 float and double arguments are passed in even registers F0-F6.
+ CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+ CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
+
+ // Other arguments are passed in 8-byte-aligned 8-byte stack slots.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
new file mode 100644
index 0000000..e9c4f6d
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -0,0 +1,62 @@
+//===-- SystemZConstantPoolValue.cpp - SystemZ constant-pool value --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+SystemZConstantPoolValue::
+SystemZConstantPoolValue(const GlobalValue *gv,
+ SystemZCP::SystemZCPModifier modifier)
+ : MachineConstantPoolValue(gv->getType()), GV(gv), Modifier(modifier) {}
+
+SystemZConstantPoolValue *
+SystemZConstantPoolValue::Create(const GlobalValue *GV,
+ SystemZCP::SystemZCPModifier Modifier) {
+ return new SystemZConstantPoolValue(GV, Modifier);
+}
+
+unsigned SystemZConstantPoolValue::getRelocationInfo() const {
+ switch (Modifier) {
+ case SystemZCP::NTPOFF:
+ // May require a relocation, but the relocations are always resolved
+ // by the static linker.
+ return 1;
+ }
+ llvm_unreachable("Unknown modifier");
+}
+
+int SystemZConstantPoolValue::
+getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+  const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+ for (unsigned I = 0, E = Constants.size(); I != E; ++I) {
+ if (Constants[I].isMachineConstantPoolEntry() &&
+ (Constants[I].getAlignment() & AlignMask) == 0) {
+ SystemZConstantPoolValue *ZCPV =
+ static_cast<SystemZConstantPoolValue *>(Constants[I].Val.MachineCPVal);
+ if (ZCPV->GV == GV && ZCPV->Modifier == Modifier)
+ return I;
+ }
+ }
+ return -1;
+}
+
+void SystemZConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+ ID.AddPointer(GV);
+ ID.AddInteger(Modifier);
+}
+
+void SystemZConstantPoolValue::print(raw_ostream &O) const {
+ O << GV << "@" << int(Modifier);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
new file mode 100644
index 0000000..9927bdb
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -0,0 +1,55 @@
+//===- SystemZConstantPoolValue.h - SystemZ constant-pool value -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZCONSTANTPOOLVALUE_H
+#define SYSTEMZCONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+class GlobalValue;
+
+namespace SystemZCP {
+ enum SystemZCPModifier {
+ NTPOFF
+ };
+}
+
+/// A SystemZ-specific constant pool value. At present, the only
+/// defined constant pool values are offsets of thread-local variables
+/// (written x@NTPOFF).
+class SystemZConstantPoolValue : public MachineConstantPoolValue {
+ const GlobalValue *GV;
+ SystemZCP::SystemZCPModifier Modifier;
+
+protected:
+ SystemZConstantPoolValue(const GlobalValue *GV,
+ SystemZCP::SystemZCPModifier Modifier);
+
+public:
+ static SystemZConstantPoolValue *
+ Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier);
+
+ // Override MachineConstantPoolValue.
+ virtual unsigned getRelocationInfo() const LLVM_OVERRIDE;
+ virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+ unsigned Alignment) LLVM_OVERRIDE;
+ virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID) LLVM_OVERRIDE;
+ virtual void print(raw_ostream &O) const LLVM_OVERRIDE;
+
+ // Access SystemZ-specific fields.
+ const GlobalValue *getGlobalValue() const { return GV; }
+ SystemZCP::SystemZCPModifier getModifier() const { return Modifier; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
new file mode 100644
index 0000000..fda33de
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -0,0 +1,535 @@
+//===-- SystemZFrameLowering.cpp - Frame lowering for SystemZ -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZFrameLowering.h"
+#include "SystemZCallingConv.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm,
+ const SystemZSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
+ -SystemZMC::CallFrameSize),
+ TM(tm),
+ STI(sti) {
+ // The ABI-defined register save slots, relative to the incoming stack
+ // pointer.
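+  // All of these offsets lie within the 160-byte (SystemZMC::CallFrameSize)
+  // area that the caller allocates.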
+ static const unsigned SpillOffsetTable[][2] = {
+ { SystemZ::R2D, 0x10 },
+ { SystemZ::R3D, 0x18 },
+ { SystemZ::R4D, 0x20 },
+ { SystemZ::R5D, 0x28 },
+ { SystemZ::R6D, 0x30 },
+ { SystemZ::R7D, 0x38 },
+ { SystemZ::R8D, 0x40 },
+ { SystemZ::R9D, 0x48 },
+ { SystemZ::R10D, 0x50 },
+ { SystemZ::R11D, 0x58 },
+ { SystemZ::R12D, 0x60 },
+ { SystemZ::R13D, 0x68 },
+ { SystemZ::R14D, 0x70 },
+ { SystemZ::R15D, 0x78 },
+ { SystemZ::F0D, 0x80 },
+ { SystemZ::F2D, 0x88 },
+ { SystemZ::F4D, 0x90 },
+ { SystemZ::F6D, 0x98 }
+ };
+
+ // Create a mapping from register number to save slot offset.
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+ for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
+ RegSpillOffsets[SpillOffsetTable[I][0]] = SpillOffsetTable[I][1];
+}
+
+void SystemZFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFFrame = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ bool HasFP = hasFP(MF);
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ bool IsVarArg = MF.getFunction()->isVarArg();
+
+ // va_start stores incoming FPR varargs in the normal way, but delegates
+ // the saving of incoming GPR varargs to spillCalleeSavedRegisters().
+ // Record these pending uses, which typically include the call-saved
+ // argument register R6D.
+ if (IsVarArg)
+ for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I)
+ MRI.setPhysRegUsed(SystemZ::ArgGPRs[I]);
+
+ // If the function requires a frame pointer, record that the hard
+ // frame pointer will be clobbered.
+ if (HasFP)
+ MRI.setPhysRegUsed(SystemZ::R11D);
+
+ // If the function calls other functions, record that the return
+ // address register will be clobbered.
+ if (MFFrame->hasCalls())
+ MRI.setPhysRegUsed(SystemZ::R14D);
+
+ // If we are saving GPRs other than the stack pointer, we might as well
+ // save and restore the stack pointer at the same time, via STMG and LMG.
+ // This allows the deallocation to be done by the LMG, rather than needing
+ // a separate %r15 addition.
+ const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ for (unsigned I = 0; CSRegs[I]; ++I) {
+ unsigned Reg = CSRegs[I];
+ if (SystemZ::GR64BitRegClass.contains(Reg) && MRI.isPhysRegUsed(Reg)) {
+ MRI.setPhysRegUsed(SystemZ::R15D);
+ break;
+ }
+ }
+}
+
+// Add GPR64 to the save instruction being built by MIB, which is in basic
+// block MBB. IsImplicit says whether GPR64 is an implicit operand that
+// comes between the explicit start and end registers, rather than an
+// explicit operand to the instruction.
+static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
+ const SystemZTargetMachine &TM,
+ unsigned GPR64, bool IsImplicit) {
+ const SystemZRegisterInfo *RI = TM.getRegisterInfo();
+ unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_32bit);
+ bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
+ if (!IsLive || !IsImplicit) {
+ MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive));
+ if (!IsLive)
+ MBB.addLiveIn(GPR64);
+ }
+}
+
+bool SystemZFrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ bool IsVarArg = MF.getFunction()->isVarArg();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Scan the call-saved GPRs and find the bounds of the register spill area.
+ unsigned SavedGPRFrameSize = 0;
+ unsigned LowGPR = 0;
+ unsigned HighGPR = SystemZ::R15D;
+ unsigned StartOffset = -1U;
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (SystemZ::GR64BitRegClass.contains(Reg)) {
+ SavedGPRFrameSize += 8;
+ unsigned Offset = RegSpillOffsets[Reg];
+ assert(Offset && "Unexpected GPR save");
+ if (StartOffset > Offset) {
+ LowGPR = Reg;
+ StartOffset = Offset;
+ }
+ }
+ }
+
+ // Save information about the range and location of the call-saved
+ // registers, for use by the epilogue inserter.
+ ZFI->setSavedGPRFrameSize(SavedGPRFrameSize);
+ ZFI->setLowSavedGPR(LowGPR);
+ ZFI->setHighSavedGPR(HighGPR);
+
+ // Include the GPR varargs, if any. R6D is call-saved, so would
+ // be included by the loop above, but we also need to handle the
+ // call-clobbered argument registers.
+ if (IsVarArg) {
+ unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
+ if (FirstGPR < SystemZ::NumArgGPRs) {
+ unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
+ unsigned Offset = RegSpillOffsets[Reg];
+ if (StartOffset > Offset) {
+ LowGPR = Reg; StartOffset = Offset;
+ }
+ }
+ }
+
+ // Save GPRs
+ if (LowGPR) {
+ assert(LowGPR != HighGPR && "Should be saving %r15 and something else");
+
+ // Build an STMG instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
+
+ // Add the explicit register operands.
+ addSavedGPR(MBB, MIB, TM, LowGPR, false);
+ addSavedGPR(MBB, MIB, TM, HighGPR, false);
+
+ // Add the address.
+ MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+
+ // Make sure all call-saved GPRs are included as operands and are
+ // marked as live on entry.
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (SystemZ::GR64BitRegClass.contains(Reg))
+ addSavedGPR(MBB, MIB, TM, Reg, true);
+ }
+
+ // ...likewise GPR varargs.
+ if (IsVarArg)
+ for (unsigned I = ZFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I)
+ addSavedGPR(MBB, MIB, TM, SystemZ::ArgGPRs[I], true);
+ }
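+  // (For example, a function that needs to save %r6 through %r15 ends up
+  // with a single "STMG %r6, %r15, 48(%r15)" here, 48 being %r6's save slot
+  // in the table above.)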
+
+ // Save FPRs in the normal TargetInstrInfo way.
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (SystemZ::FP64BitRegClass.contains(Reg)) {
+ MBB.addLiveIn(Reg);
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ &SystemZ::FP64BitRegClass, TRI);
+ }
+ }
+
+ return true;
+}
+
+bool SystemZFrameLowering::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ bool HasFP = hasFP(MF);
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Restore FPRs in the normal TargetInstrInfo way.
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (SystemZ::FP64BitRegClass.contains(Reg))
+ TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+ &SystemZ::FP64BitRegClass, TRI);
+ }
+
+ // Restore call-saved GPRs (but not call-clobbered varargs, which at
+ // this point might hold return values).
+ unsigned LowGPR = ZFI->getLowSavedGPR();
+ unsigned HighGPR = ZFI->getHighSavedGPR();
+ unsigned StartOffset = RegSpillOffsets[LowGPR];
+ if (LowGPR) {
+ // If we saved any of %r2-%r5 as varargs, we should also be saving
+ // and restoring %r6. If we're saving %r6 or above, we should be
+ // restoring it too.
+ assert(LowGPR != HighGPR && "Should be loading %r15 and something else");
+
+ // Build an LMG instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));
+
+ // Add the explicit register operands.
+ MIB.addReg(LowGPR, RegState::Define);
+ MIB.addReg(HighGPR, RegState::Define);
+
+ // Add the address.
+ MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D);
+ MIB.addImm(StartOffset);
+
+    // Do a second scan, adding the remaining saved registers as implicit
+    // definitions of the instruction.
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (Reg != LowGPR && Reg != HighGPR)
+ MIB.addReg(Reg, RegState::ImplicitDefine);
+ }
+ }
+
+ return true;
+}
+
+// Emit instructions before MBBI (in MBB) to add NumBytes to Reg.
+static void emitIncrement(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const DebugLoc &DL,
+ unsigned Reg, int64_t NumBytes,
+ const TargetInstrInfo *TII) {
+ while (NumBytes) {
+ unsigned Opcode;
+ int64_t ThisVal = NumBytes;
+ if (isInt<16>(NumBytes))
+ Opcode = SystemZ::AGHI;
+ else {
+ Opcode = SystemZ::AGFI;
+ // Make sure we maintain 8-byte stack alignment.
+ int64_t MinVal = -int64_t(1) << 31;
+ int64_t MaxVal = (int64_t(1) << 31) - 8;
+ if (ThisVal < MinVal)
+ ThisVal = MinVal;
+ else if (ThisVal > MaxVal)
+ ThisVal = MaxVal;
+ }
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII->get(Opcode), Reg)
+ .addReg(Reg).addImm(ThisVal);
+ // The PSW implicit def is dead.
+ MI->getOperand(3).setIsDead();
+ NumBytes -= ThisVal;
+ }
+}
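+// (A single AGHI covers adjustments that fit in a signed 16-bit immediate;
+// larger ones use AGFI, and only adjustments beyond the signed 32-bit range
+// make the loop above emit more than one instruction.)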
+
+void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFFrame = MF.getFrameInfo();
+ const SystemZInstrInfo *ZII =
+ static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineModuleInfo &MMI = MF.getMMI();
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ const std::vector<CalleeSavedInfo> &CSI = MFFrame->getCalleeSavedInfo();
+ bool HasFP = hasFP(MF);
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // The current offset of the stack pointer from the CFA.
+ int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP;
+
+ if (ZFI->getLowSavedGPR()) {
+ // Skip over the GPR saves.
+ if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG)
+ ++MBBI;
+ else
+ llvm_unreachable("Couldn't skip over GPR saves");
+
+ // Add CFI for the GPR saves.
+ MCSymbol *GPRSaveLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL,
+ ZII->get(TargetOpcode::PROLOG_LABEL)).addSym(GPRSaveLabel);
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ unsigned Reg = I->getReg();
+ if (SystemZ::GR64BitRegClass.contains(Reg)) {
+ int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg];
+ MachineLocation StackSlot(MachineLocation::VirtualFP, Offset);
+ MachineLocation RegValue(Reg);
+ Moves.push_back(MachineMove(GPRSaveLabel, StackSlot, RegValue));
+ }
+ }
+ }
+
+ uint64_t StackSize = getAllocatedStackSize(MF);
+ if (StackSize) {
+ // Allocate StackSize bytes.
+ int64_t Delta = -int64_t(StackSize);
+ emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
+
+ // Add CFI for the allocation.
+ MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL))
+ .addSym(AdjustSPLabel);
+ MachineLocation FPDest(MachineLocation::VirtualFP);
+ MachineLocation FPSrc(MachineLocation::VirtualFP, SPOffsetFromCFA + Delta);
+ Moves.push_back(MachineMove(AdjustSPLabel, FPDest, FPSrc));
+ SPOffsetFromCFA += Delta;
+ }
+
+ if (HasFP) {
+ // Copy the base of the frame to R11.
+ BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R11D)
+ .addReg(SystemZ::R15D);
+
+ // Add CFI for the new frame location.
+ MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL))
+ .addSym(SetFPLabel);
+ MachineLocation HardFP(SystemZ::R11D);
+ MachineLocation VirtualFP(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(SetFPLabel, HardFP, VirtualFP));
+
+ // Mark the FramePtr as live at the beginning of every block except
+ // the entry block. (We'll have marked R11 as live on entry when
+ // saving the GPRs.)
+ for (MachineFunction::iterator
+ I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I)
+ I->addLiveIn(SystemZ::R11D);
+ }
+
+ // Skip over the FPR saves.
+ MCSymbol *FPRSaveLabel = 0;
+ for (std::vector<CalleeSavedInfo>::const_iterator
+ I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+ unsigned Reg = I->getReg();
+ if (SystemZ::FP64BitRegClass.contains(Reg)) {
+ if (MBBI != MBB.end() &&
+ (MBBI->getOpcode() == SystemZ::STD ||
+ MBBI->getOpcode() == SystemZ::STDY))
+ ++MBBI;
+ else
+ llvm_unreachable("Couldn't skip over FPR save");
+
+      // Add CFI for this save.
+      if (!FPRSaveLabel)
+        FPRSaveLabel = MMI.getContext().CreateTempSymbol();
+ int64_t Offset = getFrameIndexOffset(MF, I->getFrameIdx());
+ MachineLocation Slot(MachineLocation::VirtualFP,
+ SPOffsetFromCFA + Offset);
+ MachineLocation RegValue(Reg);
+ Moves.push_back(MachineMove(FPRSaveLabel, Slot, RegValue));
+ }
+ }
+ // Complete the CFI for the FPR saves, modelling them as taking effect
+ // after the last save.
+ if (FPRSaveLabel)
+ BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL))
+ .addSym(FPRSaveLabel);
+}
+
+void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SystemZInstrInfo *ZII =
+ static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+
+ // Skip the return instruction.
+ assert(MBBI->getOpcode() == SystemZ::RET &&
+ "Can only insert epilogue into returning blocks");
+
+ uint64_t StackSize = getAllocatedStackSize(MF);
+ if (ZFI->getLowSavedGPR()) {
+ --MBBI;
+ unsigned Opcode = MBBI->getOpcode();
+ if (Opcode != SystemZ::LMG)
+ llvm_unreachable("Expected to see callee-save register restore code");
+
+ unsigned AddrOpNo = 2;
+ DebugLoc DL = MBBI->getDebugLoc();
+ uint64_t Offset = StackSize + MBBI->getOperand(AddrOpNo + 1).getImm();
+ unsigned NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset);
+
+ // If the offset is too large, use the largest stack-aligned offset
+ // and add the rest to the base register (the stack or frame pointer).
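+    // (0x7fff8 is the largest 8-byte-aligned value that still fits in a
+    // signed 20-bit displacement.)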
+ if (!NewOpcode) {
+ uint64_t NumBytes = Offset - 0x7fff8;
+ emitIncrement(MBB, MBBI, DL, MBBI->getOperand(AddrOpNo).getReg(),
+ NumBytes, ZII);
+ Offset -= NumBytes;
+ NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset);
+ assert(NewOpcode && "No restore instruction available");
+ }
+
+ MBBI->setDesc(ZII->get(NewOpcode));
+ MBBI->getOperand(AddrOpNo + 1).ChangeToImmediate(Offset);
+ } else if (StackSize) {
+ DebugLoc DL = MBBI->getDebugLoc();
+ emitIncrement(MBB, MBBI, DL, SystemZ::R15D, StackSize, ZII);
+ }
+}
+
+bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MF.getFrameInfo()->hasVarSizedObjects() ||
+ MF.getInfo<SystemZMachineFunctionInfo>()->getManipulatesSP());
+}
+
+int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFFrame = MF.getFrameInfo();
+
+ // Start with the offset of FI from the top of the caller-allocated frame
+ // (i.e. the top of the 160 bytes allocated by the caller). This initial
+ // offset is therefore negative.
+ int64_t Offset = (MFFrame->getObjectOffset(FI) +
+ MFFrame->getOffsetAdjustment());
+ if (FI >= 0)
+ // Non-fixed objects are allocated below the incoming stack pointer.
+ // Account for the space at the top of the frame that we choose not
+ // to allocate.
+ Offset += getUnallocatedTopBytes(MF);
+
+ // Make the offset relative to the incoming stack pointer.
+ Offset -= getOffsetOfLocalArea();
+
+ // Make the offset relative to the bottom of the frame.
+ Offset += getAllocatedStackSize(MF);
+
+ return Offset;
+}
+
+uint64_t SystemZFrameLowering::
+getUnallocatedTopBytes(const MachineFunction &MF) const {
+ return MF.getInfo<SystemZMachineFunctionInfo>()->getSavedGPRFrameSize();
+}
+
+uint64_t SystemZFrameLowering::
+getAllocatedStackSize(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFFrame = MF.getFrameInfo();
+
+ // Start with the size of the local variables and spill slots.
+ uint64_t StackSize = MFFrame->getStackSize();
+
+ // Remove any bytes that we choose not to allocate.
+ StackSize -= getUnallocatedTopBytes(MF);
+
+ // Include space for an emergency spill slot, if one might be needed.
+ StackSize += getEmergencySpillSlotSize(MF);
+
+ // We need to allocate the ABI-defined 160-byte base area whenever
+ // we allocate stack space for our own use and whenever we call another
+ // function.
+ if (StackSize || MFFrame->hasVarSizedObjects() || MFFrame->hasCalls())
+ StackSize += SystemZMC::CallFrameSize;
+
+ return StackSize;
+}
+
+unsigned SystemZFrameLowering::
+getEmergencySpillSlotSize(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFFrame = MF.getFrameInfo();
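+  // MaxReach is a conservative bound on the largest displacement a frame
+  // access might need: the whole local frame plus twice the 160-byte call
+  // frame size (presumably to cover accesses at both ends of the frame).
+  // If even that fits the unsigned 12-bit displacement of the short-form
+  // memory instructions, no scavenging slot can be needed.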
+ uint64_t MaxReach = MFFrame->getStackSize() + SystemZMC::CallFrameSize * 2;
+ return isUInt<12>(MaxReach) ? 0 : 8;
+}
+
+unsigned SystemZFrameLowering::
+getEmergencySpillSlotOffset(const MachineFunction &MF) const {
+ assert(getEmergencySpillSlotSize(MF) && "No emergency spill slot");
+ return SystemZMC::CallFrameSize;
+}
+
+bool
+SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ // The ABI requires us to allocate 160 bytes of stack space for the callee,
+ // with any outgoing stack arguments being placed above that. It seems
+ // better to make that area a permanent feature of the frame even if
+ // we're using a frame pointer.
+ return true;
+}
+
+void SystemZFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ switch (MI->getOpcode()) {
+ case SystemZ::ADJCALLSTACKDOWN:
+ case SystemZ::ADJCALLSTACKUP:
+ assert(hasReservedCallFrame(MF) &&
+ "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
+ MBB.erase(MI);
+ break;
+
+ default:
+ llvm_unreachable("Unexpected call frame instruction");
+ }
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
new file mode 100644
index 0000000..5ca049c
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -0,0 +1,93 @@
+//===-- SystemZFrameLowering.h - Frame lowering for SystemZ -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZFRAMELOWERING_H
+#define SYSTEMZFRAMELOWERING_H
+
+#include "SystemZSubtarget.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+class SystemZTargetMachine;
+class SystemZSubtarget;
+
+class SystemZFrameLowering : public TargetFrameLowering {
+ IndexedMap<unsigned> RegSpillOffsets;
+
+protected:
+ const SystemZTargetMachine &TM;
+ const SystemZSubtarget &STI;
+
+public:
+ SystemZFrameLowering(const SystemZTargetMachine &tm,
+ const SystemZSubtarget &sti);
+
+ // Override FrameLowering.
+ virtual void
+ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const LLVM_OVERRIDE;
+ virtual bool
+ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const
+ LLVM_OVERRIDE;
+ virtual bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBII,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const
+ LLVM_OVERRIDE;
+ virtual void emitPrologue(MachineFunction &MF) const LLVM_OVERRIDE;
+ virtual void emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const LLVM_OVERRIDE;
+ virtual bool hasFP(const MachineFunction &MF) const LLVM_OVERRIDE;
+ virtual int getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const LLVM_OVERRIDE;
+ virtual bool hasReservedCallFrame(const MachineFunction &MF) const
+ LLVM_OVERRIDE;
+ virtual void
+ eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const
+ LLVM_OVERRIDE;
+
+ // The target-independent code automatically allocates save slots for
+ // call-saved GPRs. However, we don't need those slots for SystemZ,
+ // because the ABI sets aside GPR save slots in the caller-allocated part
+ // of the frame. Since the target-independent code puts this unneeded
+  // area at the top of the callee-allocated part of the frame, we choose not
+ // to allocate it and adjust the offsets accordingly. Return the
+ // size of this unallocated area.
+ // FIXME: seems a bit hackish.
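+  // (For example, if the function spills four call-saved GPRs, the generic
+  // code reserves 4 * 8 = 32 bytes for them; this hook then returns 32 and
+  // that slice of the frame is simply never allocated.)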
+ uint64_t getUnallocatedTopBytes(const MachineFunction &MF) const;
+
+ // Return the number of bytes in the callee-allocated part of the frame.
+ uint64_t getAllocatedStackSize(const MachineFunction &MF) const;
+
+ // Return the number of frame bytes that should be reserved for
+  // an emergency spill slot, for use by the register scavenger.
+  // Return 0 if register scavenging won't be needed.
+ unsigned getEmergencySpillSlotSize(const MachineFunction &MF) const;
+
+ // Return the offset from the frame pointer of the emergency spill slot,
+ // which always fits within a 12-bit unsigned displacement field.
+ // Only valid if getEmergencySpillSlotSize(MF) returns nonzero.
+ unsigned getEmergencySpillSlotOffset(const MachineFunction &MF) const;
+
+ // Return the byte offset from the incoming stack pointer of Reg's
+ // ABI-defined save slot. Return 0 if no slot is defined for Reg.
+ unsigned getRegSpillOffset(unsigned Reg) const {
+ return RegSpillOffsets[Reg];
+ }
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
new file mode 100644
index 0000000..d436ba9
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -0,0 +1,616 @@
+//===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SystemZ target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+// Used to build addressing modes.
+struct SystemZAddressingMode {
+ // The shape of the address.
+ enum AddrForm {
+ // base+displacement
+ FormBD,
+
+ // base+displacement+index for load and store operands
+ FormBDXNormal,
+
+ // base+displacement+index for load address operands
+ FormBDXLA,
+
+ // base+displacement+index+ADJDYNALLOC
+ FormBDXDynAlloc
+ };
+ AddrForm Form;
+
+ // The type of displacement. The enum names here correspond directly
+ // to the definitions in SystemZOperand.td. We could split them into
+ // flags -- single/pair, 128-bit, etc. -- but it hardly seems worth it.
+ enum DispRange {
+ Disp12Only,
+ Disp12Pair,
+ Disp20Only,
+ Disp20Only128,
+ Disp20Pair
+ };
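+  // (E.g. Disp12Pair describes instructions such as L, which takes an
+  // unsigned 12-bit displacement but has a 20-bit counterpart LY; values up
+  // to 20 bits are accepted during matching, and isValidDisp() later decides
+  // which member of the pair to use.)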
+ DispRange DR;
+
+ // The parts of the address. The address is equivalent to:
+ //
+ // Base + Disp + Index + (IncludesDynAlloc ? ADJDYNALLOC : 0)
+ SDValue Base;
+ int64_t Disp;
+ SDValue Index;
+ bool IncludesDynAlloc;
+
+ SystemZAddressingMode(AddrForm form, DispRange dr)
+ : Form(form), DR(dr), Base(), Disp(0), Index(),
+ IncludesDynAlloc(false) {}
+
+ // True if the address can have an index register.
+ bool hasIndexField() { return Form != FormBD; }
+
+ // True if the address can (and must) include ADJDYNALLOC.
+ bool isDynAlloc() { return Form == FormBDXDynAlloc; }
+
+ void dump() {
+ errs() << "SystemZAddressingMode " << this << '\n';
+
+ errs() << " Base ";
+ if (Base.getNode() != 0)
+ Base.getNode()->dump();
+ else
+ errs() << "null\n";
+
+ if (hasIndexField()) {
+ errs() << " Index ";
+ if (Index.getNode() != 0)
+ Index.getNode()->dump();
+ else
+ errs() << "null\n";
+ }
+
+ errs() << " Disp " << Disp;
+ if (IncludesDynAlloc)
+ errs() << " + ADJDYNALLOC";
+ errs() << '\n';
+ }
+};
+
+class SystemZDAGToDAGISel : public SelectionDAGISel {
+ const SystemZTargetLowering &Lowering;
+ const SystemZSubtarget &Subtarget;
+
+ // Used by SystemZOperands.td to create integer constants.
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
+ }
+
+ // Try to fold more of the base or index of AM into AM, where IsBase
+ // selects between the base and index.
+ bool expandAddress(SystemZAddressingMode &AM, bool IsBase);
+
+ // Try to describe N in AM, returning true on success.
+ bool selectAddress(SDValue N, SystemZAddressingMode &AM);
+
+ // Extract individual target operands from matched address AM.
+ void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
+ SDValue &Base, SDValue &Disp);
+ void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+
+ // Try to match Addr as a FormBD address with displacement type DR.
+ // Return true on success, storing the base and displacement in
+ // Base and Disp respectively.
+ bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
+ SDValue &Base, SDValue &Disp);
+
+ // Try to match Addr as a FormBDX* address of form Form with
+ // displacement type DR. Return true on success, storing the base,
+ // displacement and index in Base, Disp and Index respectively.
+ bool selectBDXAddr(SystemZAddressingMode::AddrForm Form,
+ SystemZAddressingMode::DispRange DR, SDValue Addr,
+ SDValue &Base, SDValue &Disp, SDValue &Index);
+
+ // PC-relative address matching routines used by SystemZOperands.td.
+ bool selectPCRelAddress(SDValue Addr, SDValue &Target) {
+ if (Addr.getOpcode() == SystemZISD::PCREL_WRAPPER) {
+ Target = Addr.getOperand(0);
+ return true;
+ }
+ return false;
+ }
+
+ // BD matching routines used by SystemZOperands.td.
+ bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp);
+ }
+ bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
+ }
+ bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp);
+ }
+ bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
+ }
+
+ // BDX matching routines used by SystemZOperands.td.
+ bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+ SystemZAddressingMode::Disp12Only,
+ Addr, Base, Disp, Index);
+ }
+ bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+ SystemZAddressingMode::Disp12Pair,
+ Addr, Base, Disp, Index);
+ }
+ bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc,
+ SystemZAddressingMode::Disp12Only,
+ Addr, Base, Disp, Index);
+ }
+ bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+ SystemZAddressingMode::Disp20Only,
+ Addr, Base, Disp, Index);
+ }
+ bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+ SystemZAddressingMode::Disp20Only128,
+ Addr, Base, Disp, Index);
+ }
+ bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+ SystemZAddressingMode::Disp20Pair,
+ Addr, Base, Disp, Index);
+ }
+ bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
+ SystemZAddressingMode::Disp12Pair,
+ Addr, Base, Disp, Index);
+ }
+ bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
+ SDValue &Index) {
+ return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
+ SystemZAddressingMode::Disp20Pair,
+ Addr, Base, Disp, Index);
+ }
+
+ // If Op0 is null, then Node is a constant that can be loaded using:
+ //
+ // (Opcode UpperVal LowerVal)
+ //
+ // If Op0 is nonnull, then Node can be implemented using:
+ //
+ // (Opcode (Opcode Op0 UpperVal) LowerVal)
+ SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
+ uint64_t UpperVal, uint64_t LowerVal);
+
+public:
+ SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
+ Lowering(*TM.getTargetLowering()),
+ Subtarget(*TM.getSubtargetImpl()) { }
+
+ // Override MachineFunctionPass.
+ virtual const char *getPassName() const LLVM_OVERRIDE {
+ return "SystemZ DAG->DAG Pattern Instruction Selection";
+ }
+
+ // Override SelectionDAGISel.
+ virtual SDNode *Select(SDNode *Node) LLVM_OVERRIDE;
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps)
+ LLVM_OVERRIDE;
+
+ // Include the pieces autogenerated from the target description.
+ #include "SystemZGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new SystemZDAGToDAGISel(TM, OptLevel);
+}
+
+// Return true if Val should be selected as a displacement for an address
+// with range DR. Here we're interested in the range of both the instruction
+// described by DR and of any pairing instruction.
+static bool selectDisp(SystemZAddressingMode::DispRange DR, int64_t Val) {
+ switch (DR) {
+ case SystemZAddressingMode::Disp12Only:
+ return isUInt<12>(Val);
+
+ case SystemZAddressingMode::Disp12Pair:
+ case SystemZAddressingMode::Disp20Only:
+ case SystemZAddressingMode::Disp20Pair:
+ return isInt<20>(Val);
+
+ case SystemZAddressingMode::Disp20Only128:
+ return isInt<20>(Val) && isInt<20>(Val + 8);
+ }
+ llvm_unreachable("Unhandled displacement range");
+}
+
+// Change the base or index in AM to Value, where IsBase selects
+// between the base and index.
+static void changeComponent(SystemZAddressingMode &AM, bool IsBase,
+ SDValue Value) {
+ if (IsBase)
+ AM.Base = Value;
+ else
+ AM.Index = Value;
+}
+
+// The base or index of AM is equivalent to Value + ADJDYNALLOC,
+// where IsBase selects between the base and index. Try to fold the
+// ADJDYNALLOC into AM.
+static bool expandAdjDynAlloc(SystemZAddressingMode &AM, bool IsBase,
+ SDValue Value) {
+ if (AM.isDynAlloc() && !AM.IncludesDynAlloc) {
+ changeComponent(AM, IsBase, Value);
+ AM.IncludesDynAlloc = true;
+ return true;
+ }
+ return false;
+}
+
+// The base of AM is equivalent to Base + Index. Try to use Index as
+// the index register.
+static bool expandIndex(SystemZAddressingMode &AM, SDValue Base,
+ SDValue Index) {
+ if (AM.hasIndexField() && !AM.Index.getNode()) {
+ AM.Base = Base;
+ AM.Index = Index;
+ return true;
+ }
+ return false;
+}
+
+// The base or index of AM is equivalent to Op0 + Op1, where IsBase selects
+// between the base and index. Try to fold Op1 into AM's displacement.
+static bool expandDisp(SystemZAddressingMode &AM, bool IsBase,
+ SDValue Op0, ConstantSDNode *Op1) {
+ // First try adjusting the displacement.
+ int64_t TestDisp = AM.Disp + Op1->getSExtValue();
+ if (selectDisp(AM.DR, TestDisp)) {
+ changeComponent(AM, IsBase, Op0);
+ AM.Disp = TestDisp;
+ return true;
+ }
+
+ // We could consider forcing the displacement into a register and
+ // using it as an index, but it would need to be carefully tuned.
+ return false;
+}
+
+bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,
+ bool IsBase) {
+ SDValue N = IsBase ? AM.Base : AM.Index;
+ unsigned Opcode = N.getOpcode();
+ if (Opcode == ISD::TRUNCATE) {
+ N = N.getOperand(0);
+ Opcode = N.getOpcode();
+ }
+ if (Opcode == ISD::ADD || CurDAG->isBaseWithConstantOffset(N)) {
+ SDValue Op0 = N.getOperand(0);
+ SDValue Op1 = N.getOperand(1);
+
+ unsigned Op0Code = Op0->getOpcode();
+ unsigned Op1Code = Op1->getOpcode();
+
+ if (Op0Code == SystemZISD::ADJDYNALLOC)
+ return expandAdjDynAlloc(AM, IsBase, Op1);
+ if (Op1Code == SystemZISD::ADJDYNALLOC)
+ return expandAdjDynAlloc(AM, IsBase, Op0);
+
+ if (Op0Code == ISD::Constant)
+ return expandDisp(AM, IsBase, Op1, cast<ConstantSDNode>(Op0));
+ if (Op1Code == ISD::Constant)
+ return expandDisp(AM, IsBase, Op0, cast<ConstantSDNode>(Op1));
+
+ if (IsBase && expandIndex(AM, Op0, Op1))
+ return true;
+ }
+ return false;
+}
+
+// Return true if an instruction with displacement range DR should be
+// used for displacement value Val. selectDisp(DR, Val) must already hold.
+static bool isValidDisp(SystemZAddressingMode::DispRange DR, int64_t Val) {
+ assert(selectDisp(DR, Val) && "Invalid displacement");
+ switch (DR) {
+ case SystemZAddressingMode::Disp12Only:
+ case SystemZAddressingMode::Disp20Only:
+ case SystemZAddressingMode::Disp20Only128:
+ return true;
+
+ case SystemZAddressingMode::Disp12Pair:
+ // Use the other instruction if the displacement is too large.
+ return isUInt<12>(Val);
+
+ case SystemZAddressingMode::Disp20Pair:
+ // Use the other instruction if the displacement is small enough.
+ return !isUInt<12>(Val);
+ }
+ llvm_unreachable("Unhandled displacement range");
+}
+
+// Return true if Base + Disp + Index should be performed by LA(Y).
+static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) {
+ // Don't use LA(Y) for constants.
+ if (!Base)
+ return false;
+
+ // Always use LA(Y) for frame addresses, since we know that the destination
+ // register is almost always (perhaps always) going to be different from
+ // the frame register.
+ if (Base->getOpcode() == ISD::FrameIndex)
+ return true;
+
+ if (Disp) {
+ // Always use LA(Y) if there is a base, displacement and index.
+ if (Index)
+ return true;
+
+ // Always use LA if the displacement is small enough. It should always
+ // be no worse than AGHI (and better if it avoids a move).
+ if (isUInt<12>(Disp))
+ return true;
+
+ // For similar reasons, always use LAY if the constant is too big for AGHI.
+ // LAY should be no worse than AGFI.
+ if (!isInt<16>(Disp))
+ return true;
+ } else {
+ // Don't use LA for plain registers.
+ if (!Index)
+ return false;
+
+ // Don't use LA for plain addition if the index operand is only used
+ // once. It should be a natural two-operand addition in that case.
+ if (Index->hasOneUse())
+ return false;
+
+ // Prefer addition if the second operation is sign-extended, in the
+ // hope of using AGF.
+ unsigned IndexOpcode = Index->getOpcode();
+ if (IndexOpcode == ISD::SIGN_EXTEND ||
+ IndexOpcode == ISD::SIGN_EXTEND_INREG)
+ return false;
+ }
+
+ // Don't use LA for two-operand addition if either operand is only
+ // used once. The addition instructions are better in that case.
+ if (Base->hasOneUse())
+ return false;
+
+ return true;
+}
+
+// Return true if Addr is suitable for AM, updating AM if so.
+bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
+ SystemZAddressingMode &AM) {
+ // Start out assuming that the address will need to be loaded separately,
+ // then try to extend it as much as we can.
+ AM.Base = Addr;
+
+ // First try treating the address as a constant.
+ if (Addr.getOpcode() == ISD::Constant &&
+ expandDisp(AM, true, SDValue(), cast<ConstantSDNode>(Addr)))
+ ;
+ else
+ // Otherwise try expanding each component.
+ while (expandAddress(AM, true) ||
+ (AM.Index.getNode() && expandAddress(AM, false)))
+ continue;
+
+ // Reject cases where it isn't profitable to use LA(Y).
+ if (AM.Form == SystemZAddressingMode::FormBDXLA &&
+ !shouldUseLA(AM.Base.getNode(), AM.Disp, AM.Index.getNode()))
+ return false;
+
+ // Reject cases where the other instruction in a pair should be used.
+ if (!isValidDisp(AM.DR, AM.Disp))
+ return false;
+
+ // Make sure that ADJDYNALLOC is included where necessary.
+ if (AM.isDynAlloc() && !AM.IncludesDynAlloc)
+ return false;
+
+ DEBUG(AM.dump());
+ return true;
+}
+
+// Insert a node into the DAG at least before Pos. This will reposition
+// the node as needed, and will assign it a node ID that is <= Pos's ID.
+// Note that this does *not* preserve the uniqueness of node IDs!
+// The selection DAG must no longer depend on their uniqueness when this
+// function is used.
+static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) {
+ if (N.getNode()->getNodeId() == -1 ||
+ N.getNode()->getNodeId() > Pos->getNodeId()) {
+ DAG->RepositionNode(Pos, N.getNode());
+ N.getNode()->setNodeId(Pos->getNodeId());
+ }
+}
+
+void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
+ EVT VT, SDValue &Base,
+ SDValue &Disp) {
+ Base = AM.Base;
+ if (!Base.getNode())
+ // Register 0 means "no base". This is mostly useful for shifts.
+ Base = CurDAG->getRegister(0, VT);
+ else if (Base.getOpcode() == ISD::FrameIndex) {
+ // Lower a FrameIndex to a TargetFrameIndex.
+ int64_t FrameIndex = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FrameIndex, VT);
+ } else if (Base.getValueType() != VT) {
+ // Truncate values from i64 to i32, for shifts.
+ assert(VT == MVT::i32 && Base.getValueType() == MVT::i64 &&
+ "Unexpected truncation");
+ DebugLoc DL = Base.getDebugLoc();
+ SDValue Trunc = CurDAG->getNode(ISD::TRUNCATE, DL, VT, Base);
+ insertDAGNode(CurDAG, Base.getNode(), Trunc);
+ Base = Trunc;
+ }
+
+ // Lower the displacement to a TargetConstant.
+ Disp = CurDAG->getTargetConstant(AM.Disp, VT);
+}
+
+void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
+ EVT VT, SDValue &Base,
+ SDValue &Disp, SDValue &Index) {
+ getAddressOperands(AM, VT, Base, Disp);
+
+ Index = AM.Index;
+ if (!Index.getNode())
+ // Register 0 means "no index".
+ Index = CurDAG->getRegister(0, VT);
+}
+
+bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR,
+ SDValue Addr, SDValue &Base,
+ SDValue &Disp) {
+ SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR);
+ if (!selectAddress(Addr, AM))
+ return false;
+
+ getAddressOperands(AM, Addr.getValueType(), Base, Disp);
+ return true;
+}
+
+bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
+ SystemZAddressingMode::DispRange DR,
+ SDValue Addr, SDValue &Base,
+ SDValue &Disp, SDValue &Index) {
+ SystemZAddressingMode AM(Form, DR);
+ if (!selectAddress(Addr, AM))
+ return false;
+
+ getAddressOperands(AM, Addr.getValueType(), Base, Disp, Index);
+ return true;
+}
+
+SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
+ SDValue Op0, uint64_t UpperVal,
+ uint64_t LowerVal) {
+ EVT VT = Node->getValueType(0);
+ DebugLoc DL = Node->getDebugLoc();
+ SDValue Upper = CurDAG->getConstant(UpperVal, VT);
+ if (Op0.getNode())
+ Upper = CurDAG->getNode(Opcode, DL, VT, Op0, Upper);
+ Upper = SDValue(Select(Upper.getNode()), 0);
+
+ SDValue Lower = CurDAG->getConstant(LowerVal, VT);
+ SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower);
+ return Or.getNode();
+}
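+
+// (For instance, materializing the 64-bit constant 0x123456789ABCDEF0 goes
+// through splitLargeImmediate(ISD::OR, ..., 0x1234567800000000, 0x9ABCDEF0),
+// so that each half can be encoded with a 32-bit immediate form.)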
+
+SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ return 0;
+ }
+
+ unsigned Opcode = Node->getOpcode();
+ switch (Opcode) {
+ case ISD::OR:
+ case ISD::XOR:
+ // If this is a 64-bit operation in which both 32-bit halves are nonzero,
+ // split the operation into two.
+ if (Node->getValueType(0) == MVT::i64)
+ if (ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
+ uint64_t Val = Op1->getZExtValue();
+ if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val))
+ Node = splitLargeImmediate(Opcode, Node, Node->getOperand(0),
+ Val - uint32_t(Val), uint32_t(Val));
+ }
+ break;
+
+ case ISD::Constant:
+ // If this is a 64-bit constant that is out of the range of LLILF,
+ // LLIHF and LGFI, split it into two 32-bit pieces.
+ if (Node->getValueType(0) == MVT::i64) {
+ uint64_t Val = cast<ConstantSDNode>(Node)->getZExtValue();
+ if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val))
+ Node = splitLargeImmediate(ISD::OR, Node, SDValue(),
+ Val - uint32_t(Val), uint32_t(Val));
+ }
+ break;
+
+ case ISD::ATOMIC_LOAD_SUB:
+ // Try to convert subtractions of constants to additions.
+ if (ConstantSDNode *Op2 = dyn_cast<ConstantSDNode>(Node->getOperand(2))) {
+ uint64_t Value = -Op2->getZExtValue();
+ EVT VT = Node->getValueType(0);
+ if (VT == MVT::i32 || isInt<32>(Value)) {
+ SDValue Ops[] = { Node->getOperand(0), Node->getOperand(1),
+ CurDAG->getConstant(int32_t(Value), VT) };
+ Node = CurDAG->MorphNodeTo(Node, ISD::ATOMIC_LOAD_ADD,
+ Node->getVTList(), Ops, array_lengthof(Ops));
+ }
+ }
+ break;
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ errs() << "\n";
+ );
+ return ResNode;
+}
+
+bool SystemZDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "Unexpected constraint code");
+ // Accept addresses with short displacements, which are compatible
+ // with Q, R, S and T. But keep the index operand for future expansion.
+ SDValue Base, Disp, Index;
+ if (!selectBDXAddr(SystemZAddressingMode::FormBD,
+ SystemZAddressingMode::Disp12Only,
+ Op, Base, Disp, Index))
+ return true;
+ OutOps.push_back(Base);
+ OutOps.push_back(Disp);
+ OutOps.push_back(Index);
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
new file mode 100644
index 0000000..eb21b31
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -0,0 +1,2233 @@
+//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-lower"
+
+#include "SystemZISelLowering.h"
+#include "SystemZCallingConv.h"
+#include "SystemZConstantPoolValue.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+using namespace llvm;
+
+// Classify VT as either 32 or 64 bit.
+static bool is32Bit(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i32:
+ return true;
+ case MVT::i64:
+ return false;
+ default:
+ llvm_unreachable("Unsupported type");
+ }
+}
+
+// Return a version of MachineOperand that can be safely used before the
+// final use.
+static MachineOperand earlyUseOperand(MachineOperand Op) {
+ if (Op.isReg())
+ Op.setIsKill(false);
+ return Op;
+}
+
+SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
+ : TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+ MVT PtrVT = getPointerTy();
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
+ addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
+ addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
+ addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
+ addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+
+ // Set up special registers.
+ setExceptionPointerRegister(SystemZ::R6D);
+ setExceptionSelectorRegister(SystemZ::R7D);
+ setStackPointerRegisterToSaveRestore(SystemZ::R15D);
+
+ // TODO: It may be better to default to latency-oriented scheduling, however
+ // LLVM's current latency-oriented scheduler can't handle physreg definitions
+ // such as SystemZ has with PSW, so set this to the register-pressure
+ // scheduler, because it can.
+ setSchedulingPreference(Sched::RegPressure);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+
+ // Instructions are strings of 2-byte aligned 2-byte values.
+ setMinFunctionAlignment(2);
+
+ // Handle operations that are handled in a similar way for all types.
+ for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
+ I <= MVT::LAST_FP_VALUETYPE;
+ ++I) {
+ MVT VT = MVT::SimpleValueType(I);
+ if (isTypeLegal(VT)) {
+ // Expand SETCC(X, Y, COND) into SELECT_CC(X, Y, 1, 0, COND).
+ setOperationAction(ISD::SETCC, VT, Expand);
+
+ // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
+ setOperationAction(ISD::SELECT, VT, Expand);
+
+ // Lower SELECT_CC and BR_CC into separate comparisons and branches.
+ setOperationAction(ISD::SELECT_CC, VT, Custom);
+ setOperationAction(ISD::BR_CC, VT, Custom);
+ }
+ }
+
+ // Expand jump table branches as address arithmetic followed by an
+ // indirect jump.
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // Expand BRCOND into a BR_CC (see above).
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ // Handle integer types.
+ for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
+ I <= MVT::LAST_INTEGER_VALUETYPE;
+ ++I) {
+ MVT VT = MVT::SimpleValueType(I);
+ if (isTypeLegal(VT)) {
+ // Expand individual DIV and REMs into DIVREMs.
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Custom);
+ setOperationAction(ISD::UDIVREM, VT, Custom);
+
+ // Expand ATOMIC_LOAD and ATOMIC_STORE using ATOMIC_CMP_SWAP.
+ // FIXME: probably much too conservative.
+ setOperationAction(ISD::ATOMIC_LOAD, VT, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, VT, Expand);
+
+ // No special instructions for these.
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+
+ // Use *MUL_LOHI where possible and a wider multiplication otherwise.
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+
+ // We have instructions for signed but not unsigned FP conversion.
+ setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ }
+ }
+
+ // Type legalization will convert 8- and 16-bit atomic operations into
+ // forms that operate on i32s (but still keeping the original memory VT).
+ // Lower them into full i32 operations.
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
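+  // (Sketch of the intent, assuming the usual legalizer behaviour: an i8
+  // "atomicrmw add", for example, reaches this backend as an ATOMIC_LOAD_ADD
+  // node with an i32 value type but an i8 memory VT, and the custom lowering
+  // then performs the update on the containing aligned 32-bit word.)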
+
+ // We have instructions for signed but not unsigned FP conversion.
+ // Handle unsigned 32-bit types as signed 64-bit types.
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+
+ // We have native support for a 64-bit CTLZ, via FLOGR.
+ setOperationAction(ISD::CTLZ, MVT::i32, Promote);
+ setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+
+ // Give LowerOperation the chance to replace 64-bit ORs with subregs.
+ setOperationAction(ISD::OR, MVT::i64, Custom);
+
+ // The architecture has 32-bit SMUL_LOHI and UMUL_LOHI (MR and MLR),
+ // but they aren't really worth using. There is no 64-bit SMUL_LOHI,
+ // but there is a 64-bit UMUL_LOHI: MLGR.
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
+
+ // FIXME: Can we support these natively?
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+
+ // We have native instructions for i8, i16 and i32 extensions, but not i1.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // Handle the various types of symbolic address.
+ setOperationAction(ISD::ConstantPool, PtrVT, Custom);
+ setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
+ setOperationAction(ISD::BlockAddress, PtrVT, Custom);
+ setOperationAction(ISD::JumpTable, PtrVT, Custom);
+
+ // We need to handle dynamic allocations specially because of the
+ // 160-byte area at the bottom of the stack.
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
+
+ // Use custom expanders so that we can force the function to use
+ // a frame pointer.
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
+
+ // Expand these using getExceptionSelectorRegister() and
+ // getExceptionPointerRegister().
+ setOperationAction(ISD::EXCEPTIONADDR, PtrVT, Expand);
+ setOperationAction(ISD::EHSELECTION, PtrVT, Expand);
+
+ // Handle floating-point types.
+ for (unsigned I = MVT::FIRST_FP_VALUETYPE;
+ I <= MVT::LAST_FP_VALUETYPE;
+ ++I) {
+ MVT VT = MVT::SimpleValueType(I);
+ if (isTypeLegal(VT)) {
+ // We can use FI for FRINT.
+ setOperationAction(ISD::FRINT, VT, Legal);
+
+ // No special instructions for these.
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+ }
+ }
+
+ // We have fused multiply-addition for f32 and f64 but not f128.
+ setOperationAction(ISD::FMA, MVT::f32, Legal);
+ setOperationAction(ISD::FMA, MVT::f64, Legal);
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+
+ // Needed so that we don't try to implement f128 constant loads using
+  // a load-and-extend of an f80 constant (in cases where the constant
+ // would fit in an f80).
+ setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);
+
+ // Floating-point truncation and stores need to be done separately.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+
+ // We have 64-bit FPR<->GPR moves, but need special handling for
+ // 32-bit forms.
+ setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::f32, Custom);
+
+ // VASTART and VACOPY need to deal with the SystemZ-specific varargs
+ // structure, but VAEND is a no-op.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+}
+
+bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
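+  // ('?' here stands for the operand-format letter: E for f32, D for f64
+  // and X for f128, i.e. LZER/LZDR/LZXR and LCEBR/LCDBR/LCXBR.)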
+ return Imm.isZero() || Imm.isNegZero();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline asm support
+//===----------------------------------------------------------------------===//
+
+TargetLowering::ConstraintType
+SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'a': // Address register
+ case 'd': // Data register (equivalent to 'r')
+ case 'f': // Floating-point register
+ case 'r': // General-purpose register
+ return C_RegisterClass;
+
+ case 'Q': // Memory with base and unsigned 12-bit displacement
+ case 'R': // Likewise, plus an index
+ case 'S': // Memory with base and signed 20-bit displacement
+ case 'T': // Likewise, plus an index
+ case 'm': // Equivalent to 'T'.
+ return C_Memory;
+
+ case 'I': // Unsigned 8-bit constant
+ case 'J': // Unsigned 12-bit constant
+ case 'K': // Signed 16-bit constant
+ case 'L': // Signed 20-bit displacement (on all targets we support)
+ case 'M': // 0x7fffffff
+ return C_Other;
+
+ default:
+ break;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+TargetLowering::ConstraintWeight SystemZTargetLowering::
+getSingleConstraintMatchWeight(AsmOperandInfo &info,
+ const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+
+ case 'a': // Address register
+ case 'd': // Data register (equivalent to 'r')
+ case 'r': // General-purpose register
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+
+ case 'f': // Floating-point register
+ if (type->isFloatingPointTy())
+ weight = CW_Register;
+ break;
+
+ case 'I': // Unsigned 8-bit constant
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
+ if (isUInt<8>(C->getZExtValue()))
+ weight = CW_Constant;
+ break;
+
+ case 'J': // Unsigned 12-bit constant
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
+ if (isUInt<12>(C->getZExtValue()))
+ weight = CW_Constant;
+ break;
+
+ case 'K': // Signed 16-bit constant
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
+ if (isInt<16>(C->getSExtValue()))
+ weight = CW_Constant;
+ break;
+
+ case 'L': // Signed 20-bit displacement (on all targets we support)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
+ if (isInt<20>(C->getSExtValue()))
+ weight = CW_Constant;
+ break;
+
+ case 'M': // 0x7fffffff
+ if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal))
+ if (C->getZExtValue() == 0x7fffffff)
+ weight = CW_Constant;
+ break;
+ }
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC Constraint Letters
+ switch (Constraint[0]) {
+ default: break;
+ case 'd': // Data register (equivalent to 'r')
+ case 'r': // General-purpose register
+ if (VT == MVT::i64)
+ return std::make_pair(0U, &SystemZ::GR64BitRegClass);
+ else if (VT == MVT::i128)
+ return std::make_pair(0U, &SystemZ::GR128BitRegClass);
+ return std::make_pair(0U, &SystemZ::GR32BitRegClass);
+
+ case 'a': // Address register
+ if (VT == MVT::i64)
+ return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
+ else if (VT == MVT::i128)
+ return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
+ return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
+
+ case 'f': // Floating-point register
+ if (VT == MVT::f64)
+ return std::make_pair(0U, &SystemZ::FP64BitRegClass);
+ else if (VT == MVT::f128)
+ return std::make_pair(0U, &SystemZ::FP128BitRegClass);
+ return std::make_pair(0U, &SystemZ::FP32BitRegClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+void SystemZTargetLowering::
+LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ // Only support length 1 constraints for now.
+ if (Constraint.length() == 1) {
+ switch (Constraint[0]) {
+ case 'I': // Unsigned 8-bit constant
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+ if (isUInt<8>(C->getZExtValue()))
+ Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
+ Op.getValueType()));
+ return;
+
+ case 'J': // Unsigned 12-bit constant
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+ if (isUInt<12>(C->getZExtValue()))
+ Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
+ Op.getValueType()));
+ return;
+
+ case 'K': // Signed 16-bit constant
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+ if (isInt<16>(C->getSExtValue()))
+ Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
+ Op.getValueType()));
+ return;
+
+ case 'L': // Signed 20-bit displacement (on all targets we support)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+ if (isInt<20>(C->getSExtValue()))
+ Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
+ Op.getValueType()));
+ return;
+
+ case 'M': // 0x7fffffff
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+ if (C->getZExtValue() == 0x7fffffff)
+ Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
+ Op.getValueType()));
+ return;
+ }
+ }
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling conventions
+//===----------------------------------------------------------------------===//
+
+#include "SystemZGenCallingConv.inc"
+
+// Value is a value that has been passed to us in the location described by VA
+// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
+// any loads onto Chain.
+static SDValue convertLocVTToValVT(SelectionDAG &DAG, DebugLoc DL,
+ CCValAssign &VA, SDValue Chain,
+ SDValue Value) {
+ // If the argument has been promoted from a smaller type, insert an
+ // assertion to capture this.
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
+ DAG.getValueType(VA.getValVT()));
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
+ DAG.getValueType(VA.getValVT()));
+
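+  // For example, if an i32 value was passed sign-extended in an i64
+  // location, the AssertSext above records that fact and the truncation
+  // below recovers the original i32.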
+ if (VA.isExtInLoc())
+ Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
+ else if (VA.getLocInfo() == CCValAssign::Indirect)
+ Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
+ MachinePointerInfo(), false, false, false, 0);
+ else
+ assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
+ return Value;
+}
+
+// Value is a value of type VA.getValVT() that we need to copy into
+// the location described by VA. Return a copy of Value converted to
+// VA.getLocVT().  The caller is responsible for handling indirect values.
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, DebugLoc DL,
+ CCValAssign &VA, SDValue Value) {
+ switch (VA.getLocInfo()) {
+ case CCValAssign::SExt:
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
+ case CCValAssign::ZExt:
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
+ case CCValAssign::AExt:
+ return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
+ case CCValAssign::Full:
+ return Value;
+ default:
+ llvm_unreachable("Unhandled getLocInfo()");
+ }
+}
+
+SDValue SystemZTargetLowering::
+LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SystemZMachineFunctionInfo *FuncInfo =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ const SystemZFrameLowering *TFL =
+ static_cast<const SystemZFrameLowering *>(TM.getFrameLowering());
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
+
+ unsigned NumFixedGPRs = 0;
+ unsigned NumFixedFPRs = 0;
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ SDValue ArgValue;
+ CCValAssign &VA = ArgLocs[I];
+ EVT LocVT = VA.getLocVT();
+ if (VA.isRegLoc()) {
+ // Arguments passed in registers
+ const TargetRegisterClass *RC;
+ switch (LocVT.getSimpleVT().SimpleTy) {
+ default:
+ // Integers smaller than i64 should be promoted to i64.
+ llvm_unreachable("Unexpected argument type");
+ case MVT::i32:
+ NumFixedGPRs += 1;
+ RC = &SystemZ::GR32BitRegClass;
+ break;
+ case MVT::i64:
+ NumFixedGPRs += 1;
+ RC = &SystemZ::GR64BitRegClass;
+ break;
+ case MVT::f32:
+ NumFixedFPRs += 1;
+ RC = &SystemZ::FP32BitRegClass;
+ break;
+ case MVT::f64:
+ NumFixedFPRs += 1;
+ RC = &SystemZ::FP64BitRegClass;
+ break;
+ }
+
+ unsigned VReg = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(VA.getLocReg(), VReg);
+ ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
+ } else {
+ assert(VA.isMemLoc() && "Argument not register or memory");
+
+ // Create the frame index object for this incoming parameter.
+ int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
+ VA.getLocMemOffset(), true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter. Unpromoted ints and floats are
+ // passed as right-justified 8-byte values.
+ EVT PtrVT = getPointerTy();
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4));
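+      // (On this big-endian target a right-justified 32-bit value occupies
+      // bytes 4-7 of its 8-byte slot, hence the extra 4-byte offset above.)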
+ ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+ }
+
+ // Convert the value of the argument register into the value that's
+ // being passed.
+ InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
+ }
+
+ if (IsVarArg) {
+ // Save the number of non-varargs registers for later use by va_start, etc.
+ FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
+ FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
+
+    // Likewise record the address (in the form of a frame index) of where the
+    // first stack vararg would be.  The 1-byte size here is arbitrary.
+ int64_t StackSize = CCInfo.getNextStackOffset();
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true));
+
+ // ...and a similar frame index for the caller-allocated save area
+ // that will be used to store the incoming registers.
+ int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
+ unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true);
+ FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
+
+ // Store the FPR varargs in the reserved frame slots. (We store the
+ // GPRs as part of the prologue.)
+ if (NumFixedFPRs < SystemZ::NumArgFPRs) {
+ SDValue MemOps[SystemZ::NumArgFPRs];
+ for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
+ unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
+ int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
+ &SystemZ::FP64BitRegClass);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
+ MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0);
+ }
+ // Join the stores, which are independent of one another.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ &MemOps[NumFixedFPRs],
+ SystemZ::NumArgFPRs - NumFixedFPRs);
+ }
+ }
+
+ return Chain;
+}
+
+SDValue
+SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ DebugLoc &DL = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT PtrVT = getPointerTy();
+
+ // SystemZ target does not yet support tail call optimization.
+ isTailCall = false;
+
+ // Analyze the operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
+ ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+
+ // Mark the start of the call.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true));
+
+ // Copy argument values to their designated locations.
+ SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ SDValue ArgValue = OutVals[I];
+
+ if (VA.getLocInfo() == CCValAssign::Indirect) {
+ // Store the argument in a stack slot and pass its address.
+ SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ ArgValue = SpillSlot;
+ } else
+ ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
+
+ if (VA.isRegLoc())
+ // Queue up the argument copies and emit them at the end.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
+ else {
+ assert(VA.isMemLoc() && "Argument not register or memory");
+
+ // Work out the address of the stack slot. Unpromoted ints and
+ // floats are passed as right-justified 8-byte values.
+ if (!StackPtr.getNode())
+ StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
+ unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
+ if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
+ Offset += 4;
+ SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
+ DAG.getIntPtrConstant(Offset));
+
+ // Emit the store.
+ MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ }
+
+ // Join the stores, which are independent of one another.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes, chained and glued together.
+ SDValue Glue;
+ for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
+ Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
+ RegsToPass[I].second, Glue);
+ Glue = Chain.getValue(1);
+ }
+
+ // Accept direct calls by converting symbolic call addresses to the
+ // associated Target* opcodes.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
+ Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
+ } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
+ Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
+ }
+
+ // The first call operand is the chain and the second is the target address.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
+ Ops.push_back(DAG.getRegister(RegsToPass[I].first,
+ RegsToPass[I].second.getValueType()));
+
+ // Glue the call to the argument copies, if any.
+ if (Glue.getNode())
+ Ops.push_back(Glue);
+
+ // Emit the call.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
+ Glue = Chain.getValue(1);
+
+ // Mark the end of the call, which is glued to the call itself.
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, PtrVT, true),
+ DAG.getConstant(0, PtrVT, true),
+ Glue);
+ Glue = Chain.getValue(1);
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RetLocs;
+ CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext());
+ RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
+ CCValAssign &VA = RetLocs[I];
+
+ // Copy the value out, gluing the copy to the end of the call sequence.
+ SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
+ VA.getLocVT(), Glue);
+ Chain = RetValue.getValue(1);
+ Glue = RetValue.getValue(2);
+
+ // Convert the value of the return register into the value that's
+ // being returned.
+ InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
+ }
+
+ return Chain;
+}
+
+SDValue
+SystemZTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Assign locations to each returned value.
+ SmallVector<CCValAssign, 16> RetLocs;
+ CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext());
+ RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
+
+ // Quick exit for void returns
+ if (RetLocs.empty())
+ return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
+
+ // Copy the result values into the output registers.
+ SDValue Glue;
+ SmallVector<SDValue, 4> RetOps;
+ RetOps.push_back(Chain);
+ for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
+ CCValAssign &VA = RetLocs[I];
+ SDValue RetValue = OutVals[I];
+
+ // Make the return register live on exit.
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // Promote the value as required.
+ RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
+
+ // Chain and glue the copies together.
+ unsigned Reg = VA.getLocReg();
+ Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
+ Glue = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
+ }
+
+ // Update chain and glue.
+ RetOps[0] = Chain;
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
+
+ return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other,
+ RetOps.data(), RetOps.size());
+}
+
+// CC is a comparison that will be implemented using an integer or
+// floating-point comparison. Return the condition code mask for
+// a branch on true. In the integer case, CCMASK_CMP_UO is set for
+// unsigned comparisons and clear for signed ones. In the floating-point
+// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
+static unsigned CCMaskForCondCode(ISD::CondCode CC) {
+#define CONV(X) \
+ case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
+ case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
+ case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
+
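+  // For example, CONV(LT) expands to three cases: SETLT and SETOLT map to
+  // CCMASK_CMP_LT, while SETULT maps to CCMASK_CMP_UO | CCMASK_CMP_LT.
+  // emitCmp later strips CCMASK_CMP_UO for integer comparisons and uses it
+  // to select an unsigned compare instead.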
+ switch (CC) {
+ default:
+ llvm_unreachable("Invalid integer condition!");
+
+ CONV(EQ);
+ CONV(NE);
+ CONV(GT);
+ CONV(GE);
+ CONV(LT);
+ CONV(LE);
+
+ case ISD::SETO: return SystemZ::CCMASK_CMP_O;
+ case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
+ }
+#undef CONV
+}
+
+// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1
+// is suitable for CLI(Y), CHHSI or CLHHSI, adjust the operands as necessary.
+static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned,
+ SDValue &CmpOp0, SDValue &CmpOp1,
+ unsigned &CCMask) {
+  // For us to make any changes, it must be a comparison between a single-use
+ // load and a constant.
+ if (!CmpOp0.hasOneUse() ||
+ CmpOp0.getOpcode() != ISD::LOAD ||
+ CmpOp1.getOpcode() != ISD::Constant)
+ return;
+
+ // We must have an 8- or 16-bit load.
+ LoadSDNode *Load = cast<LoadSDNode>(CmpOp0);
+ unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
+ if (NumBits != 8 && NumBits != 16)
+ return;
+
+ // The load must be an extending one and the constant must be within the
+ // range of the unextended value.
+ ConstantSDNode *Constant = cast<ConstantSDNode>(CmpOp1);
+ uint64_t Value = Constant->getZExtValue();
+ uint64_t Mask = (1 << NumBits) - 1;
+ if (Load->getExtensionType() == ISD::SEXTLOAD) {
+ int64_t SignedValue = Constant->getSExtValue();
+ if (uint64_t(SignedValue) + (1 << (NumBits - 1)) > Mask)
+ return;
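+    // (For NumBits == 8 this accepts SignedValue in the range [-128, 127],
+    // i.e. exactly the values representable in the unextended signed byte.)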
+ // Unsigned comparison between two sign-extended values is equivalent
+ // to unsigned comparison between two zero-extended values.
+ if (IsUnsigned)
+ Value &= Mask;
+ else if (CCMask == SystemZ::CCMASK_CMP_EQ ||
+ CCMask == SystemZ::CCMASK_CMP_NE)
+ // Any choice of IsUnsigned is OK for equality comparisons.
+ // We could use either CHHSI or CLHHSI for 16-bit comparisons,
+ // but since we use CLHHSI for zero extensions, it seems better
+ // to be consistent and do the same here.
+ Value &= Mask, IsUnsigned = true;
+ else if (NumBits == 8) {
+ // Try to treat the comparison as unsigned, so that we can use CLI.
+ // Adjust CCMask and Value as necessary.
+ if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_LT)
+ // Test whether the high bit of the byte is set.
+ Value = 127, CCMask = SystemZ::CCMASK_CMP_GT, IsUnsigned = true;
+ else if (SignedValue == -1 && CCMask == SystemZ::CCMASK_CMP_GT)
+ // Test whether the high bit of the byte is clear.
+ Value = 128, CCMask = SystemZ::CCMASK_CMP_LT, IsUnsigned = true;
+ else
+ // No instruction exists for this combination.
+ return;
+ }
+ } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
+ if (Value > Mask)
+ return;
+ // Signed comparison between two zero-extended values is equivalent
+ // to unsigned comparison.
+ IsUnsigned = true;
+ } else
+ return;
+
+ // Make sure that the first operand is an i32 of the right extension type.
+ ISD::LoadExtType ExtType = IsUnsigned ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
+ if (CmpOp0.getValueType() != MVT::i32 ||
+ Load->getExtensionType() != ExtType)
+ CmpOp0 = DAG.getExtLoad(ExtType, Load->getDebugLoc(), MVT::i32,
+ Load->getChain(), Load->getBasePtr(),
+ Load->getPointerInfo(), Load->getMemoryVT(),
+ Load->isVolatile(), Load->isNonTemporal(),
+ Load->getAlignment());
+
+ // Make sure that the second operand is an i32 with the right value.
+ if (CmpOp1.getValueType() != MVT::i32 ||
+ Value != Constant->getZExtValue())
+ CmpOp1 = DAG.getConstant(Value, MVT::i32);
+}
+
+// Return true if a comparison described by CCMask, CmpOp0 and CmpOp1
+// is an equality comparison that is better implemented using unsigned
+// rather than signed comparison instructions.
+static bool preferUnsignedComparison(SelectionDAG &DAG, SDValue CmpOp0,
+ SDValue CmpOp1, unsigned CCMask) {
+ // The test must be for equality or inequality.
+ if (CCMask != SystemZ::CCMASK_CMP_EQ && CCMask != SystemZ::CCMASK_CMP_NE)
+ return false;
+
+ if (CmpOp1.getOpcode() == ISD::Constant) {
+ uint64_t Value = cast<ConstantSDNode>(CmpOp1)->getSExtValue();
+
+ // If we're comparing with memory, prefer unsigned comparisons for
+ // values that are in the unsigned 16-bit range but not the signed
+ // 16-bit range. We want to use CLFHSI and CLGHSI.
+ if (CmpOp0.hasOneUse() &&
+ ISD::isNormalLoad(CmpOp0.getNode()) &&
+ (Value >= 32768 && Value < 65536))
+ return true;
+
+ // Use unsigned comparisons for values that are in the CLGFI range
+ // but not in the CGFI range.
+ if (CmpOp0.getValueType() == MVT::i64 && (Value >> 31) == 1)
+ return true;
+
+ return false;
+ }
+
+ // Prefer CL for zero-extended loads.
+ if (CmpOp1.getOpcode() == ISD::ZERO_EXTEND ||
+ ISD::isZEXTLoad(CmpOp1.getNode()))
+ return true;
+
+ // ...and for "in-register" zero extensions.
+ if (CmpOp1.getOpcode() == ISD::AND && CmpOp1.getValueType() == MVT::i64) {
+ SDValue Mask = CmpOp1.getOperand(1);
+ if (Mask.getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Mask)->getZExtValue() == 0xffffffff)
+ return true;
+ }
+
+ return false;
+}
+
+// Return a target node that compares CmpOp0 and CmpOp1. Set CCMask to the
+// 4-bit condition-code mask for CC.
+static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
+ ISD::CondCode CC, unsigned &CCMask) {
+ bool IsUnsigned = false;
+ CCMask = CCMaskForCondCode(CC);
+ if (!CmpOp0.getValueType().isFloatingPoint()) {
+ IsUnsigned = CCMask & SystemZ::CCMASK_CMP_UO;
+ CCMask &= ~SystemZ::CCMASK_CMP_UO;
+ adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask);
+ if (preferUnsignedComparison(DAG, CmpOp0, CmpOp1, CCMask))
+ IsUnsigned = true;
+ }
+
+ DebugLoc DL = CmpOp0.getDebugLoc();
+ return DAG.getNode((IsUnsigned ? SystemZISD::UCMP : SystemZISD::CMP),
+ DL, MVT::Glue, CmpOp0, CmpOp1);
+}
+
+// Lower a binary operation that produces two VT results, one in each
+// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
+// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
+// on the extended Op0 and (unextended) Op1. Store the even register result
+// in Even and the odd register result in Odd.
+static void lowerGR128Binary(SelectionDAG &DAG, DebugLoc DL, EVT VT,
+ unsigned Extend, unsigned Opcode,
+ SDValue Op0, SDValue Op1,
+ SDValue &Even, SDValue &Odd) {
+ SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
+ SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
+ SDValue(In128, 0), Op1);
+ bool Is32Bit = is32Bit(VT);
+ SDValue SubReg0 = DAG.getTargetConstant(SystemZ::even128(Is32Bit), VT);
+ SDValue SubReg1 = DAG.getTargetConstant(SystemZ::odd128(Is32Bit), VT);
+ SDNode *Reg0 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ VT, Result, SubReg0);
+ SDNode *Reg1 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ VT, Result, SubReg1);
+ Even = SDValue(Reg0, 0);
+ Odd = SDValue(Reg1, 0);
+}
+
+SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue CmpOp0 = Op.getOperand(2);
+ SDValue CmpOp1 = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc DL = Op.getDebugLoc();
+
+ unsigned CCMask;
+ SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask);
+ return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
+ Chain, DAG.getConstant(CCMask, MVT::i32), Dest, Flags);
+}
+
+SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue CmpOp0 = Op.getOperand(0);
+ SDValue CmpOp1 = Op.getOperand(1);
+ SDValue TrueOp = Op.getOperand(2);
+ SDValue FalseOp = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ DebugLoc DL = Op.getDebugLoc();
+
+ unsigned CCMask;
+ SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCMask);
+
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(TrueOp);
+ Ops.push_back(FalseOp);
+ Ops.push_back(DAG.getConstant(CCMask, MVT::i32));
+ Ops.push_back(Flags);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size());
+}
+
+SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Node->getDebugLoc();
+ const GlobalValue *GV = Node->getGlobal();
+ int64_t Offset = Node->getOffset();
+ EVT PtrVT = getPointerTy();
+ Reloc::Model RM = TM.getRelocationModel();
+ CodeModel::Model CM = TM.getCodeModel();
+
+ SDValue Result;
+ if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
+ // Make sure that the offset is aligned to a halfword. If it isn't,
+ // create an "anchor" at the previous 12-bit boundary.
+ // FIXME check whether there is a better way of handling this.
+ if (Offset & 1) {
+ Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
+ Offset & ~uint64_t(0xfff));
+ Offset &= 0xfff;
+ } else {
+ Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset);
+ Offset = 0;
+ }
+ Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+ } else {
+ Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
+ Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+ Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
+ }
+
+ // If there was a non-zero offset that we didn't fold, create an explicit
+ // addition for it.
+ if (Offset != 0)
+ Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
+ DAG.getConstant(Offset, PtrVT));
+
+ return Result;
+}
+
+SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Node->getDebugLoc();
+ const GlobalValue *GV = Node->getGlobal();
+ EVT PtrVT = getPointerTy();
+ TLSModel::Model model = TM.getTLSModel(GV);
+
+ if (model != TLSModel::LocalExec)
+ llvm_unreachable("only local-exec TLS mode supported");
+
+ // The high part of the thread pointer is in access register 0.
+ SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
+ DAG.getConstant(0, MVT::i32));
+ TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
+
+ // The low part of the thread pointer is in access register 1.
+ SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
+ DAG.getConstant(1, MVT::i32));
+ TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
+
+ // Merge them into a single 64-bit address.
+ SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
+ DAG.getConstant(32, PtrVT));
+ SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
+
+ // Get the offset of GA from the thread pointer.
+ SystemZConstantPoolValue *CPV =
+ SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
+
+ // Force the offset into the constant pool and load it from there.
+ SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8);
+ SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+ CPAddr, MachinePointerInfo::getConstantPool(),
+ false, false, false, 0);
+
+ // Add the base and offset together.
+ return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
+}
+
+SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Node->getDebugLoc();
+ const BlockAddress *BA = Node->getBlockAddress();
+ int64_t Offset = Node->getOffset();
+ EVT PtrVT = getPointerTy();
+
+ SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
+ Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+ return Result;
+}
+
+SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = JT->getDebugLoc();
+ EVT PtrVT = getPointerTy();
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+
+ // Use LARL to load the address of the table.
+ return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+}
+
+SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = CP->getDebugLoc();
+ EVT PtrVT = getPointerTy();
+
+ SDValue Result;
+ if (CP->isMachineConstantPoolEntry())
+ Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment());
+ else
+ Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment(), CP->getOffset());
+
+ // Use LARL to load the address of the constant pool entry.
+ return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+}
+
+SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue In = Op.getOperand(0);
+ EVT InVT = In.getValueType();
+ EVT ResVT = Op.getValueType();
+
+ SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
+ SDValue Shift32 = DAG.getConstant(32, MVT::i64);
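+  // A 32-bit FP value occupies the high half of its 64-bit FPR, so both
+  // directions go via an i64/f64 pair: the i32 payload is shifted into
+  // (or extracted from) the upper 32 bits around the 64-bit bitcast.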
+ if (InVT == MVT::i32 && ResVT == MVT::f32) {
+ SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, Shift32);
+ SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shift);
+ SDNode *Out = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ MVT::f32, Out64, SubReg32);
+ return SDValue(Out, 0);
+ }
+ if (InVT == MVT::f32 && ResVT == MVT::i32) {
+ SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
+ SDNode *In64 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ MVT::f64, SDValue(U64, 0), In, SubReg32);
+ SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, SDValue(In64, 0));
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, Shift32);
+ SDValue Out = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
+ return Out;
+ }
+ llvm_unreachable("Unexpected bitcast combination");
+}
+
+SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SystemZMachineFunctionInfo *FuncInfo =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ EVT PtrVT = getPointerTy();
+
+ SDValue Chain = Op.getOperand(0);
+ SDValue Addr = Op.getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // The initial values of each field.
+ const unsigned NumFields = 4;
+ SDValue Fields[NumFields] = {
+ DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), PtrVT),
+ DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), PtrVT),
+ DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
+ DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
+ };
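+  // These mirror the four 8-byte fields of the s390x ELF ABI va_list
+  // (__gpr, __fpr, __overflow_arg_area and __reg_save_area).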
+
+ // Store each field into its respective slot.
+ SDValue MemOps[NumFields];
+ unsigned Offset = 0;
+ for (unsigned I = 0; I < NumFields; ++I) {
+ SDValue FieldAddr = Addr;
+ if (Offset != 0)
+ FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
+ DAG.getIntPtrConstant(Offset));
+ MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
+ MachinePointerInfo(SV, Offset),
+ false, false, 0);
+ Offset += 8;
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps, NumFields);
+}
+
+SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue DstPtr = Op.getOperand(1);
+ SDValue SrcPtr = Op.getOperand(2);
+ const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+ DebugLoc DL = Op.getDebugLoc();
+
+ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32),
+ /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
+ MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
+}
+
+SDValue SystemZTargetLowering::
+lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+
+ unsigned SPReg = getStackPointerRegisterToSaveRestore();
+
+ // Get a reference to the stack pointer.
+ SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
+
+ // Get the new stack pointer value.
+ SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size);
+
+ // Copy the new stack pointer back.
+ Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+
+  // The allocated data lives above the 160 bytes reserved for the standard
+ // frame, plus any outgoing stack arguments. We don't know how much that
+ // amounts to yet, so emit a special ADJDYNALLOC placeholder.
+ SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
+ SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
+
+ SDValue Ops[2] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+ assert(!is32Bit(VT) && "Only support 64-bit UMUL_LOHI");
+
+ // UMUL_LOHI64 returns the low result in the odd register and the high
+ // result in the even register. UMUL_LOHI is defined to return the
+ // low half first, so the results are in reverse order.
+ SDValue Ops[2];
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
+ Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // We use DSGF for 32-bit division.
+ if (is32Bit(VT)) {
+ Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
+ Op1 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op1);
+ }
+
+ // DSG(F) takes a 64-bit dividend, so the even register in the GR128
+ // input is "don't care". The instruction returns the remainder in
+ // the even register and the quotient in the odd register.
+ SDValue Ops[2];
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::SDIVREM64,
+ Op0, Op1, Ops[1], Ops[0]);
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc DL = Op.getDebugLoc();
+
+ // DL(G) uses a double-width dividend, so we need to clear the even
+ // register in the GR128 input. The instruction returns the remainder
+ // in the even register and the quotient in the odd register.
+ SDValue Ops[2];
+ if (is32Bit(VT))
+ lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32,
+ Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
+ else
+ lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64,
+ Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
+
+ // Get the known-zero masks for each operand.
+ SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
+ APInt KnownZero[2], KnownOne[2];
+ DAG.ComputeMaskedBits(Ops[0], KnownZero[0], KnownOne[0]);
+ DAG.ComputeMaskedBits(Ops[1], KnownZero[1], KnownOne[1]);
+
+ // See if the upper 32 bits of one operand and the lower 32 bits of the
+ // other are known zero. They are the low and high operands respectively.
+ uint64_t Masks[] = { KnownZero[0].getZExtValue(),
+ KnownZero[1].getZExtValue() };
+ unsigned High, Low;
+ if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
+ High = 1, Low = 0;
+ else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
+ High = 0, Low = 1;
+ else
+ return Op;
+
+ SDValue LowOp = Ops[Low];
+ SDValue HighOp = Ops[High];
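+  // For example, in (or (shl X, 32), (and Y, 0xffffffff)) the shift has
+  // 32 known-zero low bits and the AND has 32 known-zero high bits, so
+  // the shift becomes HighOp and the AND becomes LowOp.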
+
+ // If the high part is a constant, we're better off using IILH.
+ if (HighOp.getOpcode() == ISD::Constant)
+ return Op;
+
+ // If the low part is a constant that is outside the range of LHI,
+ // then we're better off using IILF.
+ if (LowOp.getOpcode() == ISD::Constant) {
+ int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
+ if (!isInt<16>(Value))
+ return Op;
+ }
+
+ // Check whether the high part is an AND that doesn't change the
+ // high 32 bits and just masks out low bits. We can skip it if so.
+ if (HighOp.getOpcode() == ISD::AND &&
+ HighOp.getOperand(1).getOpcode() == ISD::Constant) {
+ ConstantSDNode *MaskNode = cast<ConstantSDNode>(HighOp.getOperand(1));
+ uint64_t Mask = MaskNode->getZExtValue() | Masks[High];
+ if ((Mask >> 32) == 0xffffffff)
+ HighOp = HighOp.getOperand(0);
+ }
+
+ // Take advantage of the fact that all GR32 operations only change the
+ // low 32 bits by truncating Low to an i32 and inserting it directly
+ // using a subreg. The interesting cases are those where the truncation
+ // can be folded.
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
+ SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
+ SDNode *Result = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ MVT::i64, HighOp, Low32, SubReg32);
+ return SDValue(Result, 0);
+}
+
+// Op is an 8-bit, 16-bit or 32-bit ATOMIC_LOAD_* operation.  Lower the 8-bit
+// and 16-bit operations into the fullword ATOMIC_LOADW_* operation given by
+// Opcode.
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
+ SelectionDAG &DAG,
+ unsigned Opcode) const {
+ AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());
+
+ // 32-bit operations need no code outside the main loop.
+ EVT NarrowVT = Node->getMemoryVT();
+ EVT WideVT = MVT::i32;
+ if (NarrowVT == WideVT)
+ return Op;
+
+ int64_t BitSize = NarrowVT.getSizeInBits();
+ SDValue ChainIn = Node->getChain();
+ SDValue Addr = Node->getBasePtr();
+ SDValue Src2 = Node->getVal();
+ MachineMemOperand *MMO = Node->getMemOperand();
+ DebugLoc DL = Node->getDebugLoc();
+ EVT PtrVT = Addr.getValueType();
+
+ // Convert atomic subtracts of constants into additions.
+ if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Src2)) {
+ Opcode = SystemZISD::ATOMIC_LOADW_ADD;
+ Src2 = DAG.getConstant(-Const->getSExtValue(), Src2.getValueType());
+ }
+
+ // Get the address of the containing word.
+ SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
+ DAG.getConstant(-4, PtrVT));
+
+ // Get the number of bits that the word must be rotated left in order
+ // to bring the field to the top bits of a GR32.
+ SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
+ DAG.getConstant(3, PtrVT));
+ BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
+
+ // Get the complementing shift amount, for rotating a field in the top
+ // bits back to its proper position.
+ SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
+ DAG.getConstant(0, WideVT), BitShift);
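+  // For example, for a byte at AlignedAddr + 1, BitShift is Addr * 8, whose
+  // effective rotate amount is 8; rotating the containing word left by that
+  // amount brings the byte to the top of the GR32, and NegBitShift rotates
+  // it back afterwards.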
+
+ // Extend the source operand to 32 bits and prepare it for the inner loop.
+ // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
+ // operations require the source to be shifted in advance. (This shift
+ // can be folded if the source is constant.) For AND and NAND, the lower
+ // bits must be set, while for other opcodes they should be left clear.
+ if (Opcode != SystemZISD::ATOMIC_SWAPW)
+ Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
+ DAG.getConstant(32 - BitSize, WideVT));
+ if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
+ Opcode == SystemZISD::ATOMIC_LOADW_NAND)
+ Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
+ DAG.getConstant(uint32_t(-1) >> BitSize, WideVT));
+
+ // Construct the ATOMIC_LOADW_* node.
+ SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
+ SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
+ DAG.getConstant(BitSize, WideVT) };
+ SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
+ array_lengthof(Ops),
+ NarrowVT, MMO);
+
+ // Rotate the result of the final CS so that the field is in the lower
+ // bits of a GR32, then truncate it.
+ SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
+ DAG.getConstant(BitSize, WideVT));
+ SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
+
+ SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
+ return DAG.getMergeValues(RetOps, 2, DL);
+}
+
+// Op is an 8-bit or 16-bit ATOMIC_CMP_SWAP operation.  Lower it into a
+// fullword ATOMIC_CMP_SWAPW operation.
+SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
+ SelectionDAG &DAG) const {
+ AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());
+
+ // We have native support for 32-bit compare and swap.
+ EVT NarrowVT = Node->getMemoryVT();
+ EVT WideVT = MVT::i32;
+ if (NarrowVT == WideVT)
+ return Op;
+
+ int64_t BitSize = NarrowVT.getSizeInBits();
+ SDValue ChainIn = Node->getOperand(0);
+ SDValue Addr = Node->getOperand(1);
+ SDValue CmpVal = Node->getOperand(2);
+ SDValue SwapVal = Node->getOperand(3);
+ MachineMemOperand *MMO = Node->getMemOperand();
+ DebugLoc DL = Node->getDebugLoc();
+ EVT PtrVT = Addr.getValueType();
+
+ // Get the address of the containing word.
+ SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
+ DAG.getConstant(-4, PtrVT));
+
+ // Get the number of bits that the word must be rotated left in order
+ // to bring the field to the top bits of a GR32.
+ SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
+ DAG.getConstant(3, PtrVT));
+ BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
+
+ // Get the complementing shift amount, for rotating a field in the top
+ // bits back to its proper position.
+ SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
+ DAG.getConstant(0, WideVT), BitShift);
+
+ // Construct the ATOMIC_CMP_SWAPW node.
+ SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
+ SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
+ NegBitShift, DAG.getConstant(BitSize, WideVT) };
+ SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
+ VTList, Ops, array_lengthof(Ops),
+ NarrowVT, MMO);
+ return AtomicOp;
+}
+
+SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
+ return DAG.getCopyFromReg(Op.getOperand(0), Op.getDebugLoc(),
+ SystemZ::R15D, Op.getValueType());
+}
+
+SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
+ return DAG.getCopyToReg(Op.getOperand(0), Op.getDebugLoc(),
+ SystemZ::R15D, Op.getOperand(1));
+}
+
+SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ case ISD::BR_CC:
+ return lowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC:
+ return lowerSELECT_CC(Op, DAG);
+ case ISD::GlobalAddress:
+ return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
+ case ISD::GlobalTLSAddress:
+ return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
+ case ISD::BlockAddress:
+ return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
+ case ISD::JumpTable:
+ return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
+ case ISD::ConstantPool:
+ return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
+ case ISD::BITCAST:
+ return lowerBITCAST(Op, DAG);
+ case ISD::VASTART:
+ return lowerVASTART(Op, DAG);
+ case ISD::VACOPY:
+ return lowerVACOPY(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC:
+ return lowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::UMUL_LOHI:
+ return lowerUMUL_LOHI(Op, DAG);
+ case ISD::SDIVREM:
+ return lowerSDIVREM(Op, DAG);
+ case ISD::UDIVREM:
+ return lowerUDIVREM(Op, DAG);
+ case ISD::OR:
+ return lowerOR(Op, DAG);
+ case ISD::ATOMIC_SWAP:
+ return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_SWAPW);
+ case ISD::ATOMIC_LOAD_ADD:
+ return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
+ case ISD::ATOMIC_LOAD_SUB:
+ return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
+ case ISD::ATOMIC_LOAD_AND:
+ return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
+ case ISD::ATOMIC_LOAD_OR:
+ return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
+ case ISD::ATOMIC_LOAD_XOR:
+ return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
+ case ISD::ATOMIC_LOAD_NAND:
+ return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
+ case ISD::ATOMIC_LOAD_MIN:
+ return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
+ case ISD::ATOMIC_LOAD_MAX:
+ return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
+ case ISD::ATOMIC_LOAD_UMIN:
+ return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
+ case ISD::ATOMIC_LOAD_UMAX:
+ return lowerATOMIC_LOAD(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
+ case ISD::ATOMIC_CMP_SWAP:
+ return lowerATOMIC_CMP_SWAP(Op, DAG);
+ case ISD::STACKSAVE:
+ return lowerSTACKSAVE(Op, DAG);
+ case ISD::STACKRESTORE:
+ return lowerSTACKRESTORE(Op, DAG);
+ default:
+ llvm_unreachable("Unexpected node to lower");
+ }
+}
+
+const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
+#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
+ switch (Opcode) {
+ OPCODE(RET_FLAG);
+ OPCODE(CALL);
+ OPCODE(PCREL_WRAPPER);
+ OPCODE(CMP);
+ OPCODE(UCMP);
+ OPCODE(BR_CCMASK);
+ OPCODE(SELECT_CCMASK);
+ OPCODE(ADJDYNALLOC);
+ OPCODE(EXTRACT_ACCESS);
+ OPCODE(UMUL_LOHI64);
+ OPCODE(SDIVREM64);
+ OPCODE(UDIVREM32);
+ OPCODE(UDIVREM64);
+ OPCODE(ATOMIC_SWAPW);
+ OPCODE(ATOMIC_LOADW_ADD);
+ OPCODE(ATOMIC_LOADW_SUB);
+ OPCODE(ATOMIC_LOADW_AND);
+ OPCODE(ATOMIC_LOADW_OR);
+ OPCODE(ATOMIC_LOADW_XOR);
+ OPCODE(ATOMIC_LOADW_NAND);
+ OPCODE(ATOMIC_LOADW_MIN);
+ OPCODE(ATOMIC_LOADW_MAX);
+ OPCODE(ATOMIC_LOADW_UMIN);
+ OPCODE(ATOMIC_LOADW_UMAX);
+ OPCODE(ATOMIC_CMP_SWAPW);
+ }
+ return NULL;
+#undef OPCODE
+}
+
+//===----------------------------------------------------------------------===//
+// Custom insertion
+//===----------------------------------------------------------------------===//
+
+// Create a new basic block after MBB.
+static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
+ MachineFunction &MF = *MBB->getParent();
+ MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
+ MF.insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB);
+ return NewMBB;
+}
+
+// Split MBB after MI and return the new block (the one that contains
+// instructions after MI).
+static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+ NewMBB->splice(NewMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ return NewMBB;
+}
+
+// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
+MachineBasicBlock *
+SystemZTargetLowering::emitSelect(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const SystemZInstrInfo *TII = TM.getInstrInfo();
+
+ unsigned DestReg = MI->getOperand(0).getReg();
+ unsigned TrueReg = MI->getOperand(1).getReg();
+ unsigned FalseReg = MI->getOperand(2).getReg();
+ unsigned CCMask = MI->getOperand(3).getImm();
+ DebugLoc DL = MI->getDebugLoc();
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
+
+ // StartMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // jCC JoinMBB
+ // # fallthrough to FalseMBB
+ MBB = StartMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(CCMask).addMBB(JoinMBB);
+ MBB->addSuccessor(JoinMBB);
+ MBB->addSuccessor(FalseMBB);
+
+ // FalseMBB:
+ // # fallthrough to JoinMBB
+ MBB = FalseMBB;
+ MBB->addSuccessor(JoinMBB);
+
+ // JoinMBB:
+ // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
+ // ...
+ MBB = JoinMBB;
+ BuildMI(*MBB, MBB->begin(), DL, TII->get(SystemZ::PHI), DestReg)
+ .addReg(TrueReg).addMBB(StartMBB)
+ .addReg(FalseReg).addMBB(FalseMBB);
+
+ MI->eraseFromParent();
+ return JoinMBB;
+}
+
+// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
+// or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
+// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
+// BitSize is the width of the field in bits, or 0 if this is a partword
+// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
+// is one of the operands. Invert says whether the field should be
+// inverted after performing BinOpcode (e.g. for NAND).
+MachineBasicBlock *
+SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned BinOpcode,
+ unsigned BitSize,
+ bool Invert) const {
+ const SystemZInstrInfo *TII = TM.getInstrInfo();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned MaskNE = CCMaskForCondCode(ISD::SETNE);
+ bool IsSubWord = (BitSize < 32);
+
+ // Extract the operands. Base can be a register or a frame index.
+ // Src2 can be a register or immediate.
+ unsigned Dest = MI->getOperand(0).getReg();
+ MachineOperand Base = earlyUseOperand(MI->getOperand(1));
+ int64_t Disp = MI->getOperand(2).getImm();
+ MachineOperand Src2 = earlyUseOperand(MI->getOperand(3));
+ unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
+ unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
+ DebugLoc DL = MI->getDebugLoc();
+ if (IsSubWord)
+ BitSize = MI->getOperand(6).getImm();
+
+ // Subword operations use 32-bit registers.
+ const TargetRegisterClass *RC = (BitSize <= 32 ?
+ &SystemZ::GR32BitRegClass :
+ &SystemZ::GR64BitRegClass);
+ unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
+ unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
+
+ // Get the right opcodes for the displacement.
+ LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
+ CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
+ assert(LOpcode && CSOpcode && "Displacement out of range");
+
+ // Create virtual registers for temporary results.
+ unsigned OrigVal = MRI.createVirtualRegister(RC);
+ unsigned OldVal = MRI.createVirtualRegister(RC);
+ unsigned NewVal = (BinOpcode || IsSubWord ?
+ MRI.createVirtualRegister(RC) : Src2.getReg());
+ unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
+ unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
+
+ // Insert a basic block for the main loop.
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+
+ // StartMBB:
+ // ...
+ // %OrigVal = L Disp(%Base)
+  //   # fall through to LoopMBB
+ MBB = StartMBB;
+ BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
+ .addOperand(Base).addImm(Disp).addReg(0);
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
+ // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
+ // %RotatedNewVal = OP %RotatedOldVal, %Src2
+ // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
+ // %Dest = CS %OldVal, %NewVal, Disp(%Base)
+ // JNE LoopMBB
+  //   # fall through to DoneMBB
+ MBB = LoopMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
+ .addReg(OrigVal).addMBB(StartMBB)
+ .addReg(Dest).addMBB(LoopMBB);
+ if (IsSubWord)
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
+ .addReg(OldVal).addReg(BitShift).addImm(0);
+ if (Invert) {
+ // Perform the operation normally and then invert every bit of the field.
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
+ .addReg(RotatedOldVal).addOperand(Src2);
+ if (BitSize < 32)
+ // XILF with the upper BitSize bits set.
+ BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
+ .addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize)));
+ else if (BitSize == 32)
+ // XILF with every bit set.
+ BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
+ .addReg(Tmp).addImm(~uint32_t(0));
+ else {
+ // Use LCGR and add -1 to the result, which is more compact than
+ // an XILF, XILH pair.
+ unsigned Tmp2 = MRI.createVirtualRegister(RC);
+ BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
+ BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
+ .addReg(Tmp2).addImm(-1);
+ }
+ } else if (BinOpcode)
+    // A simple binary operation.
+ BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
+ .addReg(RotatedOldVal).addOperand(Src2);
+ else if (IsSubWord)
+ // Use RISBG to rotate Src2 into position and use it to replace the
+ // field in RotatedOldVal.
+ BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
+ .addReg(RotatedOldVal).addReg(Src2.getReg())
+ .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
+ if (IsSubWord)
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
+ .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
+ BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
+ .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ MI->eraseFromParent();
+ return DoneMBB;
+}
+
+// Implement EmitInstrWithCustomInserter for pseudo
+// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
+// instruction that should be used to compare the current field with the
+// minimum or maximum value. KeepOldMask is the BRC condition-code mask
+// for when the current field should be kept. BitSize is the width of
+// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
+MachineBasicBlock *
+SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned CompareOpcode,
+ unsigned KeepOldMask,
+ unsigned BitSize) const {
+ const SystemZInstrInfo *TII = TM.getInstrInfo();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned MaskNE = CCMaskForCondCode(ISD::SETNE);
+ bool IsSubWord = (BitSize < 32);
+
+ // Extract the operands. Base can be a register or a frame index.
+ unsigned Dest = MI->getOperand(0).getReg();
+ MachineOperand Base = earlyUseOperand(MI->getOperand(1));
+ int64_t Disp = MI->getOperand(2).getImm();
+ unsigned Src2 = MI->getOperand(3).getReg();
+ unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
+ unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
+ DebugLoc DL = MI->getDebugLoc();
+ if (IsSubWord)
+ BitSize = MI->getOperand(6).getImm();
+
+ // Subword operations use 32-bit registers.
+ const TargetRegisterClass *RC = (BitSize <= 32 ?
+ &SystemZ::GR32BitRegClass :
+ &SystemZ::GR64BitRegClass);
+ unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
+ unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
+
+ // Get the right opcodes for the displacement.
+ LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
+ CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
+ assert(LOpcode && CSOpcode && "Displacement out of range");
+
+ // Create virtual registers for temporary results.
+ unsigned OrigVal = MRI.createVirtualRegister(RC);
+ unsigned OldVal = MRI.createVirtualRegister(RC);
+ unsigned NewVal = MRI.createVirtualRegister(RC);
+ unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
+ unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
+ unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
+
+ // Insert 3 basic blocks for the loop.
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
+ MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
+
+ // StartMBB:
+ // ...
+ // %OrigVal = L Disp(%Base)
+  //   # fall through to LoopMBB
+ MBB = StartMBB;
+ BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
+ .addOperand(Base).addImm(Disp).addReg(0);
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
+ // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
+ // CompareOpcode %RotatedOldVal, %Src2
+ // BRCL KeepOldMask, UpdateMBB
+ MBB = LoopMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
+ .addReg(OrigVal).addMBB(StartMBB)
+ .addReg(Dest).addMBB(UpdateMBB);
+ if (IsSubWord)
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
+ .addReg(OldVal).addReg(BitShift).addImm(0);
+ BuildMI(MBB, DL, TII->get(CompareOpcode))
+ .addReg(RotatedOldVal).addReg(Src2);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRCL))
+ .addImm(KeepOldMask).addMBB(UpdateMBB);
+ MBB->addSuccessor(UpdateMBB);
+ MBB->addSuccessor(UseAltMBB);
+
+ // UseAltMBB:
+ // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
+  //   # fall through to UpdateMBB
+ MBB = UseAltMBB;
+ if (IsSubWord)
+ BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
+ .addReg(RotatedOldVal).addReg(Src2)
+ .addImm(32).addImm(31 + BitSize).addImm(0);
+ MBB->addSuccessor(UpdateMBB);
+
+ // UpdateMBB:
+ // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
+ // [ %RotatedAltVal, UseAltMBB ]
+ // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
+ // %Dest = CS %OldVal, %NewVal, Disp(%Base)
+ // JNE LoopMBB
+  //   # fall through to DoneMBB
+ MBB = UpdateMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
+ .addReg(RotatedOldVal).addMBB(LoopMBB)
+ .addReg(RotatedAltVal).addMBB(UseAltMBB);
+ if (IsSubWord)
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
+ .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
+ BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
+ .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ MI->eraseFromParent();
+ return DoneMBB;
+}
+
+// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
+// instruction MI.
+MachineBasicBlock *
+SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const SystemZInstrInfo *TII = TM.getInstrInfo();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned MaskNE = CCMaskForCondCode(ISD::SETNE);
+
+ // Extract the operands. Base can be a register or a frame index.
+ unsigned Dest = MI->getOperand(0).getReg();
+ MachineOperand Base = earlyUseOperand(MI->getOperand(1));
+ int64_t Disp = MI->getOperand(2).getImm();
+ unsigned OrigCmpVal = MI->getOperand(3).getReg();
+ unsigned OrigSwapVal = MI->getOperand(4).getReg();
+ unsigned BitShift = MI->getOperand(5).getReg();
+ unsigned NegBitShift = MI->getOperand(6).getReg();
+ int64_t BitSize = MI->getOperand(7).getImm();
+ DebugLoc DL = MI->getDebugLoc();
+
+ const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
+
+ // Get the right opcodes for the displacement.
+ unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
+ unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
+ assert(LOpcode && CSOpcode && "Displacement out of range");
+
+ // Create virtual registers for temporary results.
+ unsigned OrigOldVal = MRI.createVirtualRegister(RC);
+ unsigned OldVal = MRI.createVirtualRegister(RC);
+ unsigned CmpVal = MRI.createVirtualRegister(RC);
+ unsigned SwapVal = MRI.createVirtualRegister(RC);
+ unsigned StoreVal = MRI.createVirtualRegister(RC);
+ unsigned RetryOldVal = MRI.createVirtualRegister(RC);
+ unsigned RetryCmpVal = MRI.createVirtualRegister(RC);
+ unsigned RetrySwapVal = MRI.createVirtualRegister(RC);
+
+ // Insert 2 basic blocks for the loop.
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
+
+ // StartMBB:
+ // ...
+ // %OrigOldVal = L Disp(%Base)
+  //   # fall through to LoopMBB
+ MBB = StartMBB;
+ BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
+ .addOperand(Base).addImm(Disp).addReg(0);
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+  //   %OldVal        = phi [ %OrigOldVal, StartMBB ], [ %RetryOldVal, SetMBB ]
+  //   %CmpVal        = phi [ %OrigCmpVal, StartMBB ], [ %RetryCmpVal, SetMBB ]
+  //   %SwapVal       = phi [ %OrigSwapVal, StartMBB ], [ %RetrySwapVal, SetMBB ]
+ // %Dest = RLL %OldVal, BitSize(%BitShift)
+ // ^^ The low BitSize bits contain the field
+ // of interest.
+ // %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
+ // ^^ Replace the upper 32-BitSize bits of the
+ // comparison value with those that we loaded,
+ // so that we can use a full word comparison.
+ // CR %Dest, %RetryCmpVal
+ // JNE DoneMBB
+ // # Fall through to SetMBB
+ MBB = LoopMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
+ .addReg(OrigOldVal).addMBB(StartMBB)
+ .addReg(RetryOldVal).addMBB(SetMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
+ .addReg(OrigCmpVal).addMBB(StartMBB)
+ .addReg(RetryCmpVal).addMBB(SetMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
+ .addReg(OrigSwapVal).addMBB(StartMBB)
+ .addReg(RetrySwapVal).addMBB(SetMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
+ .addReg(OldVal).addReg(BitShift).addImm(BitSize);
+ BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
+ .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::CR))
+ .addReg(Dest).addReg(RetryCmpVal);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(DoneMBB);
+ MBB->addSuccessor(DoneMBB);
+ MBB->addSuccessor(SetMBB);
+
+ // SetMBB:
+ // %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
+ // ^^ Replace the upper 32-BitSize bits of the new
+ // value with those that we loaded.
+ // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
+ // ^^ Rotate the new field to its proper position.
+ // %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base)
+ // JNE LoopMBB
+  //   # fall through to DoneMBB
+ MBB = SetMBB;
+ BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
+ .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
+ .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
+ BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
+ .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ MI->eraseFromParent();
+ return DoneMBB;
+}
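
emitAtomicCmpSwapW uses the same retry structure, with the extra wrinkle that the loaded upper bits are merged into the comparison value (the RISBG32 above) so a full-word CR can be used. A standalone sketch of the equivalent field-level semantics, with the field width and position again assumed purely for illustration:

#include <atomic>
#include <cstdint>

// Compare-and-swap a 16-bit field inside an aligned 32-bit word; returns the
// previous field value, as the pseudo returns %Dest.
int16_t cmpSwapI16(std::atomic<uint32_t> &Word, unsigned BitPos,
                   int16_t CmpVal, int16_t SwapVal) {
  uint32_t Old = Word.load();
  for (;;) {
    int16_t Field = int16_t((Old >> BitPos) & 0xFFFF);
    if (Field != CmpVal)                                    // CR + JNE DoneMBB
      return Field;
    uint32_t Update = (Old & ~(uint32_t(0xFFFF) << BitPos)) |
                      (uint32_t(uint16_t(SwapVal)) << BitPos);
    if (Word.compare_exchange_weak(Old, Update))            // CS; JNE LoopMBB
      return Field;
  }
}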
+
+// Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true
+// if the high register of the GR128 value must be cleared or false if
+// it's "don't care".  SubReg is subreg_low32 when extending a GR32
+// and subreg_low when extending a GR64.
+MachineBasicBlock *
+SystemZTargetLowering::emitExt128(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool ClearEven, unsigned SubReg) const {
+ const SystemZInstrInfo *TII = TM.getInstrInfo();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
+
+ BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
+ if (ClearEven) {
+ unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
+ unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
+
+ BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
+ .addImm(0);
+ BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
+ .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_high);
+ In128 = NewIn128;
+ }
+ BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
+ .addReg(In128).addReg(Src).addImm(SubReg);
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
+MachineBasicBlock *SystemZTargetLowering::
+EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
+ switch (MI->getOpcode()) {
+ case SystemZ::Select32:
+ case SystemZ::SelectF32:
+ case SystemZ::Select64:
+ case SystemZ::SelectF64:
+ case SystemZ::SelectF128:
+ return emitSelect(MI, MBB);
+
+ case SystemZ::AEXT128_64:
+ return emitExt128(MI, MBB, false, SystemZ::subreg_low);
+ case SystemZ::ZEXT128_32:
+ return emitExt128(MI, MBB, true, SystemZ::subreg_low32);
+ case SystemZ::ZEXT128_64:
+ return emitExt128(MI, MBB, true, SystemZ::subreg_low);
+
+ case SystemZ::ATOMIC_SWAPW:
+ return emitAtomicLoadBinary(MI, MBB, 0, 0);
+ case SystemZ::ATOMIC_SWAP_32:
+ return emitAtomicLoadBinary(MI, MBB, 0, 32);
+ case SystemZ::ATOMIC_SWAP_64:
+ return emitAtomicLoadBinary(MI, MBB, 0, 64);
+
+ case SystemZ::ATOMIC_LOADW_AR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
+ case SystemZ::ATOMIC_LOADW_AFI:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
+ case SystemZ::ATOMIC_LOAD_AR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
+ case SystemZ::ATOMIC_LOAD_AHI:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
+ case SystemZ::ATOMIC_LOAD_AFI:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
+ case SystemZ::ATOMIC_LOAD_AGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
+ case SystemZ::ATOMIC_LOAD_AGHI:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
+ case SystemZ::ATOMIC_LOAD_AGFI:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);
+
+ case SystemZ::ATOMIC_LOADW_SR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
+ case SystemZ::ATOMIC_LOAD_SR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
+ case SystemZ::ATOMIC_LOAD_SGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
+
+ case SystemZ::ATOMIC_LOADW_NR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
+ case SystemZ::ATOMIC_LOADW_NILH:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0);
+ case SystemZ::ATOMIC_LOAD_NR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
+ case SystemZ::ATOMIC_LOAD_NILL32:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32);
+ case SystemZ::ATOMIC_LOAD_NILH32:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32);
+ case SystemZ::ATOMIC_LOAD_NILF32:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32);
+ case SystemZ::ATOMIC_LOAD_NGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
+ case SystemZ::ATOMIC_LOAD_NILL:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64);
+ case SystemZ::ATOMIC_LOAD_NILH:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64);
+ case SystemZ::ATOMIC_LOAD_NIHL:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64);
+ case SystemZ::ATOMIC_LOAD_NIHH:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64);
+ case SystemZ::ATOMIC_LOAD_NILF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64);
+ case SystemZ::ATOMIC_LOAD_NIHF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64);
+
+ case SystemZ::ATOMIC_LOADW_OR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
+ case SystemZ::ATOMIC_LOADW_OILH:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 0);
+ case SystemZ::ATOMIC_LOAD_OR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
+ case SystemZ::ATOMIC_LOAD_OILL32:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL32, 32);
+ case SystemZ::ATOMIC_LOAD_OILH32:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 32);
+ case SystemZ::ATOMIC_LOAD_OILF32:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF32, 32);
+ case SystemZ::ATOMIC_LOAD_OGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
+ case SystemZ::ATOMIC_LOAD_OILL:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 64);
+ case SystemZ::ATOMIC_LOAD_OILH:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 64);
+ case SystemZ::ATOMIC_LOAD_OIHL:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL, 64);
+ case SystemZ::ATOMIC_LOAD_OIHH:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH, 64);
+ case SystemZ::ATOMIC_LOAD_OILF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 64);
+ case SystemZ::ATOMIC_LOAD_OIHF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF, 64);
+
+ case SystemZ::ATOMIC_LOADW_XR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
+ case SystemZ::ATOMIC_LOADW_XILF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 0);
+ case SystemZ::ATOMIC_LOAD_XR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
+ case SystemZ::ATOMIC_LOAD_XILF32:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 32);
+ case SystemZ::ATOMIC_LOAD_XGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
+ case SystemZ::ATOMIC_LOAD_XILF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 64);
+ case SystemZ::ATOMIC_LOAD_XIHF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF, 64);
+
+ case SystemZ::ATOMIC_LOADW_NRi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
+ case SystemZ::ATOMIC_LOADW_NILHi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0, true);
+ case SystemZ::ATOMIC_LOAD_NRi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
+ case SystemZ::ATOMIC_LOAD_NILL32i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32, true);
+ case SystemZ::ATOMIC_LOAD_NILH32i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32, true);
+ case SystemZ::ATOMIC_LOAD_NILF32i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32, true);
+ case SystemZ::ATOMIC_LOAD_NGRi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILLi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILHi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHLi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHHi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILFi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHFi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64, true);
+
+ case SystemZ::ATOMIC_LOADW_MIN:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
+ SystemZ::CCMASK_CMP_LE, 0);
+ case SystemZ::ATOMIC_LOAD_MIN_32:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
+ SystemZ::CCMASK_CMP_LE, 32);
+ case SystemZ::ATOMIC_LOAD_MIN_64:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
+ SystemZ::CCMASK_CMP_LE, 64);
+
+ case SystemZ::ATOMIC_LOADW_MAX:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
+ SystemZ::CCMASK_CMP_GE, 0);
+ case SystemZ::ATOMIC_LOAD_MAX_32:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
+ SystemZ::CCMASK_CMP_GE, 32);
+ case SystemZ::ATOMIC_LOAD_MAX_64:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
+ SystemZ::CCMASK_CMP_GE, 64);
+
+ case SystemZ::ATOMIC_LOADW_UMIN:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
+ SystemZ::CCMASK_CMP_LE, 0);
+ case SystemZ::ATOMIC_LOAD_UMIN_32:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
+ SystemZ::CCMASK_CMP_LE, 32);
+ case SystemZ::ATOMIC_LOAD_UMIN_64:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
+ SystemZ::CCMASK_CMP_LE, 64);
+
+ case SystemZ::ATOMIC_LOADW_UMAX:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
+ SystemZ::CCMASK_CMP_GE, 0);
+ case SystemZ::ATOMIC_LOAD_UMAX_32:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
+ SystemZ::CCMASK_CMP_GE, 32);
+ case SystemZ::ATOMIC_LOAD_UMAX_64:
+ return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
+ SystemZ::CCMASK_CMP_GE, 64);
+
+ case SystemZ::ATOMIC_CMP_SWAPW:
+ return emitAtomicCmpSwapW(MI, MBB);
+ default:
+ llvm_unreachable("Unexpected instr type to insert");
+ }
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
new file mode 100644
index 0000000..eea820c
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -0,0 +1,212 @@
+//===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that SystemZ uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H
+#define LLVM_TARGET_SystemZ_ISELLOWERING_H
+
+#include "SystemZ.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+namespace SystemZISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Return with a flag operand. Operand 0 is the chain operand.
+ RET_FLAG,
+
+ // Calls a function. Operand 0 is the chain operand and operand 1
+ // is the target address. The arguments start at operand 2.
+ // There is an optional glue operand at the end.
+ CALL,
+
+ // Wraps a TargetGlobalAddress that should be loaded using PC-relative
+ // accesses (LARL). Operand 0 is the address.
+ PCREL_WRAPPER,
+
+ // Signed integer and floating-point comparisons. The operands are the
+ // two values to compare.
+ CMP,
+
+ // Likewise unsigned integer comparison.
+ UCMP,
+
+ // Branches if a condition is true. Operand 0 is the chain operand;
+ // operand 1 is the 4-bit condition-code mask, with bit N in
+ // big-endian order meaning "branch if CC=N"; operand 2 is the
+ // target block and operand 3 is the flag operand.
+ BR_CCMASK,
+
+ // Selects between operand 0 and operand 1. Operand 2 is the
+ // mask of condition-code values for which operand 0 should be
+ // chosen over operand 1; it has the same form as BR_CCMASK.
+ // Operand 3 is the flag operand.
+ SELECT_CCMASK,
+
+ // Evaluates to the gap between the stack pointer and the
+ // base of the dynamically-allocatable area.
+ ADJDYNALLOC,
+
+ // Extracts the value of a 32-bit access register. Operand 0 is
+ // the number of the register.
+ EXTRACT_ACCESS,
+
+ // Wrappers around the ISD opcodes of the same name. The output and
+ // first input operands are GR128s. The trailing numbers are the
+ // widths of the second operand in bits.
+ UMUL_LOHI64,
+ SDIVREM64,
+ UDIVREM32,
+ UDIVREM64,
+
+ // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
+ // ATOMIC_LOAD_<op>.
+ //
+ // Operand 0: the address of the containing 32-bit-aligned field
+ // Operand 1: the second operand of <op>, in the high bits of an i32
+ // for everything except ATOMIC_SWAPW
+ // Operand 2: how many bits to rotate the i32 left to bring the first
+ // operand into the high bits
+ // Operand 3: the negative of operand 2, for rotating the other way
+ // Operand 4: the width of the field in bits (8 or 16)
+ ATOMIC_SWAPW = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ ATOMIC_LOADW_ADD,
+ ATOMIC_LOADW_SUB,
+ ATOMIC_LOADW_AND,
+ ATOMIC_LOADW_OR,
+ ATOMIC_LOADW_XOR,
+ ATOMIC_LOADW_NAND,
+ ATOMIC_LOADW_MIN,
+ ATOMIC_LOADW_MAX,
+ ATOMIC_LOADW_UMIN,
+ ATOMIC_LOADW_UMAX,
+
+ // A wrapper around the inner loop of an ATOMIC_CMP_SWAP.
+ //
+ // Operand 0: the address of the containing 32-bit-aligned field
+ // Operand 1: the compare value, in the low bits of an i32
+ // Operand 2: the swap value, in the low bits of an i32
+ // Operand 3: how many bits to rotate the i32 left to bring the first
+ // operand into the high bits
+    // Operand 4: the negative of operand 3, for rotating the other way
+ // Operand 5: the width of the field in bits (8 or 16)
+ ATOMIC_CMP_SWAPW
+ };
+}
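
The BR_CCMASK and SELECT_CCMASK comments above describe the mask as four bits in big-endian order. A minimal sketch of that convention (illustration only; the helper name is invented):

#include <cassert>

// True if the 4-bit condition-code mask selects machine condition code CC
// (0..3); bit N, counted from the most significant of the four bits, means
// "selected when CC == N".
bool ccMaskSelects(unsigned CCMask, unsigned CC) {
  assert(CCMask < 16 && CC < 4 && "4-bit mask, 2-bit condition code");
  return (CCMask & (1u << (3 - CC))) != 0;
}
// e.g. a mask of 0x8 selects only CC == 0; 0x7 selects CC == 1, 2 or 3.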
+
+class SystemZSubtarget;
+class SystemZTargetMachine;
+
+class SystemZTargetLowering : public TargetLowering {
+public:
+ explicit SystemZTargetLowering(SystemZTargetMachine &TM);
+
+ // Override TargetLowering.
+ virtual MVT getScalarShiftAmountTy(EVT LHSTy) const LLVM_OVERRIDE {
+ return MVT::i32;
+ }
+ virtual EVT getSetCCResultType(EVT VT) const {
+ return MVT::i32;
+ }
+ virtual bool isFMAFasterThanMulAndAdd(EVT) const LLVM_OVERRIDE {
+ return true;
+ }
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE;
+ virtual std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const LLVM_OVERRIDE;
+ virtual TargetLowering::ConstraintType
+ getConstraintType(const std::string &Constraint) const LLVM_OVERRIDE;
+ virtual TargetLowering::ConstraintWeight
+ getSingleConstraintMatchWeight(AsmOperandInfo &info,
+ const char *constraint) const LLVM_OVERRIDE;
+ virtual void
+ LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const LLVM_OVERRIDE;
+ virtual MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const LLVM_OVERRIDE;
+ virtual SDValue LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const LLVM_OVERRIDE;
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const LLVM_OVERRIDE;
+ virtual SDValue
+ LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const LLVM_OVERRIDE;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc DL, SelectionDAG &DAG) const LLVM_OVERRIDE;
+
+private:
+ const SystemZSubtarget &Subtarget;
+ const SystemZTargetMachine &TM;
+
+ // Implement LowerOperation for individual opcodes.
+ SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG) const;
+ SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
+ SelectionDAG &DAG) const;
+ SDValue lowerBlockAddress(BlockAddressSDNode *Node,
+ SelectionDAG &DAG) const;
+ SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const;
+ SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const;
+ SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG,
+ unsigned Opcode) const;
+ SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
+
+ // Implement EmitInstrWithCustomInserter for individual operation types.
+ MachineBasicBlock *emitSelect(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitExt128(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool ClearEven, unsigned SubReg) const;
+ MachineBasicBlock *emitAtomicLoadBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned BinOpcode, unsigned BitSize,
+ bool Invert = false) const;
+ MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned CompareOpcode,
+ unsigned KeepOldMask,
+ unsigned BitSize) const;
+ MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+};
+} // end namespace llvm
+
+#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
new file mode 100644
index 0000000..fb699b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -0,0 +1,48 @@
+//===-- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZINSTRBUILDER_H
+#define SYSTEMZINSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+/// Add a BDX memory reference for frame object FI to MIB.
+static inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI) {
+ MachineInstr *MI = MIB;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo *MFFrame = MF.getFrameInfo();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Flags = 0;
+ if (MCID.mayLoad())
+ Flags |= MachineMemOperand::MOLoad;
+ if (MCID.mayStore())
+ Flags |= MachineMemOperand::MOStore;
+ int64_t Offset = 0;
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(
+ PseudoSourceValue::getFixedStack(FI), Offset),
+ Flags, MFFrame->getObjectSize(FI),
+ MFFrame->getObjectAlignment(FI));
+ return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO);
+}
+
+} // End llvm namespace
+
+#endif
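
A typical use of the helper above, sketched rather than taken from this patch: wrap a BuildMI() call so the frame index, a zero displacement, a zero index register and the memory operand are appended in BDX order. SystemZ::STG, SystemZInstrInfo and the includes below are assumed to be available as they are in the lowering code earlier in this diff.

#include "SystemZInstrBuilder.h"
#include "SystemZInstrInfo.h"

// Sketch only: store a 64-bit register to a stack slot.
static void storeGR64ToSlot(llvm::MachineBasicBlock &MBB,
                            llvm::MachineBasicBlock::iterator InsertPt,
                            const llvm::SystemZInstrInfo *TII,
                            unsigned SrcReg, int FrameIdx, llvm::DebugLoc DL) {
  llvm::addFrameReference(
      llvm::BuildMI(MBB, InsertPt, DL, TII->get(llvm::SystemZ::STG))
          .addReg(SrcReg, llvm::getKillRegState(true)),
      FrameIdx);
}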
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
new file mode 100644
index 0000000..7c9f0e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -0,0 +1,318 @@
+//==- SystemZInstrFP.td - Floating-point SystemZ instructions --*- tblgen-*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Control-flow instructions
+//===----------------------------------------------------------------------===//
+
+// C's ?: operator for floating-point operands.
+def SelectF32 : SelectWrapper<FP32>;
+def SelectF64 : SelectWrapper<FP64>;
+def SelectF128 : SelectWrapper<FP128>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero.
+let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+ def LZER : InherentRRE<"lzer", 0xB374, FP32, (fpimm0)>;
+ def LZDR : InherentRRE<"lzdr", 0xB375, FP64, (fpimm0)>;
+ def LZXR : InherentRRE<"lzxr", 0xB376, FP128, (fpimm0)>;
+}
+
+// Moves between two floating-point registers.
+let neverHasSideEffects = 1 in {
+ def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>;
+ def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>;
+ def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>;
+}
+
+// Moves between 64-bit integer and floating-point registers.
+def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>;
+def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>;
+
+// fcopysign with an FP32 result.
+let isCodeGenOnly = 1 in {
+ def CPSDRss : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>;
+ def CPSDRsd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>;
+}
+
+// The sign of an FP128 is in the high register. Give the CPSDRsd
+// operands in R1, R2, R3 order.
+def : Pat<(fcopysign FP32:$src1, FP128:$src2),
+ (CPSDRsd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP32:$src1)>;
+
+// fcopysign with an FP64 result.
+let isCodeGenOnly = 1 in
+ def CPSDRds : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>;
+def CPSDRdd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>;
+
+// The sign of an FP128 is in the high register. Give the CPSDRdd
+// operands in R1, R2, R3 order.
+def : Pat<(fcopysign FP64:$src1, FP128:$src2),
+ (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP64:$src1)>;
+
+// fcopysign with an FP128 result. Use "upper" as the high half and leave
+// the low half as-is.
+class CopySign128<RegisterOperand cls, dag upper>
+ : Pat<(fcopysign FP128:$src1, cls:$src2),
+ (INSERT_SUBREG FP128:$src1, upper, subreg_high)>;
+
+// Give the CPSDR* operands in R1, R2, R3 order.
+def : CopySign128<FP32, (CPSDRds FP32:$src2,
+ (EXTRACT_SUBREG FP128:$src1, subreg_high))>;
+def : CopySign128<FP64, (CPSDRdd FP64:$src2,
+ (EXTRACT_SUBREG FP128:$src1, subreg_high))>;
+def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high),
+ (EXTRACT_SUBREG FP128:$src1, subreg_high))>;
+
+//===----------------------------------------------------------------------===//
+// Load instructions
+//===----------------------------------------------------------------------===//
+
+let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
+ defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32>;
+ defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64>;
+
+ // These instructions are split after register allocation, so we don't
+ // want a custom inserter.
+ let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+ def LX : Pseudo<(outs FP128:$dst), (ins bdxaddr20only128:$src),
+ [(set FP128:$dst, (load bdxaddr20only128:$src))]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Store instructions
+//===----------------------------------------------------------------------===//
+
+let SimpleBDXStore = 1 in {
+ defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32>;
+ defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64>;
+
+ // These instructions are split after register allocation, so we don't
+ // want a custom inserter.
+ let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+ def STX : Pseudo<(outs), (ins FP128:$src, bdxaddr20only128:$dst),
+ [(store FP128:$src, bdxaddr20only128:$dst)]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Convert floating-point values to narrower representations, rounding
+// according to the current mode. The destination of LEXBR and LDXBR
+// is a 128-bit value, but only the first register of the pair is used.
+def LEDBR : UnaryRRE<"ledbr", 0xB344, fround, FP32, FP64>;
+def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
+def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
+
+def : Pat<(f32 (fround FP128:$src)),
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_32bit)>;
+def : Pat<(f64 (fround FP128:$src)),
+ (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_high)>;
+
+// Extend register floating-point values to wider representations.
+def LDEBR : UnaryRRE<"ldebr", 0xB304, fextend, FP64, FP32>;
+def LXEBR : UnaryRRE<"lxebr", 0xB306, fextend, FP128, FP32>;
+def LXDBR : UnaryRRE<"lxdbr", 0xB305, fextend, FP128, FP64>;
+
+// Extend memory floating-point values to wider representations.
+def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64>;
+def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128>;
+def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128>;
+
+// Convert a signed integer register value to a floating-point one.
+let Defs = [PSW] in {
+ def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>;
+ def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>;
+ def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
+
+ def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>;
+ def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>;
+ def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>;
+}
+
+// Convert a floating-point register value to a signed integer value,
+// with the second operand (modifier M3) specifying the rounding mode.
+let Defs = [PSW] in {
+ def CFEBR : UnaryRRF<"cfebr", 0xB398, GR32, FP32>;
+ def CFDBR : UnaryRRF<"cfdbr", 0xB399, GR32, FP64>;
+ def CFXBR : UnaryRRF<"cfxbr", 0xB39A, GR32, FP128>;
+
+ def CGEBR : UnaryRRF<"cgebr", 0xB3A8, GR64, FP32>;
+ def CGDBR : UnaryRRF<"cgdbr", 0xB3A9, GR64, FP64>;
+ def CGXBR : UnaryRRF<"cgxbr", 0xB3AA, GR64, FP128>;
+}
+
+// fp_to_sint always rounds towards zero, which is modifier value 5.
+def : Pat<(i32 (fp_to_sint FP32:$src)), (CFEBR FP32:$src, 5)>;
+def : Pat<(i32 (fp_to_sint FP64:$src)), (CFDBR FP64:$src, 5)>;
+def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR FP128:$src, 5)>;
+
+def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR FP32:$src, 5)>;
+def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR FP64:$src, 5)>;
+def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR FP128:$src, 5)>;
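
Modifier 5 requests rounding toward zero, which is exactly the semantics of fp_to_sint (and of C/C++ float-to-integer conversion), so no mode switching is needed around these patterns. A trivial, purely illustrative sanity check of that equivalence:

#include <cassert>

int main() {
  assert(int(2.9) == 2 && int(-2.9) == -2);   // truncation toward zero
  return 0;
}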
+
+//===----------------------------------------------------------------------===//
+// Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Negation (Load Complement).
+let Defs = [PSW] in {
+ def LCEBR : UnaryRRE<"lcebr", 0xB303, fneg, FP32, FP32>;
+ def LCDBR : UnaryRRE<"lcdbr", 0xB313, fneg, FP64, FP64>;
+ def LCXBR : UnaryRRE<"lcxbr", 0xB343, fneg, FP128, FP128>;
+}
+
+// Absolute value (Load Positive).
+let Defs = [PSW] in {
+ def LPEBR : UnaryRRE<"lpebr", 0xB300, fabs, FP32, FP32>;
+ def LPDBR : UnaryRRE<"lpdbr", 0xB310, fabs, FP64, FP64>;
+ def LPXBR : UnaryRRE<"lpxbr", 0xB340, fabs, FP128, FP128>;
+}
+
+// Negative absolute value (Load Negative).
+let Defs = [PSW] in {
+ def LNEBR : UnaryRRE<"lnebr", 0xB301, fnabs, FP32, FP32>;
+ def LNDBR : UnaryRRE<"lndbr", 0xB311, fnabs, FP64, FP64>;
+ def LNXBR : UnaryRRE<"lnxbr", 0xB341, fnabs, FP128, FP128>;
+}
+
+// Square root.
+def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>;
+def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>;
+def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
+
+def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32>;
+def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64>;
+
+// Round to an integer, with the second operand (modifier M3) specifying
+// the rounding mode.
+//
+// These forms always check for inexact conditions. z196 added versions
+// that allow this to be suppressed (as for fnearbyint), but we don't yet
+// support -march=z196.
+let Defs = [PSW] in {
+ def FIEBR : UnaryRRF<"fiebr", 0xB357, FP32, FP32>;
+ def FIDBR : UnaryRRF<"fidbr", 0xB35F, FP64, FP64>;
+ def FIXBR : UnaryRRF<"fixbr", 0xB347, FP128, FP128>;
+}
+
+// frint rounds according to the current mode (modifier 0) and detects
+// inexact conditions.
+def : Pat<(frint FP32:$src), (FIEBR FP32:$src, 0)>;
+def : Pat<(frint FP64:$src), (FIDBR FP64:$src, 0)>;
+def : Pat<(frint FP128:$src), (FIXBR FP128:$src, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition.
+let Defs = [PSW] in {
+ let isCommutable = 1 in {
+ def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>;
+ def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>;
+ def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
+ }
+ def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load>;
+ def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load>;
+}
+
+// Subtraction.
+let Defs = [PSW] in {
+ def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>;
+ def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>;
+ def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>;
+
+ def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load>;
+ def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load>;
+}
+
+// Multiplication.
+let isCommutable = 1 in {
+ def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>;
+ def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>;
+ def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>;
+}
+def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load>;
+def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load>;
+
+// f64 multiplication of two FP32 registers.
+def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
+def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))),
+ (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ FP32:$src1, subreg_32bit), FP32:$src2)>;
+
+// f64 multiplication of an FP32 register and an f32 memory.
+def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load>;
+def : Pat<(fmul (f64 (fextend FP32:$src1)),
+ (f64 (extloadf32 bdxaddr12only:$addr))),
+ (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_32bit),
+ bdxaddr12only:$addr)>;
+
+// f128 multiplication of two FP64 registers.
+def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
+def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (fextend FP64:$src2))),
+ (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
+ FP64:$src1, subreg_high), FP64:$src2)>;
+
+// f128 multiplication of an FP64 register and an f64 memory.
+def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load>;
+def : Pat<(fmul (f128 (fextend FP64:$src1)),
+ (f128 (extloadf64 bdxaddr12only:$addr))),
+ (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_high),
+ bdxaddr12only:$addr)>;
+
+// Fused multiply-add.
+def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32>;
+def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64>;
+
+def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load>;
+def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load>;
+
+// Fused multiply-subtract.
+def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32>;
+def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64>;
+
+def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load>;
+def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load>;
+
+// Division.
+def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>;
+def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>;
+def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>;
+
+def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load>;
+def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load>;
+
+//===----------------------------------------------------------------------===//
+// Comparisons
+//===----------------------------------------------------------------------===//
+
+let Defs = [PSW] in {
+ def CEBR : CompareRRE<"cebr", 0xB309, z_cmp, FP32, FP32>;
+ def CDBR : CompareRRE<"cdbr", 0xB319, z_cmp, FP64, FP64>;
+ def CXBR : CompareRRE<"cxbr", 0xB349, z_cmp, FP128, FP128>;
+
+ def CEB : CompareRXE<"ceb", 0xED09, z_cmp, FP32, load>;
+ def CDB : CompareRXE<"cdb", 0xED19, z_cmp, FP64, load>;
+}
+
+//===----------------------------------------------------------------------===//
+// Peepholes
+//===----------------------------------------------------------------------===//
+
+def : Pat<(f32 fpimmneg0), (LCEBR (LZER))>;
+def : Pat<(f64 fpimmneg0), (LCDBR (LZDR))>;
+def : Pat<(f128 fpimmneg0), (LCXBR (LZXR))>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
new file mode 100644
index 0000000..b32b7eb
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -0,0 +1,987 @@
+//==- SystemZInstrFormats.td - SystemZ Instruction Formats --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Basic SystemZ instruction definition
+//===----------------------------------------------------------------------===//
+
+class InstSystemZ<int size, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : Instruction {
+ let Namespace = "SystemZ";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let Size = size;
+ let Pattern = pattern;
+ let AsmString = asmstr;
+
+ // Used to identify a group of related instructions, such as ST and STY.
+ string Function = "";
+
+ // "12" for an instruction that has a ...Y equivalent, "20" for that
+ // ...Y equivalent.
+ string PairType = "none";
+
+ // True if this instruction is a simple D(X,B) load of a register
+ // (with no sign or zero extension).
+ bit SimpleBDXLoad = 0;
+
+ // True if this instruction is a simple D(X,B) store of a register
+ // (with no truncation).
+ bit SimpleBDXStore = 0;
+
+ // True if this instruction has a 20-bit displacement field.
+ bit Has20BitOffset = 0;
+
+ // True if addresses in this instruction have an index register.
+ bit HasIndex = 0;
+
+ // True if this is a 128-bit pseudo instruction that combines two 64-bit
+ // operations.
+ bit Is128Bit = 0;
+
+ let TSFlags{0} = SimpleBDXLoad;
+ let TSFlags{1} = SimpleBDXStore;
+ let TSFlags{2} = Has20BitOffset;
+ let TSFlags{3} = HasIndex;
+ let TSFlags{4} = Is128Bit;
+}
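
The TSFlags assignments above can later be queried from an instruction's MCInstrDesc. A small sketch of that lookup (the bit numbers follow the "let TSFlags{N}" lines; the function names are invented):

#include "llvm/CodeGen/MachineInstr.h"
#include <cstdint>

static bool isSimpleBDXLoad(const llvm::MachineInstr *MI) {
  return (MI->getDesc().TSFlags & (uint64_t(1) << 0)) != 0;  // TSFlags{0}
}
static bool has20BitOffset(const llvm::MachineInstr *MI) {
  return (MI->getDesc().TSFlags & (uint64_t(1) << 2)) != 0;  // TSFlags{2}
}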
+
+//===----------------------------------------------------------------------===//
+// Mappings between instructions
+//===----------------------------------------------------------------------===//
+
+// Return the version of an instruction that has an unsigned 12-bit
+// displacement.
+def getDisp12Opcode : InstrMapping {
+ let FilterClass = "InstSystemZ";
+ let RowFields = ["Function"];
+ let ColFields = ["PairType"];
+ let KeyCol = ["20"];
+ let ValueCols = [["12"]];
+}
+
+// Return the version of an instruction that has a signed 20-bit displacement.
+def getDisp20Opcode : InstrMapping {
+ let FilterClass = "InstSystemZ";
+ let RowFields = ["Function"];
+ let ColFields = ["PairType"];
+ let KeyCol = ["12"];
+ let ValueCols = [["20"]];
+}
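
These two mappings are what lets code such as getOpcodeForOffset() in the lowering file switch between the 12-bit and 20-bit displacement forms of a paired instruction. A self-contained sketch of that selection rule (the helper and parameter names are assumptions, not part of the patch):

#include <cstdint>

// Pick the opcode whose displacement field can encode Disp: the "12" form
// takes an unsigned 12-bit value, the "20" form a signed 20-bit value.
unsigned pickDisplacementForm(unsigned Opcode12, unsigned Opcode20,
                              int64_t Disp) {
  if (Disp >= 0 && Disp < (int64_t(1) << 12))
    return Opcode12;                 // e.g. L, ST, LE, ...
  if (Disp >= -(int64_t(1) << 19) && Disp < (int64_t(1) << 19))
    return Opcode20;                 // e.g. LY, STY, LEY, ...
  return 0;                          // out of range: caller must adjust
}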
+
+//===----------------------------------------------------------------------===//
+// Instruction formats
+//===----------------------------------------------------------------------===//
+//
+// Formats are specified using operand field declarations of the form:
+//
+// bits<4> Rn : register input or output for operand n
+// bits<m> In : immediate value of width m for operand n
+// bits<4> Bn : base register for address operand n
+// bits<m> Dn : displacement value of width m for address operand n
+// bits<4> Xn : index register for address operand n
+// bits<4> Mn : mode value for operand n
+//
+// The operand numbers ("n" in the list above) follow the architecture manual,
+// but the fields are always declared in assembly order, so there are some
+// cases where operand "2" comes after operand "3". For address operands,
+// the base register field is declared first, followed by the displacement,
+// followed by the index (if any). This matches the bdaddr* and bdxaddr*
+// orders.
+//
+//===----------------------------------------------------------------------===//
+
+class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+
+ bits<4> R1;
+ bits<16> I2;
+
+ let Inst{31-24} = op{11-4};
+ let Inst{23-20} = R1;
+ let Inst{19-16} = op{3-0};
+ let Inst{15-0} = I2;
+}
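
For concreteness, this is how the RI field layout above packs into 32 bits; a sketch derived from the let-assignments in InstRI, not the MC emitter itself, and the opcode value in the comment is only an arithmetic example:

#include <cstdint>

// Encode an RI-format instruction from its 12-bit opcode, R1 and I2 fields.
uint32_t encodeRI(uint16_t Op12, unsigned R1, uint16_t I2) {
  uint32_t Inst = 0;
  Inst |= uint32_t(Op12 >> 4) << 24;    // Inst{31-24} = op{11-4}
  Inst |= uint32_t(R1 & 0xF) << 20;     // Inst{23-20} = R1
  Inst |= uint32_t(Op12 & 0xF) << 16;   // Inst{19-16} = op{3-0}
  Inst |= I2;                           // Inst{15-0}  = I2
  return Inst;
}
// encodeRI(0xA7A, 2, 100) therefore yields 0xA72A0064.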
+
+class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<8> I3;
+ bits<8> I4;
+ bits<8> I5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R2;
+ let Inst{31-24} = I3;
+ let Inst{23-16} = I4;
+ let Inst{15-8} = I5;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+
+ bits<4> R1;
+ bits<32> I2;
+
+ let Inst{47-40} = op{11-4};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = op{3-0};
+ let Inst{31-0} = I2;
+}
+
+class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<2, outs, ins, asmstr, pattern> {
+ field bits<16> Inst;
+
+ bits<4> R1;
+ bits<4> R2;
+
+ let Inst{15-8} = op;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
+class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<4> R2;
+
+ let Inst{31-16} = op;
+ let Inst{15-12} = R1;
+ let Inst{11-8} = 0;
+ let Inst{7-4} = R3;
+ let Inst{3-0} = R2;
+}
+
+class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+
+ bits<4> R1;
+ bits<4> R2;
+
+ let Inst{31-16} = op;
+ let Inst{15-8} = 0;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
+class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> R3;
+
+ let Inst{31-16} = op;
+ let Inst{15-12} = R3;
+ let Inst{11-8} = 0;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
+class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+
+ bits<4> R1;
+ bits<4> B2;
+ bits<12> D2;
+ bits<4> X2;
+
+ let Inst{31-24} = op;
+ let Inst{23-20} = R1;
+ let Inst{19-16} = X2;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
+
+ let HasIndex = 1;
+}
+
+class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+
+ bits<4> R1;
+ bits<4> B2;
+ bits<12> D2;
+ bits<4> X2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = X2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
+ let Inst{15-8} = 0;
+ let Inst{7-0} = op{7-0};
+
+ let HasIndex = 1;
+}
+
+class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<4> B2;
+ bits<12> D2;
+ bits<4> X2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R3;
+ let Inst{35-32} = X2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
+ let Inst{15-12} = R1;
+ let Inst{11-8} = 0;
+ let Inst{7-0} = op{7-0};
+
+ let HasIndex = 1;
+}
+
+class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+
+ bits<4> R1;
+ bits<4> B2;
+ bits<20> D2;
+ bits<4> X2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = X2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2{11-0};
+ let Inst{15-8} = D2{19-12};
+ let Inst{7-0} = op{7-0};
+
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+}
+
+class InstRS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<4> B2;
+ bits<12> D2;
+
+ let Inst{31-24} = op;
+ let Inst{23-20} = R1;
+ let Inst{19-16} = R3;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
+}
+
+class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<4> B2;
+ bits<20> D2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R3;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2{11-0};
+ let Inst{15-8} = D2{19-12};
+ let Inst{7-0} = op{7-0};
+
+ let Has20BitOffset = 1;
+}
+
+class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+
+ bits<4> B1;
+ bits<12> D1;
+ bits<8> I2;
+
+ let Inst{31-24} = op;
+ let Inst{23-16} = I2;
+ let Inst{15-12} = B1;
+ let Inst{11-0} = D1;
+}
+
+class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+
+ bits<4> B1;
+ bits<12> D1;
+ bits<16> I2;
+
+ let Inst{47-32} = op;
+ let Inst{31-28} = B1;
+ let Inst{27-16} = D1;
+ let Inst{15-0} = I2;
+}
+
+class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+
+ bits<4> B1;
+ bits<20> D1;
+ bits<8> I2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-32} = I2;
+ let Inst{31-28} = B1;
+ let Inst{27-16} = D1{11-0};
+ let Inst{15-8} = D1{19-12};
+ let Inst{7-0} = op{7-0};
+
+ let Has20BitOffset = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions with semantics
+//===----------------------------------------------------------------------===//
+//
+// These classes have the form <Category><Format>, where <Format> is one
+// of the formats defined above and where <Category> describes the inputs
+// and outputs. <Category> can be one of:
+//
+// Inherent:
+// One register output operand and no input operands.
+//
+// Store:
+// One register or immediate input operand and one address input operand.
+// The instruction stores the first operand to the address.
+//
+// This category is used for both pure and truncating stores.
+//
+// LoadMultiple:
+// One address input operand and two explicit output operands.
+// The instruction loads a range of registers from the address,
+// with the explicit operands giving the first and last register
+// to load. Other loaded registers are added as implicit definitions.
+//
+// StoreMultiple:
+// Two explicit input register operands and an address operand.
+// The instruction stores a range of registers to the address,
+// with the explicit operands giving the first and last register
+// to store. Other stored registers are added as implicit uses.
+//
+// Unary:
+// One register output operand and one input operand. The input
+// operand may be a register, immediate or memory.
+//
+// Binary:
+// One register output operand and two input operands. The first
+//     input operand is always a register and the second may be a register,
+// immediate or memory.
+//
+// Shift:
+// One register output operand and two input operands. The first
+// input operand is a register and the second has the same form as
+// an address (although it isn't actually used to address memory).
+//
+// Compare:
+// Two input operands. The first operand is always a register,
+// the second may be a register, immediate or memory.
+//
+// Ternary:
+// One register output operand and three register input operands.
+//
+// CmpSwap:
+// One output operand and three input operands. The first two
+// operands are registers and the third is an address. The instruction
+// both reads from and writes to the address.
+//
+// RotateSelect:
+// One output operand and five input operands. The first two operands
+// are registers and the other three are immediates.
+//
+// The format determines which input operands are tied to output operands,
+// and also determines the shape of any address operand.
+//
+// Multiclasses of the form <Category><Format>Pair define two instructions,
+// one with <Category><Format> and one with <Category><Format>Y. The name
+// of the first instruction has no suffix, the name of the second has
+// an extra "y".
+//
+//===----------------------------------------------------------------------===//
+
+class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ dag src>
+ : InstRRE<opcode, (outs cls:$dst), (ins),
+ mnemonic#"\t$dst",
+ [(set cls:$dst, src)]> {
+ let R2 = 0;
+}
+
+class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRSY<opcode, (outs cls:$dst1, cls:$dst2), (ins bdaddr20only:$addr),
+ mnemonic#"\t$dst1, $dst2, $addr", []> {
+ let mayLoad = 1;
+}
+
+class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls>
+ : InstRIL<opcode, (outs), (ins cls:$src, pcrel32:$addr),
+ mnemonic#"\t$src, $addr",
+ [(operator cls:$src, pcrel32:$addr)]> {
+ let mayStore = 1;
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
+class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls, AddressingMode mode = bdxaddr12only>
+ : InstRX<opcode, (outs), (ins cls:$src, mode:$addr),
+ mnemonic#"\t$src, $addr",
+ [(operator cls:$src, mode:$addr)]> {
+ let mayStore = 1;
+}
+
+class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, AddressingMode mode = bdxaddr20only>
+ : InstRXY<opcode, (outs), (ins cls:$src, mode:$addr),
+ mnemonic#"\t$src, $addr",
+ [(operator cls:$src, mode:$addr)]> {
+ let mayStore = 1;
+}
+
+multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
+ SDPatternOperator operator, RegisterOperand cls> {
+ let Function = mnemonic ## #cls in {
+ let PairType = "12" in
+ def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>;
+ let PairType = "20" in
+ def Y : StoreRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>;
+ }
+}
+
+class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRSY<opcode, (outs), (ins cls:$from, cls:$to, bdaddr20only:$addr),
+ mnemonic#"\t$from, $to, $addr", []> {
+ let mayStore = 1;
+}
+
+class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ Immediate imm, AddressingMode mode = bdaddr12only>
+ : InstSI<opcode, (outs), (ins mode:$addr, imm:$src),
+ mnemonic#"\t$addr, $src",
+ [(operator imm:$src, mode:$addr)]> {
+ let mayStore = 1;
+}
+
+class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ Immediate imm, AddressingMode mode = bdaddr20only>
+ : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src),
+ mnemonic#"\t$addr, $src",
+ [(operator imm:$src, mode:$addr)]> {
+ let mayStore = 1;
+}
+
+class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ Immediate imm>
+ : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src),
+ mnemonic#"\t$addr, $src",
+ [(operator imm:$src, bdaddr12only:$addr)]> {
+ let mayStore = 1;
+}
+
+multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
+ SDPatternOperator operator, Immediate imm> {
+ let Function = mnemonic in {
+ let PairType = "12" in
+ def "" : StoreSI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
+ let PairType = "20" in
+ def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>;
+ }
+}
+
+class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRR<opcode, (outs cls1:$dst), (ins cls2:$src),
+ mnemonic#"\t$dst, $src",
+ [(set cls1:$dst, (operator cls2:$src))]>;
+
+class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs cls1:$dst), (ins cls2:$src),
+ mnemonic#"\t$dst, $src",
+ [(set cls1:$dst, (operator cls2:$src))]>;
+
+class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src, uimm8zx4:$mode),
+ mnemonic#"\t$dst, $mode, $src", []>;
+
+class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRI<opcode, (outs cls:$dst), (ins imm:$src),
+ mnemonic#"\t$dst, $src",
+ [(set cls:$dst, (operator imm:$src))]>;
+
+class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRIL<opcode, (outs cls:$dst), (ins imm:$src),
+ mnemonic#"\t$dst, $src",
+ [(set cls:$dst, (operator imm:$src))]>;
+
+class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls>
+ : InstRIL<opcode, (outs cls:$dst), (ins pcrel32:$addr),
+ mnemonic#"\t$dst, $addr",
+ [(set cls:$dst, (operator pcrel32:$addr))]> {
+ let mayLoad = 1;
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
+class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls, AddressingMode mode = bdxaddr12only>
+ : InstRX<opcode, (outs cls:$dst), (ins mode:$addr),
+ mnemonic#"\t$dst, $addr",
+ [(set cls:$dst, (operator mode:$addr))]> {
+ let mayLoad = 1;
+}
+
+class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls>
+ : InstRXE<opcode, (outs cls:$dst), (ins bdxaddr12only:$addr),
+ mnemonic#"\t$dst, $addr",
+ [(set cls:$dst, (operator bdxaddr12only:$addr))]> {
+ let mayLoad = 1;
+}
+
+class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, AddressingMode mode = bdxaddr20only>
+ : InstRXY<opcode, (outs cls:$dst), (ins mode:$addr),
+ mnemonic#"\t$dst, $addr",
+ [(set cls:$dst, (operator mode:$addr))]> {
+ let mayLoad = 1;
+}
+
+multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
+ SDPatternOperator operator, RegisterOperand cls> {
+ let Function = mnemonic ## #cls in {
+ let PairType = "12" in
+ def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>;
+ let PairType = "20" in
+ def Y : UnaryRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>;
+ }
+}
+
+class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRR<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2),
+ mnemonic#"\t$dst, $src2",
+ [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> {
+ let Constraints = "$src1 = $dst";
+ let DisableEncoding = "$src1";
+}
+
+class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2),
+ mnemonic#"\t$dst, $src2",
+ [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> {
+ let Constraints = "$src1 = $dst";
+ let DisableEncoding = "$src1";
+}
+
+// Here the assembly and dag operands are in natural order,
+// but the first input operand maps to R3 and the second to R2.
+// This is used for "CPSDR R1, R3, R2", which is equivalent to
+// R1 = copysign (R3, R2).
+//
+// Direct uses of the instruction must pass operands in encoding order --
+// R1, R2, R3 -- so they must pass the source operands in reverse order.
+class BinaryRevRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src2, cls1:$src1),
+ mnemonic#"\t$dst, $src1, $src2",
+ [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]>;
+
+class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRI<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2),
+ mnemonic#"\t$dst, $src2",
+ [(set cls:$dst, (operator cls:$src1, imm:$src2))]> {
+ let Constraints = "$src1 = $dst";
+ let DisableEncoding = "$src1";
+}
+
+class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRIL<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2),
+ mnemonic#"\t$dst, $src2",
+ [(set cls:$dst, (operator cls:$src1, imm:$src2))]> {
+ let Constraints = "$src1 = $dst";
+ let DisableEncoding = "$src1";
+}
+
+class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load,
+ AddressingMode mode = bdxaddr12only>
+ : InstRX<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
+ mnemonic#"\t$dst, $src2",
+ [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> {
+ let Constraints = "$src1 = $dst";
+ let DisableEncoding = "$src1";
+ let mayLoad = 1;
+}
+
+class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load>
+ : InstRXE<opcode, (outs cls:$dst), (ins cls:$src1, bdxaddr12only:$src2),
+ mnemonic#"\t$dst, $src2",
+ [(set cls:$dst, (operator cls:$src1,
+ (load bdxaddr12only:$src2)))]> {
+ let Constraints = "$src1 = $dst";
+ let DisableEncoding = "$src1";
+ let mayLoad = 1;
+}
+
+class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load,
+ AddressingMode mode = bdxaddr20only>
+ : InstRXY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
+ mnemonic#"\t$dst, $src2",
+ [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> {
+ let Constraints = "$src1 = $dst";
+ let DisableEncoding = "$src1";
+ let mayLoad = 1;
+}
+
+multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
+ SDPatternOperator operator, RegisterOperand cls,
+ SDPatternOperator load> {
+ let Function = mnemonic ## #cls in {
+ let PairType = "12" in
+ def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bdxaddr12pair>;
+ let PairType = "20" in
+ def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load,
+ bdxaddr20pair>;
+ }
+}
+
+class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ Operand imm, AddressingMode mode = bdaddr12only>
+ : InstSI<opcode, (outs), (ins mode:$addr, imm:$src),
+ mnemonic#"\t$addr, $src",
+ [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> {
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ Operand imm, AddressingMode mode = bdaddr20only>
+ : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src),
+ mnemonic#"\t$addr, $src",
+ [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> {
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+multiclass BinarySIPair<string mnemonic, bits<8> siOpcode,
+ bits<16> siyOpcode, SDPatternOperator operator,
+ Operand imm> {
+ let Function = mnemonic in {
+ let PairType = "12" in
+ def "" : BinarySI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
+ let PairType = "20" in
+ def Y : BinarySIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>;
+ }
+}
+
+class ShiftRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls, AddressingMode mode>
+ : InstRS<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
+ mnemonic#"\t$dst, $src2",
+ [(set cls:$dst, (operator cls:$src1, mode:$src2))]> {
+ let R3 = 0;
+ let Constraints = "$src1 = $dst";
+ let DisableEncoding = "$src1";
+}
+
+class ShiftRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, AddressingMode mode>
+ : InstRSY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
+ mnemonic#"\t$dst, $src1, $src2",
+ [(set cls:$dst, (operator cls:$src1, mode:$src2))]>;
+
+class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRR<opcode, (outs), (ins cls1:$src1, cls2:$src2),
+ mnemonic#"\t$src1, $src2",
+ [(operator cls1:$src1, cls2:$src2)]>;
+
+class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs), (ins cls1:$src1, cls2:$src2),
+ mnemonic#"\t$src1, $src2",
+ [(operator cls1:$src1, cls2:$src2)]>;
+
+class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRI<opcode, (outs), (ins cls:$src1, imm:$src2),
+ mnemonic#"\t$src1, $src2",
+ [(operator cls:$src1, imm:$src2)]>;
+
+class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm>
+ : InstRIL<opcode, (outs), (ins cls:$src1, imm:$src2),
+ mnemonic#"\t$src1, $src2",
+ [(operator cls:$src1, imm:$src2)]>;
+
+class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load>
+ : InstRIL<opcode, (outs), (ins cls:$src1, pcrel32:$src2),
+ mnemonic#"\t$src1, $src2",
+ [(operator cls:$src1, (load pcrel32:$src2))]> {
+ let mayLoad = 1;
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
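+ // (An inferred note, not in the original comment: at roughly 3 complexity
+ // units per extra operand, a value of 7 is just enough to win over the
+ // BDX forms.)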
+ let AddedComplexity = 7;
+}
+
+class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load,
+ AddressingMode mode = bdxaddr12only>
+ : InstRX<opcode, (outs), (ins cls:$src1, mode:$src2),
+ mnemonic#"\t$src1, $src2",
+ [(operator cls:$src1, (load mode:$src2))]> {
+ let mayLoad = 1;
+}
+
+class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load>
+ : InstRXE<opcode, (outs), (ins cls:$src1, bdxaddr12only:$src2),
+ mnemonic#"\t$src1, $src2",
+ [(operator cls:$src1, (load bdxaddr12only:$src2))]> {
+ let mayLoad = 1;
+}
+
+class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load,
+ AddressingMode mode = bdxaddr20only>
+ : InstRXY<opcode, (outs), (ins cls:$src1, mode:$src2),
+ mnemonic#"\t$src1, $src2",
+ [(operator cls:$src1, (load mode:$src2))]> {
+ let mayLoad = 1;
+}
+
+multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
+ SDPatternOperator operator, RegisterOperand cls,
+ SDPatternOperator load> {
+ let Function = mnemonic ## #cls in {
+ let PairType = "12" in
+ def "" : CompareRX<mnemonic, rxOpcode, operator, cls,
+ load, bdxaddr12pair>;
+ let PairType = "20" in
+ def Y : CompareRXY<mnemonic#"y", rxyOpcode, operator, cls,
+ load, bdxaddr20pair>;
+ }
+}
+
+class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ SDPatternOperator load, Immediate imm,
+ AddressingMode mode = bdaddr12only>
+ : InstSI<opcode, (outs), (ins mode:$addr, imm:$src),
+ mnemonic#"\t$addr, $src",
+ [(operator (load mode:$addr), imm:$src)]> {
+ let mayLoad = 1;
+}
+
+class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ SDPatternOperator load, Immediate imm>
+ : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src),
+ mnemonic#"\t$addr, $src",
+ [(operator (load bdaddr12only:$addr), imm:$src)]> {
+ let mayLoad = 1;
+}
+
+class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ SDPatternOperator load, Immediate imm,
+ AddressingMode mode = bdaddr20only>
+ : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src),
+ mnemonic#"\t$addr, $src",
+ [(operator (load mode:$addr), imm:$src)]> {
+ let mayLoad = 1;
+}
+
+multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
+ SDPatternOperator operator, SDPatternOperator load,
+ Immediate imm> {
+ let Function = mnemonic in {
+ let PairType = "12" in
+ def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>;
+ let PairType = "20" in
+ def Y : CompareSIY<mnemonic#"y", siyOpcode, operator, load, imm,
+ bdaddr20pair>;
+ }
+}
+
+class TernaryRRD<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, RegisterOperand cls>
+ : InstRRD<opcode, (outs cls:$dst), (ins cls:$src1, cls:$src2, cls:$src3),
+ mnemonic#"\t$dst, $src2, $src3",
+ [(set cls:$dst, (operator cls:$src1, cls:$src2, cls:$src3))]> {
+ let Constraints = "$src1 = $dst";
+ let DisableEncoding = "$src1";
+}
+
+class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, SDPatternOperator load>
+ : InstRXF<opcode, (outs cls:$dst),
+ (ins cls:$src1, cls:$src2, bdxaddr12only:$src3),
+ mnemonic#"\t$dst, $src2, $src3",
+ [(set cls:$dst, (operator cls:$src1, cls:$src2,
+ (load bdxaddr12only:$src3)))]> {
+ let Constraints = "$src1 = $dst";
+ let DisableEncoding = "$src1";
+ let mayLoad = 1;
+}
+
+class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls, AddressingMode mode = bdaddr12only>
+ : InstRS<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr),
+ mnemonic#"\t$dst, $new, $ptr",
+ [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> {
+ let Constraints = "$old = $dst";
+ let DisableEncoding = "$old";
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls, AddressingMode mode = bdaddr20only>
+ : InstRSY<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr),
+ mnemonic#"\t$dst, $new, $ptr",
+ [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> {
+ let Constraints = "$old = $dst";
+ let DisableEncoding = "$old";
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
+ SDPatternOperator operator, RegisterOperand cls> {
+ let Function = mnemonic ## #cls in {
+ let PairType = "12" in
+ def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>;
+ let PairType = "20" in
+ def Y : CmpSwapRSY<mnemonic#"y", rsyOpcode, operator, cls, bdaddr20pair>;
+ }
+}
+
+class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRIEf<opcode, (outs cls1:$dst),
+ (ins cls1:$src1, cls2:$src2,
+ uimm8zx6:$imm1, uimm8zx6:$imm2, uimm8zx6:$imm3),
+ mnemonic#"\t$dst, $src2, $imm1, $imm2, $imm3", []> {
+ let Constraints = "$src1 = $dst";
+ let DisableEncoding = "$src1";
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+//
+// Convenience instructions that get lowered to real instructions
+// by either SystemZTargetLowering::EmitInstrWithCustomInserter()
+// or SystemZInstrInfo::expandPostRAPseudo().
+//
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag outs, dag ins, list<dag> pattern>
+ : InstSystemZ<0, outs, ins, "", pattern> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+// Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is
+// the value of the PSW's 2-bit condition code field.
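+// For example (an illustrative note, assuming the usual SystemZ numbering in
+// which mask bit 8 corresponds to CC0 and bit 1 to CC3): when CC is 2,
+// 8 >> CC equals 2, so $src1 is chosen whenever bit 2 of the $cc mask is set.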
+class SelectWrapper<RegisterOperand cls>
+ : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, i8imm:$cc),
+ [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, imm:$cc))]> {
+ let usesCustomInserter = 1;
+ // Although the instructions used by these nodes do not in themselves
+ // change the PSW, the insertion requires new blocks, and the PSW cannot
+ // be live across them.
+ let Defs = [PSW];
+ let Uses = [PSW];
+}
+
+// OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation. PAT and OPERAND
+// describe the second (non-memory) operand.
+class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,
+ dag pat, DAGOperand operand>
+ : Pseudo<(outs cls:$dst), (ins bdaddr20only:$ptr, operand:$src2),
+ [(set cls:$dst, (operator bdaddr20only:$ptr, pat))]> {
+ let Defs = [PSW];
+ let Has20BitOffset = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+ let usesCustomInserter = 1;
+}
+
+// Specializations of AtomicLoadBinary.
+class AtomicLoadBinaryReg32<SDPatternOperator operator>
+ : AtomicLoadBinary<operator, GR32, (i32 GR32:$src2), GR32>;
+class AtomicLoadBinaryImm32<SDPatternOperator operator, Immediate imm>
+ : AtomicLoadBinary<operator, GR32, (i32 imm:$src2), imm>;
+class AtomicLoadBinaryReg64<SDPatternOperator operator>
+ : AtomicLoadBinary<operator, GR64, (i64 GR64:$src2), GR64>;
+class AtomicLoadBinaryImm64<SDPatternOperator operator, Immediate imm>
+ : AtomicLoadBinary<operator, GR64, (i64 imm:$src2), imm>;
+
+// OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND
+// describe the second (non-memory) operand.
+class AtomicLoadWBinary<SDPatternOperator operator, dag pat,
+ DAGOperand operand>
+ : Pseudo<(outs GR32:$dst),
+ (ins bdaddr20only:$ptr, operand:$src2, ADDR32:$bitshift,
+ ADDR32:$negbitshift, uimm32:$bitsize),
+ [(set GR32:$dst, (operator bdaddr20only:$ptr, pat, ADDR32:$bitshift,
+ ADDR32:$negbitshift, uimm32:$bitsize))]> {
+ let Defs = [PSW];
+ let Has20BitOffset = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+ let usesCustomInserter = 1;
+}
+
+// Specializations of AtomicLoadWBinary.
+class AtomicLoadWBinaryReg<SDPatternOperator operator>
+ : AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>;
+class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
+ : AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
new file mode 100644
index 0000000..0718c83
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -0,0 +1,444 @@
+//===-- SystemZInstrInfo.cpp - SystemZ instruction information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZInstrInfo.h"
+#include "SystemZInstrBuilder.h"
+
+#define GET_INSTRINFO_CTOR
+#define GET_INSTRMAP_INFO
+#include "SystemZGenInstrInfo.inc"
+
+using namespace llvm;
+
+SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
+ : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
+ RI(tm, *this) {
+}
+
+// MI is a 128-bit load or store. Split it into two 64-bit loads or stores,
+// each having the opcode given by NewOpcode.
+void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
+ unsigned NewOpcode) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+
+ // Get two load or store instructions. Use the original instruction for one
+ // of them (arbitrarily the second here) and create a clone for the other.
+ MachineInstr *EarlierMI = MF.CloneMachineInstr(MI);
+ MBB->insert(MI, EarlierMI);
+
+ // Set up the two 64-bit registers.
+ MachineOperand &HighRegOp = EarlierMI->getOperand(0);
+ MachineOperand &LowRegOp = MI->getOperand(0);
+ HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_high));
+ LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_low));
+
+ // The address in the first (high) instruction is already correct.
+ // Adjust the offset in the second (low) instruction.
+ MachineOperand &HighOffsetOp = EarlierMI->getOperand(2);
+ MachineOperand &LowOffsetOp = MI->getOperand(2);
+ LowOffsetOp.setImm(LowOffsetOp.getImm() + 8);
+
+ // Set the opcodes.
+ unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm());
+ unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm());
+ assert(HighOpcode && LowOpcode && "Both offsets should be in range");
+
+ EarlierMI->setDesc(get(HighOpcode));
+ MI->setDesc(get(LowOpcode));
+}
+
+// Split ADJDYNALLOC instruction MI.
+void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ MachineFrameInfo *MFFrame = MF.getFrameInfo();
+ MachineOperand &OffsetMO = MI->getOperand(2);
+
+ uint64_t Offset = (MFFrame->getMaxCallFrameSize() +
+ SystemZMC::CallFrameSize +
+ OffsetMO.getImm());
+ unsigned NewOpcode = getOpcodeForOffset(SystemZ::LA, Offset);
+ assert(NewOpcode && "No support for huge argument lists yet");
+ MI->setDesc(get(NewOpcode));
+ OffsetMO.setImm(Offset);
+}
+
+// If MI is a simple load or store for a frame object, return the register
+// it loads or stores and set FrameIndex to the index of the frame object.
+// Return 0 otherwise.
+//
+// Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
+static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, int Flag) {
+ const MCInstrDesc &MCID = MI->getDesc();
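+ // A simple move, for this purpose, has the right SimpleBDX* flag, a
+ // frame-index base (operand 1), a zero displacement (operand 2) and no
+ // index register (operand 3).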
+ if ((MCID.TSFlags & Flag) &&
+ MI->getOperand(1).isFI() &&
+ MI->getOperand(2).getImm() == 0 &&
+ MI->getOperand(3).getReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ return 0;
+}
+
+unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXLoad);
+}
+
+unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore);
+}
+
+bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // Most of the code and comments here are boilerplate.
+
+ // Start from the bottom of the block and work up, examining the
+ // terminator instructions.
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+
+ // Working from the bottom, when we see a non-terminator instruction, we're
+ // done.
+ if (!isUnpredicatedTerminator(I))
+ break;
+
+ // A terminator that isn't a branch can't easily be handled by this
+ // analysis.
+ unsigned ThisCond;
+ const MachineOperand *ThisTarget;
+ if (!isBranch(I, ThisCond, ThisTarget))
+ return true;
+
+ // Can't handle indirect branches.
+ if (!ThisTarget->isMBB())
+ return true;
+
+ if (ThisCond == SystemZ::CCMASK_ANY) {
+ // Handle unconditional branches.
+ if (!AllowModify) {
+ TBB = ThisTarget->getMBB();
+ continue;
+ }
+
+ // If the block has any instructions after a JMP, delete them.
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
+ Cond.clear();
+ FBB = 0;
+
+ // Delete the JMP if it's equivalent to a fall-through.
+ if (MBB.isLayoutSuccessor(ThisTarget->getMBB())) {
+ TBB = 0;
+ I->eraseFromParent();
+ I = MBB.end();
+ continue;
+ }
+
+ // TBB is used to indicate the unconditional destination.
+ TBB = ThisTarget->getMBB();
+ continue;
+ }
+
+ // Working from the bottom, handle the first conditional branch.
+ if (Cond.empty()) {
+ // FIXME: add X86-style branch swap
+ FBB = TBB;
+ TBB = ThisTarget->getMBB();
+ Cond.push_back(MachineOperand::CreateImm(ThisCond));
+ continue;
+ }
+
+ // Handle subsequent conditional branches.
+ assert(Cond.size() == 1);
+ assert(TBB);
+
+ // Only handle the case where all conditional branches branch to the same
+ // destination.
+ if (TBB != ThisTarget->getMBB())
+ return true;
+
+ // If the conditions are the same, we can leave them alone.
+ unsigned OldCond = Cond[0].getImm();
+ if (OldCond == ThisCond)
+ continue;
+
+ // FIXME: Try combining conditions like X86 does. Should be easy on Z!
+ }
+
+ return false;
+}
+
+unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ // Most of the code and comments here are boilerplate.
+ MachineBasicBlock::iterator I = MBB.end();
+ unsigned Count = 0;
+
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ unsigned Cond;
+ const MachineOperand *Target;
+ if (!isBranch(I, Cond, Target))
+ break;
+ if (!Target->isMBB())
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+unsigned
+SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // In this function we output 32-bit branches, which should always
+ // have enough range. They can be shortened and relaxed by later code
+ // in the pipeline, if desired.
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 1 || Cond.size() == 0) &&
+ "SystemZ branch conditions have one component!");
+
+ if (Cond.empty()) {
+ // Unconditional branch?
+ assert(!FBB && "Unconditional branch with multiple successors!");
+ BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(TBB);
+ return 1;
+ }
+
+ // Conditional branch.
+ unsigned Count = 0;
+ unsigned CC = Cond[0].getImm();
+ BuildMI(&MBB, DL, get(SystemZ::BRCL)).addImm(CC).addMBB(TBB);
+ ++Count;
+
+ if (FBB) {
+ // Two-way conditional branch. Insert the second branch.
+ BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(FBB);
+ ++Count;
+ }
+ return Count;
+}
+
+void
+SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too.
+ if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) {
+ copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_high),
+ RI.getSubReg(SrcReg, SystemZ::subreg_high), KillSrc);
+ copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_low),
+ RI.getSubReg(SrcReg, SystemZ::subreg_low), KillSrc);
+ return;
+ }
+
+ // Everything else needs only one instruction.
+ unsigned Opcode;
+ if (SystemZ::GR32BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::LR;
+ else if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::LGR;
+ else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::LER;
+ else if (SystemZ::FP64BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::LDR;
+ else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::LXR;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void
+SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Callers may expect a single instruction, so keep 128-bit moves
+ // together for now and lower them after register allocation.
+ unsigned LoadOpcode, StoreOpcode;
+ getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode);
+ addFrameReference(BuildMI(MBB, MBBI, DL, get(StoreOpcode))
+ .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
+}
+
+void
+SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Callers may expect a single instruction, so keep 128-bit moves
+ // together for now and lower them after register allocation.
+ unsigned LoadOpcode, StoreOpcode;
+ getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode);
+ addFrameReference(BuildMI(MBB, MBBI, DL, get(LoadOpcode), DestReg),
+ FrameIdx);
+}
+
+bool
+SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ switch (MI->getOpcode()) {
+ case SystemZ::L128:
+ splitMove(MI, SystemZ::LG);
+ return true;
+
+ case SystemZ::ST128:
+ splitMove(MI, SystemZ::STG);
+ return true;
+
+ case SystemZ::LX:
+ splitMove(MI, SystemZ::LD);
+ return true;
+
+ case SystemZ::STX:
+ splitMove(MI, SystemZ::STD);
+ return true;
+
+ case SystemZ::ADJDYNALLOC:
+ splitAdjDynAlloc(MI);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+bool SystemZInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1 && "Invalid branch condition!");
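+ // XOR-ing the 4-bit mask with CCMASK_ANY (all four condition-code bits)
+ // selects exactly the CC values that the original branch did not accept.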
+ Cond[0].setImm(Cond[0].getImm() ^ SystemZ::CCMASK_ANY);
+ return false;
+}
+
+bool SystemZInstrInfo::isBranch(const MachineInstr *MI, unsigned &Cond,
+ const MachineOperand *&Target) const {
+ switch (MI->getOpcode()) {
+ case SystemZ::BR:
+ case SystemZ::J:
+ case SystemZ::JG:
+ Cond = SystemZ::CCMASK_ANY;
+ Target = &MI->getOperand(0);
+ return true;
+
+ case SystemZ::BRC:
+ case SystemZ::BRCL:
+ Cond = MI->getOperand(0).getImm();
+ Target = &MI->getOperand(1);
+ return true;
+
+ default:
+ assert(!MI->getDesc().isBranch() && "Unknown branch opcode");
+ return false;
+ }
+}
+
+void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
+ unsigned &LoadOpcode,
+ unsigned &StoreOpcode) const {
+ if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) {
+ LoadOpcode = SystemZ::L;
+ StoreOpcode = SystemZ::ST32;
+ } else if (RC == &SystemZ::GR64BitRegClass ||
+ RC == &SystemZ::ADDR64BitRegClass) {
+ LoadOpcode = SystemZ::LG;
+ StoreOpcode = SystemZ::STG;
+ } else if (RC == &SystemZ::GR128BitRegClass ||
+ RC == &SystemZ::ADDR128BitRegClass) {
+ LoadOpcode = SystemZ::L128;
+ StoreOpcode = SystemZ::ST128;
+ } else if (RC == &SystemZ::FP32BitRegClass) {
+ LoadOpcode = SystemZ::LE;
+ StoreOpcode = SystemZ::STE;
+ } else if (RC == &SystemZ::FP64BitRegClass) {
+ LoadOpcode = SystemZ::LD;
+ StoreOpcode = SystemZ::STD;
+ } else if (RC == &SystemZ::FP128BitRegClass) {
+ LoadOpcode = SystemZ::LX;
+ StoreOpcode = SystemZ::STX;
+ } else
+ llvm_unreachable("Unsupported regclass to load or store");
+}
+
+unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
+ int64_t Offset) const {
+ const MCInstrDesc &MCID = get(Opcode);
+ int64_t Offset2 = (MCID.TSFlags & SystemZII::Is128Bit ? Offset + 8 : Offset);
+ if (isUInt<12>(Offset) && isUInt<12>(Offset2)) {
+ // Get the instruction to use for unsigned 12-bit displacements.
+ int Disp12Opcode = SystemZ::getDisp12Opcode(Opcode);
+ if (Disp12Opcode >= 0)
+ return Disp12Opcode;
+
+ // All address-related instructions can use unsigned 12-bit
+ // displacements.
+ return Opcode;
+ }
+ if (isInt<20>(Offset) && isInt<20>(Offset2)) {
+ // Get the instruction to use for signed 20-bit displacements.
+ int Disp20Opcode = SystemZ::getDisp20Opcode(Opcode);
+ if (Disp20Opcode >= 0)
+ return Disp20Opcode;
+
+ // Check whether Opcode allows signed 20-bit displacements.
+ if (MCID.TSFlags & SystemZII::Has20BitOffset)
+ return Opcode;
+ }
+ return 0;
+}
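+// Illustrative usage (an assumption about the generated opcode mappings, not
+// part of the original code): getOpcodeForOffset(SystemZ::L, 0) can return
+// SystemZ::L itself, an offset of 4096 should map to the 20-bit-displacement
+// form SystemZ::LY via getDisp20Opcode, and an offset outside the signed
+// 20-bit range yields 0.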
+
+void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned Reg, uint64_t Value) const {
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ unsigned Opcode;
+ if (isInt<16>(Value))
+ Opcode = SystemZ::LGHI;
+ else if (SystemZ::isImmLL(Value))
+ Opcode = SystemZ::LLILL;
+ else if (SystemZ::isImmLH(Value)) {
+ Opcode = SystemZ::LLILH;
+ Value >>= 16;
+ } else {
+ assert(isInt<32>(Value) && "Huge values not handled yet");
+ Opcode = SystemZ::LGFI;
+ }
+ BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
new file mode 100644
index 0000000..0fc4761
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -0,0 +1,123 @@
+//===-- SystemZInstrInfo.h - SystemZ instruction information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H
+#define LLVM_TARGET_SYSTEMZINSTRINFO_H
+
+#include "SystemZ.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SystemZGenInstrInfo.inc"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+namespace SystemZII {
+ enum {
+ // See comments in SystemZInstrFormats.td.
+ SimpleBDXLoad = (1 << 0),
+ SimpleBDXStore = (1 << 1),
+ Has20BitOffset = (1 << 2),
+ HasIndex = (1 << 3),
+ Is128Bit = (1 << 4)
+ };
+ // SystemZ MachineOperand target flags.
+ enum {
+ // Masks out the bits for the access model.
+ MO_SYMBOL_MODIFIER = (1 << 0),
+
+ // @GOT (aka @GOTENT)
+ MO_GOT = (1 << 0)
+ };
+}
+
+class SystemZInstrInfo : public SystemZGenInstrInfo {
+ const SystemZRegisterInfo RI;
+
+ void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const;
+ void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const;
+
+public:
+ explicit SystemZInstrInfo(SystemZTargetMachine &TM);
+
+ // Override TargetInstrInfo.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const LLVM_OVERRIDE;
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const LLVM_OVERRIDE;
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const LLVM_OVERRIDE;
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const LLVM_OVERRIDE;
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const LLVM_OVERRIDE;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const LLVM_OVERRIDE;
+ virtual void
+ storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const LLVM_OVERRIDE;
+ virtual void
+ loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const LLVM_OVERRIDE;
+ virtual bool
+ expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const LLVM_OVERRIDE;
+ virtual bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+ LLVM_OVERRIDE;
+
+ // Return the SystemZRegisterInfo, which this class owns.
+ const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
+
+ // Return true if MI is a conditional or unconditional branch.
+ // When returning true, set Cond to the mask of condition-code
+ // values on which the instruction will branch, and set Target
+ // to the operand that contains the branch target. This target
+ // can be a register or a basic block.
+ bool isBranch(const MachineInstr *MI, unsigned &Cond,
+ const MachineOperand *&Target) const;
+
+ // Get the load and store opcodes for a given register class.
+ void getLoadStoreOpcodes(const TargetRegisterClass *RC,
+ unsigned &LoadOpcode, unsigned &StoreOpcode) const;
+
+ // Opcode is the opcode of an instruction that has an address operand,
+ // and the caller wants to perform that instruction's operation on an
+ // address that has displacement Offset. Return the opcode of a suitable
+ // instruction (which might be Opcode itself) or 0 if no such instruction
+ // exists.
+ unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const;
+
+ // Emit code before MBBI in MI to move immediate value Value into
+ // physical register Reg.
+ void loadImmediate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned Reg, uint64_t Value) const;
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
new file mode 100644
index 0000000..7ffa382
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -0,0 +1,955 @@
+//===-- SystemZInstrInfo.td - General SystemZ instructions ----*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt),
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+
+let neverHasSideEffects = 1 in {
+ // Takes as input the value of the stack pointer after a dynamic allocation
+ // has been made. Sets the output to the address of the dynamically-
+ // allocated area itself, skipping the outgoing arguments.
+ //
+ // This expands to an LA or LAY instruction. We restrict the offset
+ // to the range of LA and keep the LAY range in reserve for when
+ // the size of the outgoing arguments is added.
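+ // (For reference: LA accepts unsigned 12-bit displacements, 0..4095, while
+ // LAY accepts signed 20-bit displacements, -524288..524287.)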
+ def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src),
+ [(set GR64:$dst, dynalloc12only:$src)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Control flow instructions
+//===----------------------------------------------------------------------===//
+
+// A return instruction. R1 is the condition-code mask (all 1s)
+// and R2 is the target address, which is always stored in %r14.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1,
+ R1 = 15, R2 = 14, isCodeGenOnly = 1 in {
+ def RET : InstRR<0x07, (outs), (ins), "br\t%r14", [(z_retflag)]>;
+}
+
+// Unconditional branches. R1 is the condition-code mask (all 1s).
+let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
+ let isIndirectBranch = 1 in
+ def BR : InstRR<0x07, (outs), (ins ADDR64:$dst),
+ "br\t$dst", [(brind ADDR64:$dst)]>;
+
+ // An assembler extended mnemonic for BRC. Use a separate instruction for
+ // the asm parser, so that we don't relax Js to external symbols into JGs.
+ let isCodeGenOnly = 1 in
+ def J : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>;
+ let isAsmParserOnly = 1 in
+ def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>;
+
+ // An assembler extended mnemonic for BRCL. (The extension is "G"
+ // rather than "L" because "JL" is "Jump if Less".)
+ def JG : InstRIL<0xC04, (outs), (ins brtarget32:$dst),
+ "jg\t$dst", [(br bb:$dst)]>;
+}
+
+// Conditional branches. It's easier for LLVM to handle these branches
+// in their raw BRC/BRCL form, with the 4-bit condition-code mask being
+// the first operand. It seems friendlier to use mnemonic forms like
+// JE and JLH when writing out the assembly though.
+multiclass CondBranches<Operand imm, string short, string long> {
+ let isBranch = 1, isTerminator = 1, Uses = [PSW] in {
+ def "" : InstRI<0xA74, (outs), (ins imm:$cond, brtarget16:$dst), short, []>;
+ def L : InstRIL<0xC04, (outs), (ins imm:$cond, brtarget32:$dst), long, []>;
+ }
+}
+let isCodeGenOnly = 1 in
+ defm BRC : CondBranches<cond4, "j$cond\t$dst", "jg$cond\t$dst">;
+let isAsmParserOnly = 1 in
+ defm AsmBRC : CondBranches<uimm8zx4, "brc\t$cond, $dst", "brcl\t$cond, $dst">;
+
+def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRCL cond4:$cond, bb:$dst)>;
+
+// Define AsmParser mnemonics for each condition code.
+multiclass CondExtendedMnemonic<bits<4> Cond, string name> {
+ let R1 = Cond in {
+ def "" : InstRI<0xA74, (outs), (ins brtarget16:$dst),
+ "j"##name##"\t$dst", []>;
+ def L : InstRIL<0xC04, (outs), (ins brtarget32:$dst),
+ "jg"##name##"\t$dst", []>;
+ }
+}
+let isAsmParserOnly = 1 in {
+ defm AsmJO : CondExtendedMnemonic<1, "o">;
+ defm AsmJH : CondExtendedMnemonic<2, "h">;
+ defm AsmJNLE : CondExtendedMnemonic<3, "nle">;
+ defm AsmJL : CondExtendedMnemonic<4, "l">;
+ defm AsmJNHE : CondExtendedMnemonic<5, "nhe">;
+ defm AsmJLH : CondExtendedMnemonic<6, "lh">;
+ defm AsmJNE : CondExtendedMnemonic<7, "ne">;
+ defm AsmJE : CondExtendedMnemonic<8, "e">;
+ defm AsmJNLH : CondExtendedMnemonic<9, "nlh">;
+ defm AsmJHE : CondExtendedMnemonic<10, "he">;
+ defm AsmJNL : CondExtendedMnemonic<11, "nl">;
+ defm AsmJLE : CondExtendedMnemonic<12, "le">;
+ defm AsmJNH : CondExtendedMnemonic<13, "nh">;
+ defm AsmJNO : CondExtendedMnemonic<14, "no">;
+}
+
+def Select32 : SelectWrapper<GR32>;
+def Select64 : SelectWrapper<GR64>;
+
+//===----------------------------------------------------------------------===//
+// Call instructions
+//===----------------------------------------------------------------------===//
+
+// The definitions here are for the call-clobbered registers.
+let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
+ F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D],
+ R1 = 14, isCodeGenOnly = 1 in {
+ def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$dst, variable_ops),
+ "bras\t%r14, $dst", []>;
+ def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$dst, variable_ops),
+ "brasl\t%r14, $dst", [(z_call pcrel32call:$dst)]>;
+ def BASR : InstRR<0x0D, (outs), (ins ADDR64:$dst, variable_ops),
+ "basr\t%r14, $dst", [(z_call ADDR64:$dst)]>;
+}
+
+// Define the general form of the call instructions for the asm parser.
+// These instructions don't hard-code %r14 as the return address register.
+let isAsmParserOnly = 1 in {
+ def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$save, brtarget16:$dst),
+ "bras\t$save, $dst", []>;
+ def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$save, brtarget32:$dst),
+ "brasl\t$save, $dst", []>;
+ def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$save, ADDR64:$dst),
+ "basr\t$save, $dst", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Register moves.
+let neverHasSideEffects = 1 in {
+ def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>;
+ def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>;
+}
+
+// Immediate moves.
+let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+ // 16-bit sign-extended immediates.
+ def LHI : UnaryRI<"lhi", 0xA78, bitconvert, GR32, imm32sx16>;
+ def LGHI : UnaryRI<"lghi", 0xA79, bitconvert, GR64, imm64sx16>;
+
+ // Other 16-bit immediates.
+ def LLILL : UnaryRI<"llill", 0xA5F, bitconvert, GR64, imm64ll16>;
+ def LLILH : UnaryRI<"llilh", 0xA5E, bitconvert, GR64, imm64lh16>;
+ def LLIHL : UnaryRI<"llihl", 0xA5D, bitconvert, GR64, imm64hl16>;
+ def LLIHH : UnaryRI<"llihh", 0xA5C, bitconvert, GR64, imm64hh16>;
+
+ // 32-bit immediates.
+ def LGFI : UnaryRIL<"lgfi", 0xC01, bitconvert, GR64, imm64sx32>;
+ def LLILF : UnaryRIL<"llilf", 0xC0F, bitconvert, GR64, imm64lf32>;
+ def LLIHF : UnaryRIL<"llihf", 0xC0E, bitconvert, GR64, imm64hf32>;
+}
+
+// Register loads.
+let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
+ defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>;
+ def LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>;
+
+ def LG : UnaryRXY<"lg", 0xE304, load, GR64>;
+ def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>;
+
+ // These instructions are split after register allocation, so we don't
+ // want a custom inserter.
+ let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+ def L128 : Pseudo<(outs GR128:$dst), (ins bdxaddr20only128:$src),
+ [(set GR128:$dst, (load bdxaddr20only128:$src))]>;
+ }
+}
+
+// Register stores.
+let SimpleBDXStore = 1 in {
+ let isCodeGenOnly = 1 in {
+ defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32>;
+ def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
+ }
+
+ def STG : StoreRXY<"stg", 0xE324, store, GR64>;
+ def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;
+
+ // These instructions are split after register allocation, so we don't
+ // want a custom inserter.
+ let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+ def ST128 : Pseudo<(outs), (ins GR128:$src, bdxaddr20only128:$dst),
+ [(store GR128:$src, bdxaddr20only128:$dst)]>;
+ }
+}
+
+// 8-bit immediate stores to 8-bit fields.
+defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>;
+
+// 16-bit immediate stores to 16-, 32- or 64-bit fields.
+def MVHHI : StoreSIL<"mvhhi", 0xE544, truncstorei16, imm32sx16trunc>;
+def MVHI : StoreSIL<"mvhi", 0xE54C, store, imm32sx16>;
+def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+
+// 32-bit extensions from registers.
+let neverHasSideEffects = 1 in {
+ def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>;
+ def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>;
+}
+
+// 64-bit extensions from registers.
+let neverHasSideEffects = 1 in {
+ def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>;
+ def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>;
+ def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>;
+}
+
+// Match 32-to-64-bit sign extensions in which the source is already
+// in a 64-bit register.
+def : Pat<(sext_inreg GR64:$src, i32),
+ (LGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+
+// 32-bit extensions from memory.
+def LB : UnaryRXY<"lb", 0xE376, sextloadi8, GR32>;
+defm LH : UnaryRXPair<"lh", 0x48, 0xE378, sextloadi16, GR32>;
+def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_sextloadi16, GR32>;
+
+// 64-bit extensions from memory.
+def LGB : UnaryRXY<"lgb", 0xE377, sextloadi8, GR64>;
+def LGH : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64>;
+def LGF : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64>;
+def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_sextloadi16, GR64>;
+def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_sextloadi32, GR64>;
+
+// If the sign of a load-extend operation doesn't matter, use the signed ones.
+// There's not really much to choose between the sign and zero extensions,
+// but LH is more compact than LLH for small offsets.
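+// (LH is a 4-byte RX instruction, whereas LLH is a 6-byte RXY instruction,
+// which is what makes LH the more compact choice.)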
+def : Pat<(i32 (extloadi8 bdxaddr20only:$src)), (LB bdxaddr20only:$src)>;
+def : Pat<(i32 (extloadi16 bdxaddr12pair:$src)), (LH bdxaddr12pair:$src)>;
+def : Pat<(i32 (extloadi16 bdxaddr20pair:$src)), (LHY bdxaddr20pair:$src)>;
+
+def : Pat<(i64 (extloadi8 bdxaddr20only:$src)), (LGB bdxaddr20only:$src)>;
+def : Pat<(i64 (extloadi16 bdxaddr20only:$src)), (LGH bdxaddr20only:$src)>;
+def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+// 32-bit extensions from registers.
+let neverHasSideEffects = 1 in {
+ def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>;
+ def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>;
+}
+
+// 64-bit extensions from registers.
+let neverHasSideEffects = 1 in {
+ def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>;
+ def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>;
+ def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>;
+}
+
+// Match 32-to-64-bit zero extensions in which the source is already
+// in a 64-bit register.
+def : Pat<(and GR64:$src, 0xffffffff),
+ (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+
+// 32-bit extensions from memory.
+def LLC : UnaryRXY<"llc", 0xE394, zextloadi8, GR32>;
+def LLH : UnaryRXY<"llh", 0xE395, zextloadi16, GR32>;
+def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_zextloadi16, GR32>;
+
+// 64-bit extensions from memory.
+def LLGC : UnaryRXY<"llgc", 0xE390, zextloadi8, GR64>;
+def LLGH : UnaryRXY<"llgh", 0xE391, zextloadi16, GR64>;
+def LLGF : UnaryRXY<"llgf", 0xE316, zextloadi32, GR64>;
+def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_zextloadi16, GR64>;
+def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+// Truncations of 64-bit registers to 32-bit registers.
+def : Pat<(i32 (trunc GR64:$src)),
+ (EXTRACT_SUBREG GR64:$src, subreg_32bit)>;
+
+// Truncations of 32-bit registers to memory.
+let isCodeGenOnly = 1 in {
+ defm STC32 : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32>;
+ defm STH32 : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32>;
+ def STHRL32 : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>;
+}
+
+// Truncations of 64-bit registers to memory.
+defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR64>;
+defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR64>;
+def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR64>;
+defm ST : StoreRXPair<"st", 0x50, 0xE350, truncstorei32, GR64>;
+def STRL : StoreRILPC<"strl", 0xC4F, aligned_truncstorei32, GR64>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Multi-register loads.
+def LMG : LoadMultipleRSY<"lmg", 0xEB04, GR64>;
+
+// Multi-register stores.
+def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+// Byte-swapping register moves.
+let neverHasSideEffects = 1 in {
+ def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>;
+ def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>;
+}
+
+// Byte-swapping loads.
+def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap>, GR32>;
+def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap>, GR64>;
+
+// Byte-swapping stores.
+def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap>, GR32>;
+def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap>, GR64>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+// Load BDX-style addresses.
+let neverHasSideEffects = 1, Function = "la" in {
+ let PairType = "12" in
+ def LA : InstRX<0x41, (outs GR64:$dst), (ins laaddr12pair:$src),
+ "la\t$dst, $src",
+ [(set GR64:$dst, laaddr12pair:$src)]>;
+ let PairType = "20" in
+ def LAY : InstRXY<0xE371, (outs GR64:$dst), (ins laaddr20pair:$src),
+ "lay\t$dst, $src",
+ [(set GR64:$dst, laaddr20pair:$src)]>;
+}
+
+// Load a PC-relative address. There's no version of this instruction
+// with a 16-bit offset, so there's no relaxation.
+let neverHasSideEffects = 1 in {
+ def LARL : InstRIL<0xC00, (outs GR64:$dst), (ins pcrel32:$src),
+ "larl\t$dst, $src",
+ [(set GR64:$dst, pcrel32:$src)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Negation
+//===----------------------------------------------------------------------===//
+
+let Defs = [PSW] in {
+ def LCR : UnaryRR <"lcr", 0x13, ineg, GR32, GR32>;
+ def LCGR : UnaryRRE<"lcgr", 0xB903, ineg, GR64, GR64>;
+ def LCGFR : UnaryRRE<"lcgfr", 0xB913, null_frag, GR64, GR32>;
+}
+defm : SXU<ineg, LCGFR>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1 in
+ defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, zextloadi8>;
+defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, zextloadi8>;
+
+defm : InsertMem<"inserti8", IC32, GR32, zextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", IC32Y, GR32, zextloadi8, bdxaddr20pair>;
+
+defm : InsertMem<"inserti8", IC, GR64, zextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", ICY, GR64, zextloadi8, bdxaddr20pair>;
+
+// Insertions of a 16-bit immediate, leaving other bits unaffected.
+// We don't have or_as_insert equivalents of these operations because
+// OI is available instead.
+let isCodeGenOnly = 1 in {
+ def IILL32 : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>;
+ def IILH32 : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>;
+}
+def IILL : BinaryRI<"iill", 0xA53, insertll, GR64, imm64ll16>;
+def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR64, imm64lh16>;
+def IIHL : BinaryRI<"iihl", 0xA51, inserthl, GR64, imm64hl16>;
+def IIHH : BinaryRI<"iihh", 0xA50, inserthh, GR64, imm64hh16>;
+
+// ...likewise for 32-bit immediates. For GR32s this is a general
+// full-width move. (We use IILF rather than something like LLILF
+// for 32-bit moves because IILF leaves the upper 32 bits of the
+// GR64 unchanged.)
+let isCodeGenOnly = 1 in {
+ def IILF32 : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>;
+}
+def IILF : BinaryRIL<"iilf", 0xC09, insertlf, GR64, imm64lf32>;
+def IIHF : BinaryRIL<"iihf", 0xC08, inserthf, GR64, imm64hf32>;
+
+// An alternative model of inserthf, with the first operand being
+// a zero-extended value.
+def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
+ (IIHF (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit),
+ imm64hf32:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+// Plain addition.
+let Defs = [PSW] in {
+ // Addition of a register.
+ let isCommutable = 1 in {
+ def AR : BinaryRR <"ar", 0x1A, add, GR32, GR32>;
+ def AGR : BinaryRRE<"agr", 0xB908, add, GR64, GR64>;
+ }
+ def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>;
+
+ // Addition of signed 16-bit immediates.
+ def AHI : BinaryRI<"ahi", 0xA7A, add, GR32, imm32sx16>;
+ def AGHI : BinaryRI<"aghi", 0xA7B, add, GR64, imm64sx16>;
+
+ // Addition of signed 32-bit immediates.
+ def AFI : BinaryRIL<"afi", 0xC29, add, GR32, simm32>;
+ def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>;
+
+ // Addition of memory.
+ defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, sextloadi16>;
+ defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load>;
+ def AGF : BinaryRXY<"agf", 0xE318, add, GR64, sextloadi32>;
+ def AG : BinaryRXY<"ag", 0xE308, add, GR64, load>;
+
+ // Addition to memory.
+ def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>;
+ def AGSI : BinarySIY<"agsi", 0xEB7A, add, imm64sx8>;
+}
+defm : SXB<add, GR64, AGFR>;
+
+// Addition producing a carry.
+let Defs = [PSW] in {
+ // Addition of a register.
+ let isCommutable = 1 in {
+ def ALR : BinaryRR <"alr", 0x1E, addc, GR32, GR32>;
+ def ALGR : BinaryRRE<"algr", 0xB90A, addc, GR64, GR64>;
+ }
+ def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>;
+
+ // Addition of unsigned 32-bit immediates.
+ def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>;
+ def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>;
+
+ // Addition of memory.
+ defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load>;
+ def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, zextloadi32>;
+ def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load>;
+}
+defm : ZXB<addc, GR64, ALGFR>;
+
+// Addition producing and using a carry.
+let Defs = [PSW], Uses = [PSW] in {
+ // Addition of a register.
+ def ALCR : BinaryRRE<"alcr", 0xB998, adde, GR32, GR32>;
+ def ALCGR : BinaryRRE<"alcgr", 0xB988, adde, GR64, GR64>;
+
+ // Addition of memory.
+ def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load>;
+ def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load>;
+}
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+// Plain subtraction. Although immediate forms exist, we use the
+// add-immediate instruction instead.
+let Defs = [PSW] in {
+ // Subtraction of a register.
+ def SR : BinaryRR <"sr", 0x1B, sub, GR32, GR32>;
+ def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>;
+ def SGR : BinaryRRE<"sgr", 0xB909, sub, GR64, GR64>;
+
+ // Subtraction of memory.
+ defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load>;
+ def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32>;
+ def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load>;
+}
+defm : SXB<sub, GR64, SGFR>;
+
+// Subtraction producing a carry.
+let Defs = [PSW] in {
+ // Subtraction of a register.
+ def SLR : BinaryRR <"slr", 0x1F, subc, GR32, GR32>;
+ def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>;
+ def SLGR : BinaryRRE<"slgr", 0xB90B, subc, GR64, GR64>;
+
+ // Subtraction of unsigned 32-bit immediates. These don't match
+ // subc because we prefer addc for constants.
+ def SLFI : BinaryRIL<"slfi", 0xC25, null_frag, GR32, uimm32>;
+ def SLGFI : BinaryRIL<"slgfi", 0xC24, null_frag, GR64, imm64zx32>;
+
+ // Subtraction of memory.
+ defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load>;
+ def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, zextloadi32>;
+ def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load>;
+}
+defm : ZXB<subc, GR64, SLGFR>;
+
+// Subtraction producing and using a carry.
+let Defs = [PSW], Uses = [PSW] in {
+ // Subtraction of a register.
+ def SLBR : BinaryRRE<"slbr", 0xB999, sube, GR32, GR32>;
+ def SLGBR : BinaryRRE<"slbgr", 0xB989, sube, GR64, GR64>;
+
+ // Subtraction of memory.
+ def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load>;
+ def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load>;
+}
+
+//===----------------------------------------------------------------------===//
+// AND
+//===----------------------------------------------------------------------===//
+
+let Defs = [PSW] in {
+ // ANDs of a register.
+ let isCommutable = 1 in {
+ def NR : BinaryRR <"nr", 0x14, and, GR32, GR32>;
+ def NGR : BinaryRRE<"ngr", 0xB980, and, GR64, GR64>;
+ }
+
+ // ANDs of a 16-bit immediate, leaving other bits unaffected.
+ let isCodeGenOnly = 1 in {
+ def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
+ def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
+ }
+ def NILL : BinaryRI<"nill", 0xA57, and, GR64, imm64ll16c>;
+ def NILH : BinaryRI<"nilh", 0xA56, and, GR64, imm64lh16c>;
+ def NIHL : BinaryRI<"nihl", 0xA55, and, GR64, imm64hl16c>;
+ def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>;
+
+ // ANDs of a 32-bit immediate, leaving other bits unaffected.
+ let isCodeGenOnly = 1 in
+ def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
+ def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>;
+ def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>;
+
+ // ANDs of memory.
+ defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load>;
+ def NG : BinaryRXY<"ng", 0xE380, and, GR64, load>;
+
+ // AND to memory
+ defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>;
+}
+defm : RMWIByte<and, bdaddr12pair, NI>;
+defm : RMWIByte<and, bdaddr20pair, NIY>;
+
+//===----------------------------------------------------------------------===//
+// OR
+//===----------------------------------------------------------------------===//
+
+let Defs = [PSW] in {
+ // ORs of a register.
+ let isCommutable = 1 in {
+ def OR : BinaryRR <"or", 0x16, or, GR32, GR32>;
+ def OGR : BinaryRRE<"ogr", 0xB981, or, GR64, GR64>;
+ }
+
+ // ORs of a 16-bit immediate, leaving other bits unaffected.
+ let isCodeGenOnly = 1 in {
+ def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>;
+ def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>;
+ }
+ def OILL : BinaryRI<"oill", 0xA5B, or, GR64, imm64ll16>;
+ def OILH : BinaryRI<"oilh", 0xA5A, or, GR64, imm64lh16>;
+ def OIHL : BinaryRI<"oihl", 0xA59, or, GR64, imm64hl16>;
+ def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>;
+
+ // ORs of a 32-bit immediate, leaving other bits unaffected.
+ let isCodeGenOnly = 1 in
+ def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
+ def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>;
+ def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>;
+
+ // ORs of memory.
+ defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load>;
+ def OG : BinaryRXY<"og", 0xE381, or, GR64, load>;
+
+ // OR to memory
+ defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>;
+}
+defm : RMWIByte<or, bdaddr12pair, OI>;
+defm : RMWIByte<or, bdaddr20pair, OIY>;
+
+//===----------------------------------------------------------------------===//
+// XOR
+//===----------------------------------------------------------------------===//
+
+let Defs = [PSW] in {
+ // XORs of a register.
+ let isCommutable = 1 in {
+ def XR : BinaryRR <"xr", 0x17, xor, GR32, GR32>;
+ def XGR : BinaryRRE<"xgr", 0xB982, xor, GR64, GR64>;
+ }
+
+ // XORs of a 32-bit immediate, leaving other bits unaffected.
+ let isCodeGenOnly = 1 in
+ def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
+ def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>;
+ def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>;
+
+ // XORs of memory.
+ defm X : BinaryRXPair<"x", 0x57, 0xE357, xor, GR32, load>;
+ def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load>;
+
+ // XOR to memory
+ defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>;
+}
+defm : RMWIByte<xor, bdaddr12pair, XI>;
+defm : RMWIByte<xor, bdaddr20pair, XIY>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication
+//===----------------------------------------------------------------------===//
+
+// Multiplication of a register.
+let isCommutable = 1 in {
+ def MSR : BinaryRRE<"msr", 0xB252, mul, GR32, GR32>;
+ def MSGR : BinaryRRE<"msgr", 0xB90C, mul, GR64, GR64>;
+}
+def MSGFR : BinaryRRE<"msgfr", 0xB91C, null_frag, GR64, GR32>;
+defm : SXB<mul, GR64, MSGFR>;
+
+// Multiplication of a signed 16-bit immediate.
+def MHI : BinaryRI<"mhi", 0xA7C, mul, GR32, imm32sx16>;
+def MGHI : BinaryRI<"mghi", 0xA7D, mul, GR64, imm64sx16>;
+
+// Multiplication of a signed 32-bit immediate.
+def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>;
+def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>;
+
+// Multiplication of memory.
+defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, sextloadi16>;
+defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load>;
+def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, sextloadi32>;
+def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load>;
+
+// Multiplication of a register, producing two results.
+def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>;
+
+// Multiplication of memory, producing two results.
+def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+// Division and remainder, from registers.
+def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>;
+def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>;
+def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>;
+def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>;
+defm : SXB<z_sdivrem64, GR128, DSGFR>;
+
+// Division and remainder, from memory.
+def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem64, GR128, sextloadi32>;
+def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load>;
+def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load>;
+def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+// Shift left.
+let neverHasSideEffects = 1 in {
+ def SLL : ShiftRS <"sll", 0x89, shl, GR32, shift12only>;
+ def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64, shift20only>;
+}
+
+// Logical shift right.
+let neverHasSideEffects = 1 in {
+ def SRL : ShiftRS <"srl", 0x88, srl, GR32, shift12only>;
+ def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64, shift20only>;
+}
+
+// Arithmetic shift right.
+let Defs = [PSW] in {
+ def SRA : ShiftRS <"sra", 0x8A, sra, GR32, shift12only>;
+ def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64, shift20only>;
+}
+
+// Rotate left.
+let neverHasSideEffects = 1 in {
+ def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32, shift20only>;
+ def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64, shift20only>;
+}
+
+// Rotate the second operand left and insert selected bits into the first
+// operand. These can act like 32-bit operands provided that the constant
+// start and end bits (operands 2 and 3) are in the range [32, 64).
+let Defs = [PSW] in {
+ let isCodeGenOnly = 1 in
+ def RISBG32 : RotateSelectRIEf<"risbg", 0xEC55, GR32, GR32>;
+ def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+// Signed comparisons.
+let Defs = [PSW] in {
+ // Comparison with a register.
+ def CR : CompareRR <"cr", 0x19, z_cmp, GR32, GR32>;
+ def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>;
+ def CGR : CompareRRE<"cgr", 0xB920, z_cmp, GR64, GR64>;
+
+ // Comparison with a signed 16-bit immediate.
+ def CHI : CompareRI<"chi", 0xA7E, z_cmp, GR32, imm32sx16>;
+ def CGHI : CompareRI<"cghi", 0xA7F, z_cmp, GR64, imm64sx16>;
+
+ // Comparison with a signed 32-bit immediate.
+ def CFI : CompareRIL<"cfi", 0xC2D, z_cmp, GR32, simm32>;
+ def CGFI : CompareRIL<"cgfi", 0xC2C, z_cmp, GR64, imm64sx32>;
+
+ // Comparison with memory.
+ defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_cmp, GR32, sextloadi16>;
+ defm C : CompareRXPair<"c", 0x59, 0xE359, z_cmp, GR32, load>;
+ def CGH : CompareRXY<"cgh", 0xE334, z_cmp, GR64, sextloadi16>;
+ def CGF : CompareRXY<"cgf", 0xE330, z_cmp, GR64, sextloadi32>;
+ def CG : CompareRXY<"cg", 0xE320, z_cmp, GR64, load>;
+ def CHRL : CompareRILPC<"chrl", 0xC65, z_cmp, GR32, aligned_sextloadi16>;
+ def CRL : CompareRILPC<"crl", 0xC6D, z_cmp, GR32, aligned_load>;
+ def CGHRL : CompareRILPC<"cghrl", 0xC64, z_cmp, GR64, aligned_sextloadi16>;
+ def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_cmp, GR64, aligned_sextloadi32>;
+ def CGRL : CompareRILPC<"cgrl", 0xC68, z_cmp, GR64, aligned_load>;
+
+ // Comparison between memory and a signed 16-bit immediate.
+ def CHHSI : CompareSIL<"chhsi", 0xE554, z_cmp, sextloadi16, imm32sx16>;
+ def CHSI : CompareSIL<"chsi", 0xE55C, z_cmp, load, imm32sx16>;
+ def CGHSI : CompareSIL<"cghsi", 0xE558, z_cmp, load, imm64sx16>;
+}
+defm : SXB<z_cmp, GR64, CGFR>;
+
+// Unsigned comparisons.
+let Defs = [PSW] in {
+ // Comparison with a register.
+ def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>;
+ def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>;
+ def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>;
+
+ // Comparison with a signed 32-bit immediate.
+ def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>;
+ def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>;
+
+ // Comparison with memory.
+ defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load>;
+ def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, zextloadi32>;
+ def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load>;
+ def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32,
+ aligned_zextloadi16>;
+ def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32,
+ aligned_load>;
+ def CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64,
+ aligned_zextloadi16>;
+ def CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64,
+ aligned_zextloadi32>;
+ def CLGRL : CompareRILPC<"clgrl", 0xC6A, z_ucmp, GR64,
+ aligned_load>;
+
+ // Comparison between memory and an unsigned 8-bit immediate.
+ defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, zextloadi8, imm32zx8>;
+
+ // Comparison between memory and an unsigned 16-bit immediate.
+ def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, zextloadi16, imm32zx16>;
+ def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>;
+ def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>;
+}
+defm : ZXB<z_ucmp, GR64, CLGFR>;
+
+//===----------------------------------------------------------------------===//
+// Atomic operations
+//===----------------------------------------------------------------------===//
+
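+// The "W" forms operate on sub-word values held within a surrounding 32-bit
+// word; ATOMIC_CMP_SWAPW below shows the extra shift and bit-size operands
+// that this requires.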
+def ATOMIC_SWAPW : AtomicLoadWBinaryReg<z_atomic_swapw>;
+def ATOMIC_SWAP_32 : AtomicLoadBinaryReg32<atomic_swap_32>;
+def ATOMIC_SWAP_64 : AtomicLoadBinaryReg64<atomic_swap_64>;
+
+def ATOMIC_LOADW_AR : AtomicLoadWBinaryReg<z_atomic_loadw_add>;
+def ATOMIC_LOADW_AFI : AtomicLoadWBinaryImm<z_atomic_loadw_add, simm32>;
+def ATOMIC_LOAD_AR : AtomicLoadBinaryReg32<atomic_load_add_32>;
+def ATOMIC_LOAD_AHI : AtomicLoadBinaryImm32<atomic_load_add_32, imm32sx16>;
+def ATOMIC_LOAD_AFI : AtomicLoadBinaryImm32<atomic_load_add_32, simm32>;
+def ATOMIC_LOAD_AGR : AtomicLoadBinaryReg64<atomic_load_add_64>;
+def ATOMIC_LOAD_AGHI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx16>;
+def ATOMIC_LOAD_AGFI : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx32>;
+
+def ATOMIC_LOADW_SR : AtomicLoadWBinaryReg<z_atomic_loadw_sub>;
+def ATOMIC_LOAD_SR : AtomicLoadBinaryReg32<atomic_load_sub_32>;
+def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64<atomic_load_sub_64>;
+
+def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>;
+def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>;
+def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32<atomic_load_and_32>;
+def ATOMIC_LOAD_NILL32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32ll16c>;
+def ATOMIC_LOAD_NILH32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32lh16c>;
+def ATOMIC_LOAD_NILF32 : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>;
+def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64<atomic_load_and_64>;
+def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64ll16c>;
+def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lh16c>;
+def ATOMIC_LOAD_NIHL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hl16c>;
+def ATOMIC_LOAD_NIHH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hh16c>;
+def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lf32c>;
+def ATOMIC_LOAD_NIHF : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hf32c>;
+
+def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg<z_atomic_loadw_or>;
+def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm<z_atomic_loadw_or, imm32lh16>;
+def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32<atomic_load_or_32>;
+def ATOMIC_LOAD_OILL32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>;
+def ATOMIC_LOAD_OILH32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>;
+def ATOMIC_LOAD_OILF32 : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>;
+def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64<atomic_load_or_64>;
+def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>;
+def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>;
+def ATOMIC_LOAD_OIHL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>;
+def ATOMIC_LOAD_OIHH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>;
+def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>;
+def ATOMIC_LOAD_OIHF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>;
+
+def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg<z_atomic_loadw_xor>;
+def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>;
+def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32<atomic_load_xor_32>;
+def ATOMIC_LOAD_XILF32 : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>;
+def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64<atomic_load_xor_64>;
+def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>;
+def ATOMIC_LOAD_XIHF : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>;
+
+def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg<z_atomic_loadw_nand>;
+def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm<z_atomic_loadw_nand,
+ imm32lh16c>;
+def ATOMIC_LOAD_NRi : AtomicLoadBinaryReg32<atomic_load_nand_32>;
+def ATOMIC_LOAD_NILL32i : AtomicLoadBinaryImm32<atomic_load_nand_32,
+ imm32ll16c>;
+def ATOMIC_LOAD_NILH32i : AtomicLoadBinaryImm32<atomic_load_nand_32,
+ imm32lh16c>;
+def ATOMIC_LOAD_NILF32i : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>;
+def ATOMIC_LOAD_NGRi : AtomicLoadBinaryReg64<atomic_load_nand_64>;
+def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+ imm64ll16c>;
+def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+ imm64lh16c>;
+def ATOMIC_LOAD_NIHLi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+ imm64hl16c>;
+def ATOMIC_LOAD_NIHHi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+ imm64hh16c>;
+def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+ imm64lf32c>;
+def ATOMIC_LOAD_NIHFi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+ imm64hf32c>;
+
+def ATOMIC_LOADW_MIN : AtomicLoadWBinaryReg<z_atomic_loadw_min>;
+def ATOMIC_LOAD_MIN_32 : AtomicLoadBinaryReg32<atomic_load_min_32>;
+def ATOMIC_LOAD_MIN_64 : AtomicLoadBinaryReg64<atomic_load_min_64>;
+
+def ATOMIC_LOADW_MAX : AtomicLoadWBinaryReg<z_atomic_loadw_max>;
+def ATOMIC_LOAD_MAX_32 : AtomicLoadBinaryReg32<atomic_load_max_32>;
+def ATOMIC_LOAD_MAX_64 : AtomicLoadBinaryReg64<atomic_load_max_64>;
+
+def ATOMIC_LOADW_UMIN : AtomicLoadWBinaryReg<z_atomic_loadw_umin>;
+def ATOMIC_LOAD_UMIN_32 : AtomicLoadBinaryReg32<atomic_load_umin_32>;
+def ATOMIC_LOAD_UMIN_64 : AtomicLoadBinaryReg64<atomic_load_umin_64>;
+
+def ATOMIC_LOADW_UMAX : AtomicLoadWBinaryReg<z_atomic_loadw_umax>;
+def ATOMIC_LOAD_UMAX_32 : AtomicLoadBinaryReg32<atomic_load_umax_32>;
+def ATOMIC_LOAD_UMAX_64 : AtomicLoadBinaryReg64<atomic_load_umax_64>;
+
+def ATOMIC_CMP_SWAPW
+ : Pseudo<(outs GR32:$dst), (ins bdaddr20only:$addr, GR32:$cmp, GR32:$swap,
+ ADDR32:$bitshift, ADDR32:$negbitshift,
+ uimm32:$bitsize),
+ [(set GR32:$dst,
+ (z_atomic_cmp_swapw bdaddr20only:$addr, GR32:$cmp, GR32:$swap,
+ ADDR32:$bitshift, ADDR32:$negbitshift,
+ uimm32:$bitsize))]> {
+ let Defs = [PSW];
+ let mayLoad = 1;
+ let mayStore = 1;
+ let usesCustomInserter = 1;
+}
+
+let Defs = [PSW] in {
+ defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, atomic_cmp_swap_32, GR32>;
+ def CSG : CmpSwapRSY<"csg", 0xEB30, atomic_cmp_swap_64, GR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Read a 32-bit access register into a GR32. As with all GR32 operations,
+// the upper 32 bits of the enclosing GR64 remain unchanged, which is useful
+// when a 64-bit address is stored in a pair of access registers.
+def EAR : InstRRE<0xB24F, (outs GR32:$dst), (ins access_reg:$src),
+ "ear\t$dst, $src",
+ [(set GR32:$dst, (z_extract_access access_reg:$src))]>;
+
+// Find leftmost one, AKA count leading zeros. The instruction actually
+// returns a pair of GR64s, the first giving the number of leading zeros
+// and the second giving a copy of the source with the leftmost one bit
+// cleared. We only use the first result here.
+let Defs = [PSW] in {
+ def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>;
+}
+def : Pat<(ctlz GR64:$src),
+ (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_high)>;
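+// (FLOGR produces 64 for a zero input, which matches the defined ctlz result
+// for an i64 zero, so no separate zero check is needed here.)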
+
+// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
+def : Pat<(i64 (anyext GR32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>;
+
+// There are no 32-bit equivalents of LLILL and LLILH, so use a full
+// 64-bit move followed by a subreg. This preserves the invariant that
+// all GR32 operations only modify the low 32 bits.
+def : Pat<(i32 imm32ll16:$src),
+ (EXTRACT_SUBREG (LLILL (LL16 imm:$src)), subreg_32bit)>;
+def : Pat<(i32 imm32lh16:$src),
+ (EXTRACT_SUBREG (LLILH (LH16 imm:$src)), subreg_32bit)>;
+
+// Extend GR32s and GR64s to GR128s.
+let usesCustomInserter = 1 in {
+ def AEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
+ def ZEXT128_32 : Pseudo<(outs GR128:$dst), (ins GR32:$src), []>;
+ def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Peepholes.
+//===----------------------------------------------------------------------===//
+
+// Use AL* for GR64 additions of unsigned 32-bit values.
+defm : ZXB<add, GR64, ALGFR>;
+def : Pat<(add GR64:$src1, imm64zx32:$src2),
+ (ALGFI GR64:$src1, imm64zx32:$src2)>;
+def : Pat<(add GR64:$src1, (zextloadi32 bdxaddr20only:$addr)),
+ (ALGF GR64:$src1, bdxaddr20only:$addr)>;
+
+// Use SL* for GR64 subtractions of unsigned 32-bit values.
+defm : ZXB<sub, GR64, SLGFR>;
+def : Pat<(add GR64:$src1, imm64zx32n:$src2),
+ (SLGFI GR64:$src1, imm64zx32n:$src2)>;
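+// (The "add" above is intentional: imm64zx32n matches an addition of a
+// negated 32-bit value, which SLGFI implements by subtracting its magnitude.)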
+def : Pat<(sub GR64:$src1, (zextloadi32 bdxaddr20only:$addr)),
+ (SLGF GR64:$src1, bdxaddr20only:$addr)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
new file mode 100644
index 0000000..5d83321
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -0,0 +1,116 @@
+//===-- SystemZMCInstLower.cpp - Lower MachineInstr to MCInst -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCInstLower.h"
+#include "SystemZAsmPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+// Where relaxable pairs of reloc-generating instructions exist,
+// we tend to use the longest form by default, since that produces
+// correct assembly in cases where no relaxation is performed.
+// If Opcode is one such instruction, return the opcode for the
+// shortest possible form instead, otherwise return Opcode itself.
+static unsigned getShortenedInstr(unsigned Opcode) {
+ switch (Opcode) {
+ case SystemZ::BRCL: return SystemZ::BRC;
+ case SystemZ::JG: return SystemZ::J;
+ case SystemZ::BRASL: return SystemZ::BRAS;
+ }
+ return Opcode;
+}
+
+// Return the VK_* enumeration for MachineOperand target flags Flags.
+static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) {
+ switch (Flags & SystemZII::MO_SYMBOL_MODIFIER) {
+ case 0:
+ return MCSymbolRefExpr::VK_None;
+ case SystemZII::MO_GOT:
+ return MCSymbolRefExpr::VK_GOT;
+ }
+  llvm_unreachable("Unrecognised MO_SYMBOL_MODIFIER");
+}
+
+SystemZMCInstLower::SystemZMCInstLower(Mangler *mang, MCContext &ctx,
+ SystemZAsmPrinter &asmprinter)
+ : Mang(mang), Ctx(ctx), AsmPrinter(asmprinter) {}
+
+MCOperand SystemZMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
+ const MCSymbol *Symbol,
+ int64_t Offset) const {
+ MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags());
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx);
+ if (Offset) {
+ const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx);
+ Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
+ }
+ return MCOperand::CreateExpr(Expr);
+}
+
+MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const {
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("unknown operand type");
+
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit())
+ return MCOperand();
+ return MCOperand::CreateReg(MO.getReg());
+
+ case MachineOperand::MO_Immediate:
+ return MCOperand::CreateImm(MO.getImm());
+
+ case MachineOperand::MO_MachineBasicBlock:
+ return lowerSymbolOperand(MO, MO.getMBB()->getSymbol(),
+ /* MO has no offset field */0);
+
+ case MachineOperand::MO_GlobalAddress:
+ return lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()),
+ MO.getOffset());
+
+ case MachineOperand::MO_ExternalSymbol: {
+ StringRef Name = MO.getSymbolName();
+ return lowerSymbolOperand(MO, AsmPrinter.GetExternalSymbolSymbol(Name),
+ MO.getOffset());
+ }
+
+ case MachineOperand::MO_JumpTableIndex:
+ return lowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()),
+ /* MO has no offset field */0);
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ return lowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()),
+ MO.getOffset());
+
+ case MachineOperand::MO_BlockAddress: {
+ const BlockAddress *BA = MO.getBlockAddress();
+ return lowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol(BA),
+ MO.getOffset());
+ }
+ }
+}
+
+void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+ unsigned Opcode = MI->getOpcode();
+ // When emitting binary code, start with the shortest form of an instruction
+ // and then relax it where necessary.
+ if (!AsmPrinter.OutStreamer.hasRawTextSupport())
+ Opcode = getShortenedInstr(Opcode);
+ OutMI.setOpcode(Opcode);
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ MCOperand MCOp = lowerOperand(MO);
+ if (MCOp.isValid())
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h
new file mode 100644
index 0000000..afa72f3
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMCInstLower.h
@@ -0,0 +1,47 @@
+//===-- SystemZMCInstLower.h - Lower MachineInstr to MCInst ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEMZMCINSTLOWER_H
+#define LLVM_SYSTEMZMCINSTLOWER_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class MCContext;
+class MCInst;
+class MCOperand;
+class MCSymbol;
+class MachineInstr;
+class MachineOperand;
+class Mangler;
+class SystemZAsmPrinter;
+
+class LLVM_LIBRARY_VISIBILITY SystemZMCInstLower {
+ Mangler *Mang;
+ MCContext &Ctx;
+ SystemZAsmPrinter &AsmPrinter;
+
+public:
+ SystemZMCInstLower(Mangler *mang, MCContext &ctx,
+ SystemZAsmPrinter &asmPrinter);
+
+ // Lower MachineInstr MI to MCInst OutMI.
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+ // Return an MCOperand for MO. Return an empty operand if MO is implicit.
+ MCOperand lowerOperand(const MachineOperand& MO) const;
+
+ // Return an MCOperand for MO, given that it equals Symbol + Offset.
+ MCOperand lowerSymbolOperand(const MachineOperand &MO,
+ const MCSymbol *Symbol, int64_t Offset) const;
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
new file mode 100644
index 0000000..1dc05a7e
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -0,0 +1,74 @@
+//==- SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMACHINEFUNCTIONINFO_H
+#define SYSTEMZMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class SystemZMachineFunctionInfo : public MachineFunctionInfo {
+ unsigned SavedGPRFrameSize;
+ unsigned LowSavedGPR;
+ unsigned HighSavedGPR;
+ unsigned VarArgsFirstGPR;
+ unsigned VarArgsFirstFPR;
+ unsigned VarArgsFrameIndex;
+ unsigned RegSaveFrameIndex;
+ bool ManipulatesSP;
+
+public:
+ explicit SystemZMachineFunctionInfo(MachineFunction &MF)
+ : SavedGPRFrameSize(0), LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0),
+ VarArgsFirstFPR(0), VarArgsFrameIndex(0), RegSaveFrameIndex(0),
+ ManipulatesSP(false) {}
+
+ // Get and set the number of bytes allocated by generic code to store
+ // call-saved GPRs.
+ unsigned getSavedGPRFrameSize() const { return SavedGPRFrameSize; }
+ void setSavedGPRFrameSize(unsigned bytes) { SavedGPRFrameSize = bytes; }
+
+ // Get and set the first call-saved GPR that should be saved and restored
+ // by this function. This is 0 if no GPRs need to be saved or restored.
+ unsigned getLowSavedGPR() const { return LowSavedGPR; }
+ void setLowSavedGPR(unsigned Reg) { LowSavedGPR = Reg; }
+
+ // Get and set the last call-saved GPR that should be saved and restored
+ // by this function.
+ unsigned getHighSavedGPR() const { return HighSavedGPR; }
+ void setHighSavedGPR(unsigned Reg) { HighSavedGPR = Reg; }
+
+ // Get and set the number of fixed (as opposed to variable) arguments
+ // that are passed in GPRs to this function.
+ unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; }
+ void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; }
+
+ // Likewise FPRs.
+ unsigned getVarArgsFirstFPR() const { return VarArgsFirstFPR; }
+ void setVarArgsFirstFPR(unsigned FPR) { VarArgsFirstFPR = FPR; }
+
+ // Get and set the frame index of the first stack vararg.
+ unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; }
+
+ // Get and set the frame index of the register save area
+ // (i.e. the incoming stack pointer).
+ unsigned getRegSaveFrameIndex() const { return RegSaveFrameIndex; }
+ void setRegSaveFrameIndex(unsigned FI) { RegSaveFrameIndex = FI; }
+
+ // Get and set whether the function directly manipulates the stack pointer,
+ // e.g. through STACKSAVE or STACKRESTORE.
+ bool getManipulatesSP() const { return ManipulatesSP; }
+ void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
new file mode 100644
index 0000000..0abc3f7
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -0,0 +1,435 @@
+//===-- SystemZOperands.td - SystemZ instruction operands ----*- tblgen-*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Class definitions
+//===----------------------------------------------------------------------===//
+
+class ImmediateAsmOperand<string name>
+ : AsmOperandClass {
+ let Name = name;
+ let RenderMethod = "addImmOperands";
+}
+
+// Constructs both a DAG pattern and instruction operand for an immediate
+// of type VT. PRED returns true if a node is acceptable and XFORM returns
+// the operand value associated with the node. ASMOP is the name of the
+// associated asm operand, and also forms the basis of the asm print method.
+class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop>
+ : PatLeaf<(vt imm), pred, xform>, Operand<vt> {
+ let PrintMethod = "print"##asmop##"Operand";
+ let ParserMatchClass = !cast<AsmOperandClass>(asmop);
+}
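+
+// For example, the later definition
+//   def uimm8 : Immediate<i8, [{}], UIMM8, "U8Imm">;
+// yields an operand whose print method is printU8ImmOperand and whose
+// parser match class is the U8Imm definition below.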
+
+// Constructs both a DAG pattern and instruction operand for a PC-relative
+// address with address size VT. SELF is the name of the operand.
+class PCRelAddress<ValueType vt, string self>
+ : ComplexPattern<vt, 1, "selectPCRelAddress", [z_pcrel_wrapper]>,
+ Operand<vt> {
+ let MIOperandInfo = (ops !cast<Operand>(self));
+}
+
+// Constructs an AsmOperandClass for addressing mode FORMAT, treating the
+// registers as having BITSIZE bits and displacements as having DISPSIZE bits.
+class AddressAsmOperand<string format, string bitsize, string dispsize>
+ : AsmOperandClass {
+ let Name = format##bitsize##"Disp"##dispsize;
+ let ParserMethod = "parse"##format##bitsize;
+ let RenderMethod = "add"##format##"Operands";
+}
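+
+// For example, the later definition
+//   def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">;
+// produces an asm operand class named BDAddr64Disp12 that is parsed by
+// parseBDAddr64 and rendered by addBDAddrOperands.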
+
+// Constructs both a DAG pattern and instruction operand for an addressing mode.
+// The mode is selected by custom code in selectTYPE...SUFFIX(). The address
+// registers have BITSIZE bits and displacements have DISPSIZE bits. NUMOPS is
+// the number of operands that make up an address and OPERANDS lists the types
+// of those operands using (ops ...). FORMAT is the type of addressing mode,
+// which needs to match the names used in AddressAsmOperand.
+class AddressingMode<string type, string bitsize, string dispsize,
+ string suffix, int numops, string format, dag operands>
+ : ComplexPattern<!cast<ValueType>("i"##bitsize), numops,
+ "select"##type##dispsize##suffix,
+ [add, sub, or, frameindex, z_adjdynalloc]>,
+ Operand<!cast<ValueType>("i"##bitsize)> {
+ let PrintMethod = "print"##format##"Operand";
+ let MIOperandInfo = operands;
+ let ParserMatchClass =
+ !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize);
+}
+
+// An addressing mode with a base and displacement but no index.
+class BDMode<string type, string bitsize, string dispsize, string suffix>
+ : AddressingMode<type, bitsize, dispsize, suffix, 2, "BDAddr",
+ (ops !cast<RegisterOperand>("ADDR"##bitsize),
+ !cast<Immediate>("disp"##dispsize##"imm"##bitsize))>;
+
+// An addressing mode with a base, displacement and index.
+class BDXMode<string type, string bitsize, string dispsize, string suffix>
+ : AddressingMode<type, bitsize, dispsize, suffix, 3, "BDXAddr",
+ (ops !cast<RegisterOperand>("ADDR"##bitsize),
+ !cast<Immediate>("disp"##dispsize##"imm"##bitsize),
+ !cast<RegisterOperand>("ADDR"##bitsize))>;
+
+//===----------------------------------------------------------------------===//
+// Extracting immediate operands from nodes
+// These all create MVT::i64 nodes to ensure the value is not sign-extended
+// when converted from an SDNode to a MachineOperand later on.
+//===----------------------------------------------------------------------===//
+
+// Bits 0-15 (counting from the lsb).
+def LL16 : SDNodeXForm<imm, [{
+ uint64_t Value = N->getZExtValue() & 0x000000000000FFFFULL;
+ return CurDAG->getTargetConstant(Value, MVT::i64);
+}]>;
+
+// Bits 16-31 (counting from the lsb).
+def LH16 : SDNodeXForm<imm, [{
+ uint64_t Value = (N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16;
+ return CurDAG->getTargetConstant(Value, MVT::i64);
+}]>;
+
+// Bits 32-47 (counting from the lsb).
+def HL16 : SDNodeXForm<imm, [{
+ uint64_t Value = (N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32;
+ return CurDAG->getTargetConstant(Value, MVT::i64);
+}]>;
+
+// Bits 48-63 (counting from the lsb).
+def HH16 : SDNodeXForm<imm, [{
+ uint64_t Value = (N->getZExtValue() & 0xFFFF000000000000ULL) >> 48;
+ return CurDAG->getTargetConstant(Value, MVT::i64);
+}]>;
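+
+// As an illustration, for the constant 0x0123456789ABCDEF the extractors
+// above would produce 0xCDEF (LL16), 0x89AB (LH16), 0x4567 (HL16) and
+// 0x0123 (HH16).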
+
+// Low 32 bits.
+def LF32 : SDNodeXForm<imm, [{
+ uint64_t Value = N->getZExtValue() & 0x00000000FFFFFFFFULL;
+ return CurDAG->getTargetConstant(Value, MVT::i64);
+}]>;
+
+// High 32 bits.
+def HF32 : SDNodeXForm<imm, [{
+ uint64_t Value = N->getZExtValue() >> 32;
+ return CurDAG->getTargetConstant(Value, MVT::i64);
+}]>;
+
+// Truncate an immediate to an 8-bit signed quantity.
+def SIMM8 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), MVT::i64);
+}]>;
+
+// Truncate an immediate to an 8-bit unsigned quantity.
+def UIMM8 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), MVT::i64);
+}]>;
+
+// Truncate an immediate to a 16-bit signed quantity.
+def SIMM16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), MVT::i64);
+}]>;
+
+// Truncate an immediate to a 16-bit unsigned quantity.
+def UIMM16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), MVT::i64);
+}]>;
+
+// Truncate an immediate to a 32-bit signed quantity.
+def SIMM32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(int32_t(N->getZExtValue()), MVT::i64);
+}]>;
+
+// Truncate an immediate to a 32-bit unsigned quantity.
+def UIMM32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), MVT::i64);
+}]>;
+
+// Negate and then truncate an immediate to a 32-bit unsigned quantity.
+def NEGIMM32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), MVT::i64);
+}]>;
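+
+// NEGIMM32 is used by imm64zx32n further down; the peephole patterns in
+// SystemZInstrInfo.td then turn an addition of such a value into a
+// subtraction of its magnitude using SLGFI.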
+
+//===----------------------------------------------------------------------===//
+// Immediate asm operands.
+//===----------------------------------------------------------------------===//
+
+def U4Imm : ImmediateAsmOperand<"U4Imm">;
+def U6Imm : ImmediateAsmOperand<"U6Imm">;
+def S8Imm : ImmediateAsmOperand<"S8Imm">;
+def U8Imm : ImmediateAsmOperand<"U8Imm">;
+def S16Imm : ImmediateAsmOperand<"S16Imm">;
+def U16Imm : ImmediateAsmOperand<"U16Imm">;
+def S32Imm : ImmediateAsmOperand<"S32Imm">;
+def U32Imm : ImmediateAsmOperand<"U32Imm">;
+
+//===----------------------------------------------------------------------===//
+// 8-bit immediates
+//===----------------------------------------------------------------------===//
+
+def uimm8zx4 : Immediate<i8, [{
+ return isUInt<4>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U4Imm">;
+
+def uimm8zx6 : Immediate<i8, [{
+ return isUInt<6>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U6Imm">;
+
+def simm8 : Immediate<i8, [{}], SIMM8, "S8Imm">;
+def uimm8 : Immediate<i8, [{}], UIMM8, "U8Imm">;
+
+//===----------------------------------------------------------------------===//
+// i32 immediates
+//===----------------------------------------------------------------------===//
+
+// Immediates for the lower and upper 16 bits of an i32, with the other
+// bits of the i32 being zero.
+def imm32ll16 : Immediate<i32, [{
+ return SystemZ::isImmLL(N->getZExtValue());
+}], LL16, "U16Imm">;
+
+def imm32lh16 : Immediate<i32, [{
+ return SystemZ::isImmLH(N->getZExtValue());
+}], LH16, "U16Imm">;
+
+// Immediates for the lower and upper 16 bits of an i32, with the other
+// bits of the i32 being one.
+def imm32ll16c : Immediate<i32, [{
+ return SystemZ::isImmLL(uint32_t(~N->getZExtValue()));
+}], LL16, "U16Imm">;
+
+def imm32lh16c : Immediate<i32, [{
+ return SystemZ::isImmLH(uint32_t(~N->getZExtValue()));
+}], LH16, "U16Imm">;
+
+// Short immediates
+def imm32sx8 : Immediate<i32, [{
+ return isInt<8>(N->getSExtValue());
+}], SIMM8, "S8Imm">;
+
+def imm32zx8 : Immediate<i32, [{
+ return isUInt<8>(N->getZExtValue());
+}], UIMM8, "U8Imm">;
+
+def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
+
+def imm32sx16 : Immediate<i32, [{
+ return isInt<16>(N->getSExtValue());
+}], SIMM16, "S16Imm">;
+
+def imm32zx16 : Immediate<i32, [{
+ return isUInt<16>(N->getZExtValue());
+}], UIMM16, "U16Imm">;
+
+def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
+
+// Full 32-bit immediates. We need both signed and unsigned versions
+// because the assembler is picky. E.g. AFI requires signed operands
+// while NILF requires unsigned ones.
+def simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">;
+def uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">;
+
+def imm32 : ImmLeaf<i32, [{}]>;
+
+//===----------------------------------------------------------------------===//
+// 64-bit immediates
+//===----------------------------------------------------------------------===//
+
+// Immediates for 16-bit chunks of an i64, with the other bits of the
+// i64 being zero.
+def imm64ll16 : Immediate<i64, [{
+ return SystemZ::isImmLL(N->getZExtValue());
+}], LL16, "U16Imm">;
+
+def imm64lh16 : Immediate<i64, [{
+ return SystemZ::isImmLH(N->getZExtValue());
+}], LH16, "U16Imm">;
+
+def imm64hl16 : Immediate<i64, [{
+ return SystemZ::isImmHL(N->getZExtValue());
+}], HL16, "U16Imm">;
+
+def imm64hh16 : Immediate<i64, [{
+ return SystemZ::isImmHH(N->getZExtValue());
+}], HH16, "U16Imm">;
+
+// Immediates for 16-bit chunks of an i64, with the other bits of the
+// i64 being one.
+def imm64ll16c : Immediate<i64, [{
+ return SystemZ::isImmLL(uint64_t(~N->getZExtValue()));
+}], LL16, "U16Imm">;
+
+def imm64lh16c : Immediate<i64, [{
+ return SystemZ::isImmLH(uint64_t(~N->getZExtValue()));
+}], LH16, "U16Imm">;
+
+def imm64hl16c : Immediate<i64, [{
+ return SystemZ::isImmHL(uint64_t(~N->getZExtValue()));
+}], HL16, "U16Imm">;
+
+def imm64hh16c : Immediate<i64, [{
+ return SystemZ::isImmHH(uint64_t(~N->getZExtValue()));
+}], HH16, "U16Imm">;
+
+// Immediates for the lower and upper 32 bits of an i64, with the other
+// bits of the i64 being zero.
+def imm64lf32 : Immediate<i64, [{
+ return SystemZ::isImmLF(N->getZExtValue());
+}], LF32, "U32Imm">;
+
+def imm64hf32 : Immediate<i64, [{
+ return SystemZ::isImmHF(N->getZExtValue());
+}], HF32, "U32Imm">;
+
+// Immediates for the lower and upper 32 bits of an i64, with the other
+// bits of the i64 being one.
+def imm64lf32c : Immediate<i64, [{
+ return SystemZ::isImmLF(uint64_t(~N->getZExtValue()));
+}], LF32, "U32Imm">;
+
+def imm64hf32c : Immediate<i64, [{
+ return SystemZ::isImmHF(uint64_t(~N->getZExtValue()));
+}], HF32, "U32Imm">;
+
+// Short immediates.
+def imm64sx8 : Immediate<i64, [{
+ return isInt<8>(N->getSExtValue());
+}], SIMM8, "S8Imm">;
+
+def imm64sx16 : Immediate<i64, [{
+ return isInt<16>(N->getSExtValue());
+}], SIMM16, "S16Imm">;
+
+def imm64zx16 : Immediate<i64, [{
+ return isUInt<16>(N->getZExtValue());
+}], UIMM16, "U16Imm">;
+
+def imm64sx32 : Immediate<i64, [{
+ return isInt<32>(N->getSExtValue());
+}], SIMM32, "S32Imm">;
+
+def imm64zx32 : Immediate<i64, [{
+ return isUInt<32>(N->getZExtValue());
+}], UIMM32, "U32Imm">;
+
+def imm64zx32n : Immediate<i64, [{
+ return isUInt<32>(-N->getSExtValue());
+}], NEGIMM32, "U32Imm">;
+
+def imm64 : ImmLeaf<i64, [{}]>;
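+
+// imm32 above and imm64 here accept any value; the RMWIByte patterns in
+// SystemZPatterns.td use them to match byte-sized read-modify-write
+// operations whatever the type of the immediate.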
+
+//===----------------------------------------------------------------------===//
+// Floating-point immediates
+//===----------------------------------------------------------------------===//
+
+// Floating-point zero.
+def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
+
+// Floating-point negative zero.
+def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>;
+
+//===----------------------------------------------------------------------===//
+// Symbolic address operands
+//===----------------------------------------------------------------------===//
+
+// PC-relative offsets of a basic block. The offset is sign-extended
+// and multiplied by 2.
+def brtarget16 : Operand<OtherVT> {
+ let EncoderMethod = "getPC16DBLEncoding";
+}
+def brtarget32 : Operand<OtherVT> {
+ let EncoderMethod = "getPC32DBLEncoding";
+}
+
+// A PC-relative offset of a global value. The offset is sign-extended
+// and multiplied by 2.
+def pcrel32 : PCRelAddress<i64, "pcrel32"> {
+ let EncoderMethod = "getPC32DBLEncoding";
+}
+
+// A PC-relative offset of a global value when the value is used as a
+// call target. The offset is sign-extended and multiplied by 2.
+def pcrel16call : PCRelAddress<i64, "pcrel16call"> {
+ let PrintMethod = "printCallOperand";
+ let EncoderMethod = "getPLT16DBLEncoding";
+}
+def pcrel32call : PCRelAddress<i64, "pcrel32call"> {
+ let PrintMethod = "printCallOperand";
+ let EncoderMethod = "getPLT32DBLEncoding";
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing modes
+//===----------------------------------------------------------------------===//
+
+// 12-bit displacement operands.
+def disp12imm32 : Operand<i32>;
+def disp12imm64 : Operand<i64>;
+
+// 20-bit displacement operands.
+def disp20imm32 : Operand<i32>;
+def disp20imm64 : Operand<i64>;
+
+def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">;
+def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">;
+def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">;
+def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">;
+def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">;
+def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">;
+
+// DAG patterns and operands for addressing modes. Each mode has
+// the form <type><range><group> where:
+//
+// <type> is one of:
+// shift : base + displacement (32-bit)
+// bdaddr : base + displacement
+// bdxaddr : base + displacement + index
+// laaddr : like bdxaddr, but used for Load Address operations
+// dynalloc : base + displacement + index + ADJDYNALLOC
+//
+// <range> is one of:
+// 12 : the displacement is an unsigned 12-bit value
+// 20 : the displacement is a signed 20-bit value
+//
+// <group> is one of:
+// pair : used when there is an equivalent instruction with the opposite
+// range value (12 or 20)
+// only : used when there is no equivalent instruction with the opposite
+// range value
+def shift12only : BDMode <"BDAddr", "32", "12", "Only">;
+def shift20only : BDMode <"BDAddr", "32", "20", "Only">;
+def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">;
+def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">;
+def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">;
+def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">;
+def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">;
+def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">;
+def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">;
+def bdxaddr20only128 : BDXMode<"BDXAddr", "64", "20", "Only128">;
+def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">;
+def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">;
+def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">;
+def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous
+//===----------------------------------------------------------------------===//
+
+// Access registers. At present we just use them for accessing the thread
+// pointer, so we don't expose them as registers to LLVM.
+def AccessReg : AsmOperandClass {
+ let Name = "AccessReg";
+ let ParserMethod = "parseAccessReg";
+}
+def access_reg : Immediate<i8, [{ return N->getZExtValue() < 16; }],
+ NOOP_SDNodeXForm, "AccessReg"> {
+ let ParserMatchClass = AccessReg;
+}
+
+// A 4-bit condition-code mask.
+def cond4 : PatLeaf<(i8 imm), [{ return (N->getZExtValue() < 16); }]>,
+ Operand<i8> {
+ let PrintMethod = "printCond4Operand";
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
new file mode 100644
index 0000000..8c4df56
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -0,0 +1,196 @@
+//===-- SystemZOperators.td - SystemZ-specific operators ------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Type profiles
+//===----------------------------------------------------------------------===//
+def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i64>]>;
+def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>,
+ SDTCisVT<1, i64>]>;
+def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_ZCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_ZBRCCMask : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i8>,
+ SDTCisVT<1, OtherVT>]>;
+def SDT_ZSelectCCMask : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>]>;
+def SDT_ZWrapPtr : SDTypeProfile<1, 1,
+ [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
+def SDT_ZExtractAccess : SDTypeProfile<1, 1,
+ [SDTCisVT<0, i32>,
+ SDTCisVT<1, i8>]>;
+def SDT_ZGR128Binary32 : SDTypeProfile<1, 2,
+ [SDTCisVT<0, untyped>,
+ SDTCisVT<1, untyped>,
+ SDTCisVT<2, i32>]>;
+def SDT_ZGR128Binary64 : SDTypeProfile<1, 2,
+ [SDTCisVT<0, untyped>,
+ SDTCisVT<1, untyped>,
+ SDTCisVT<2, i64>]>;
+def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5,
+ [SDTCisVT<0, i32>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>,
+ SDTCisVT<5, i32>]>;
+def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6,
+ [SDTCisVT<0, i32>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>,
+ SDTCisVT<5, i32>,
+ SDTCisVT<6, i32>]>;
+
+//===----------------------------------------------------------------------===//
+// Node definitions
+//===----------------------------------------------------------------------===//
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
+ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
+ [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue,
+ SDNPOutGlue]>;
+
+// Nodes for SystemZISD::*. See SystemZISelLowering.h for more details.
+def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
+def z_cmp : SDNode<"SystemZISD::CMP", SDT_ZCmp, [SDNPOutGlue]>;
+def z_ucmp : SDNode<"SystemZISD::UCMP", SDT_ZCmp, [SDNPOutGlue]>;
+def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
+ [SDNPHasChain, SDNPInGlue]>;
+def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
+ [SDNPInGlue]>;
+def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
+def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS",
+ SDT_ZExtractAccess>;
+def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
+def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
+def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>;
+def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
+
+class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
+ : SDNode<"SystemZISD::"##name, profile,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+
+def z_atomic_swapw : AtomicWOp<"ATOMIC_SWAPW">;
+def z_atomic_loadw_add : AtomicWOp<"ATOMIC_LOADW_ADD">;
+def z_atomic_loadw_sub : AtomicWOp<"ATOMIC_LOADW_SUB">;
+def z_atomic_loadw_and : AtomicWOp<"ATOMIC_LOADW_AND">;
+def z_atomic_loadw_or : AtomicWOp<"ATOMIC_LOADW_OR">;
+def z_atomic_loadw_xor : AtomicWOp<"ATOMIC_LOADW_XOR">;
+def z_atomic_loadw_nand : AtomicWOp<"ATOMIC_LOADW_NAND">;
+def z_atomic_loadw_min : AtomicWOp<"ATOMIC_LOADW_MIN">;
+def z_atomic_loadw_max : AtomicWOp<"ATOMIC_LOADW_MAX">;
+def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">;
+def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">;
+def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>;
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments
+//===----------------------------------------------------------------------===//
+
+// Register sign-extend operations. Sub-32-bit values are represented as i32s.
+def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>;
+def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;
+def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>;
+
+// Register zero-extend operations. Sub-32-bit values are represented as i32s.
+def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>;
+def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>;
+def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
+
+// Typed floating-point loads.
+def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>;
+def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>;
+
+// Aligned loads.
+class AlignedLoad<SDPatternOperator load>
+ : PatFrag<(ops node:$addr), (load node:$addr), [{
+ LoadSDNode *Load = cast<LoadSDNode>(N);
+ return Load->getAlignment() >= Load->getMemoryVT().getStoreSize();
+}]>;
+def aligned_load : AlignedLoad<load>;
+def aligned_sextloadi16 : AlignedLoad<sextloadi16>;
+def aligned_sextloadi32 : AlignedLoad<sextloadi32>;
+def aligned_zextloadi16 : AlignedLoad<zextloadi16>;
+def aligned_zextloadi32 : AlignedLoad<zextloadi32>;
+
+// Aligned stores.
+class AlignedStore<SDPatternOperator store>
+ : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
+ StoreSDNode *Store = cast<StoreSDNode>(N);
+ return Store->getAlignment() >= Store->getMemoryVT().getStoreSize();
+}]>;
+def aligned_store : AlignedStore<store>;
+def aligned_truncstorei16 : AlignedStore<truncstorei16>;
+def aligned_truncstorei32 : AlignedStore<truncstorei32>;
+
+// Insertions.
+def inserti8 : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, -256), node:$src2)>;
+def insertll : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, 0xffffffffffff0000), node:$src2)>;
+def insertlh : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, 0xffffffff0000ffff), node:$src2)>;
+def inserthl : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, 0xffff0000ffffffff), node:$src2)>;
+def inserthh : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, 0x0000ffffffffffff), node:$src2)>;
+def insertlf : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, 0xffffffff00000000), node:$src2)>;
+def inserthf : PatFrag<(ops node:$src1, node:$src2),
+ (or (and node:$src1, 0x00000000ffffffff), node:$src2)>;
+
+// ORs that can be treated as insertions.
+def or_as_inserti8 : PatFrag<(ops node:$src1, node:$src2),
+ (or node:$src1, node:$src2), [{
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ return CurDAG->MaskedValueIsZero(N->getOperand(0),
+ APInt::getLowBitsSet(BitWidth, 8));
+}]>;
+
+// ORs that can be treated as reversed insertions.
+def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2),
+ (or node:$src1, node:$src2), [{
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ return CurDAG->MaskedValueIsZero(N->getOperand(1),
+ APInt::getLowBitsSet(BitWidth, 8));
+}]>;
+
+// Fused multiply-add and multiply-subtract, but with the order of the
+// operands matching SystemZ's MA and MS instructions.
+def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fma node:$src2, node:$src3, node:$src1)>;
+def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fma node:$src2, node:$src3, (fneg node:$src1))>;
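+
+// In other words, (z_fma $src1, $src2, $src3) computes $src2 * $src3 + $src1
+// and (z_fms $src1, $src2, $src3) computes $src2 * $src3 - $src1.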
+
+// Floating-point negative absolute.
+def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
+
+// Create a unary operator that loads from memory and then performs
+// the given operation on it.
+class loadu<SDPatternOperator operator>
+ : PatFrag<(ops node:$addr), (operator (load node:$addr))>;
+
+// Create a store operator that performs the given unary operation
+// on the value before storing it.
+class storeu<SDPatternOperator operator>
+ : PatFrag<(ops node:$value, node:$addr),
+ (store (operator node:$value), node:$addr)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td
new file mode 100644
index 0000000..3689f74
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -0,0 +1,71 @@
+//===-- SystemZPatterns.td - SystemZ-specific pattern rules ---*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Record that INSN performs a 64-bit version of unary operator OPERATOR
+// in which the operand is sign-extended from 32 to 64 bits.
+multiclass SXU<SDPatternOperator operator, Instruction insn> {
+ def : Pat<(operator (sext (i32 GR32:$src))),
+ (insn GR32:$src)>;
+ def : Pat<(operator (sext_inreg GR64:$src, i32)),
+ (insn (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+}
+
+// Record that INSN performs a 64-bit version of binary operator OPERATOR
+// in which the first operand has class CLS and in which the second operand
+// is sign-extended from a 32-bit register.
+multiclass SXB<SDPatternOperator operator, RegisterOperand cls,
+ Instruction insn> {
+ def : Pat<(operator cls:$src1, (sext GR32:$src2)),
+ (insn cls:$src1, GR32:$src2)>;
+ def : Pat<(operator cls:$src1, (sext_inreg GR64:$src2, i32)),
+ (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>;
+}
+
+// Like SXB, but for zero extension.
+multiclass ZXB<SDPatternOperator operator, RegisterOperand cls,
+ Instruction insn> {
+ def : Pat<(operator cls:$src1, (zext GR32:$src2)),
+ (insn cls:$src1, GR32:$src2)>;
+ def : Pat<(operator cls:$src1, (and GR64:$src2, 0xffffffff)),
+ (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>;
+}
+
+// Record that INSN performs a binary read-modify-write operation,
+// with LOAD, OPERATOR and STORE being the read, modify and write
+// respectively. MODE is the addressing mode and IMM is the type
+// of the second operand.
+class RMWI<SDPatternOperator load, SDPatternOperator operator,
+ SDPatternOperator store, AddressingMode mode,
+ PatFrag imm, Instruction insn>
+ : Pat<(store (operator (load mode:$addr), imm:$src), mode:$addr),
+ (insn mode:$addr, (UIMM8 imm:$src))>;
+
+// Record that INSN performs binary operation OPERATOR on a byte
+// memory location. IMM is the type of the second operand.
+multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode,
+ Instruction insn> {
+ def : RMWI<zextloadi8, operator, truncstorei8, mode, imm32, insn>;
+ def : RMWI<zextloadi8, operator, truncstorei8, mode, imm64, insn>;
+ def : RMWI<sextloadi8, operator, truncstorei8, mode, imm32, insn>;
+ def : RMWI<sextloadi8, operator, truncstorei8, mode, imm64, insn>;
+ def : RMWI<extloadi8, operator, truncstorei8, mode, imm32, insn>;
+ def : RMWI<extloadi8, operator, truncstorei8, mode, imm64, insn>;
+}
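+
+// For example, SystemZInstrInfo.td instantiates
+//   defm : RMWIByte<or, bdaddr12pair, OI>;
+// which allows a byte load, an OR with an immediate and a truncating store
+// back to the same address to be selected as a single OI instruction.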
+
+// Record that INSN performs insertion TYPE into a register of class CLS.
+// The inserted operand is loaded using LOAD from an address of mode MODE.
+multiclass InsertMem<string type, Instruction insn, RegisterOperand cls,
+ SDPatternOperator load, AddressingMode mode> {
+ def : Pat<(!cast<SDPatternOperator>("or_as_"##type)
+ cls:$src1, (load mode:$src2)),
+ (insn cls:$src1, mode:$src2)>;
+ def : Pat<(!cast<SDPatternOperator>("or_as_rev"##type)
+ (load mode:$src2), cls:$src1),
+ (insn cls:$src1, mode:$src2)>;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
new file mode 100644
index 0000000..a0ae7ed
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -0,0 +1,162 @@
+//===-- SystemZRegisterInfo.cpp - SystemZ register information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZRegisterInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+using namespace llvm;
+
+SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
+ const SystemZInstrInfo &tii)
+ : SystemZGenRegisterInfo(SystemZ::R14D), TM(tm), TII(tii) {}
+
+const uint16_t*
+SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const uint16_t CalleeSavedRegs[] = {
+ SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D,
+ SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D,
+ SystemZ::R14D, SystemZ::R15D,
+ SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D,
+ SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D,
+ 0
+ };
+
+ return CalleeSavedRegs;
+}
+
+BitVector
+SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (TFI->hasFP(MF)) {
+ // R11D is the frame pointer. Reserve all aliases.
+ Reserved.set(SystemZ::R11D);
+ Reserved.set(SystemZ::R11W);
+ Reserved.set(SystemZ::R10Q);
+ }
+
+ // R15D is the stack pointer. Reserve all aliases.
+ Reserved.set(SystemZ::R15D);
+ Reserved.set(SystemZ::R15W);
+ Reserved.set(SystemZ::R14Q);
+ return Reserved;
+}
+
+bool
+SystemZRegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator SaveMBBI,
+ MachineBasicBlock::iterator &UseMBBI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ MachineFunction &MF = *MBB.getParent();
+ const SystemZFrameLowering *TFI =
+ static_cast<const SystemZFrameLowering *>(TM.getFrameLowering());
+ unsigned Base = getFrameRegister(MF);
+ uint64_t Offset = TFI->getEmergencySpillSlotOffset(MF);
+ DebugLoc DL;
+
+ unsigned LoadOpcode, StoreOpcode;
+ TII.getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode);
+
+ // The offset must always be in range of a 12-bit unsigned displacement.
+ BuildMI(MBB, SaveMBBI, DL, TII.get(StoreOpcode))
+ .addReg(Reg, RegState::Kill).addReg(Base).addImm(Offset).addReg(0);
+ BuildMI(MBB, UseMBBI, DL, TII.get(LoadOpcode), Reg)
+ .addReg(Base).addImm(Offset).addReg(0);
+ return true;
+}
+
+void
+SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Outgoing arguments should be part of the frame");
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Decompose the frame index into a base and offset.
+ int FrameIndex = MI->getOperand(FIOperandNum).getIndex();
+ unsigned BasePtr = getFrameRegister(MF);
+ int64_t Offset = (TFI->getFrameIndexOffset(MF, FrameIndex) +
+ MI->getOperand(FIOperandNum + 1).getImm());
+
+ // Special handling of dbg_value instructions.
+ if (MI->isDebugValue()) {
+ MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, /*isDef*/ false);
+ MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // See if the offset is in range, or if an equivalent instruction that
+ // accepts the offset exists.
+ unsigned Opcode = MI->getOpcode();
+ unsigned OpcodeForOffset = TII.getOpcodeForOffset(Opcode, Offset);
+ if (OpcodeForOffset)
+ MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+ else {
+ // Create an anchor point that is in range. Start at 0xffff so that
+    // we can use LLILH to load the immediate.
+ int64_t OldOffset = Offset;
+ int64_t Mask = 0xffff;
+ do {
+ Offset = OldOffset & Mask;
+ OpcodeForOffset = TII.getOpcodeForOffset(Opcode, Offset);
+ Mask >>= 1;
+ assert(Mask && "One offset must be OK");
+ } while (!OpcodeForOffset);
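+    // For example, an offset of 0x12345 used with an instruction family that
+    // only accepts 12-bit displacements would leave an anchor offset of 0x345
+    // here, with the remaining 0x12000 loaded separately below.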
+
+ unsigned ScratchReg =
+ MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ int64_t HighOffset = OldOffset - Offset;
+
+ if (MI->getDesc().TSFlags & SystemZII::HasIndex
+ && MI->getOperand(FIOperandNum + 2).getReg() == 0) {
+ // Load the offset into the scratch register and use it as an index.
+ // The scratch register then dies here.
+ TII.loadImmediate(MBB, MI, ScratchReg, HighOffset);
+ MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+ MI->getOperand(FIOperandNum + 2).ChangeToRegister(ScratchReg,
+ false, false, true);
+ } else {
+ // Load the anchor address into a scratch register.
+ unsigned LAOpcode = TII.getOpcodeForOffset(SystemZ::LA, HighOffset);
+ if (LAOpcode)
+        BuildMI(MBB, MI, DL, TII.get(LAOpcode), ScratchReg)
+ .addReg(BasePtr).addImm(HighOffset).addReg(0);
+ else {
+ // Load the high offset into the scratch register and use it as
+ // an index.
+ TII.loadImmediate(MBB, MI, ScratchReg, HighOffset);
+ BuildMI(MBB, MI, DL, TII.get(SystemZ::AGR), ScratchReg)
+ .addReg(ScratchReg, RegState::Kill).addReg(BasePtr);
+ }
+
+ // Use the scratch register as the base. It then dies here.
+ MI->getOperand(FIOperandNum).ChangeToRegister(ScratchReg,
+ false, false, true);
+ }
+ }
+ MI->setDesc(TII.get(OpcodeForOffset));
+ MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+}
+
+unsigned
+SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
new file mode 100644
index 0000000..91a70de
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -0,0 +1,70 @@
+//===-- SystemZRegisterInfo.h - SystemZ register information ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SystemZREGISTERINFO_H
+#define SystemZREGISTERINFO_H
+
+#include "SystemZ.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "SystemZGenRegisterInfo.inc"
+
+namespace llvm {
+
+namespace SystemZ {
+ // Return the subreg to use for referring to the even and odd registers
+ // in a GR128 pair. Is32Bit says whether we want a GR32 or GR64.
+ inline unsigned even128(bool Is32bit) {
+ return Is32bit ? subreg_32bit : subreg_high;
+ }
+ inline unsigned odd128(bool Is32bit) {
+ return Is32bit ? subreg_low32 : subreg_low;
+ }
+}
+
+class SystemZSubtarget;
+class SystemZInstrInfo;
+
+struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
+private:
+ SystemZTargetMachine &TM;
+ const SystemZInstrInfo &TII;
+
+public:
+ SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii);
+
+ // Override TargetRegisterInfo.h.
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
+ LLVM_OVERRIDE {
+ return true;
+ }
+ virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const
+ LLVM_OVERRIDE {
+ return true;
+ }
+ virtual const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0)
+ const LLVM_OVERRIDE;
+ virtual BitVector getReservedRegs(const MachineFunction &MF)
+ const LLVM_OVERRIDE;
+ virtual bool saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator SaveMBBI,
+ MachineBasicBlock::iterator &UseMBBI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const LLVM_OVERRIDE;
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const LLVM_OVERRIDE;
+ virtual unsigned getFrameRegister(const MachineFunction &MF) const
+ LLVM_OVERRIDE;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
new file mode 100644
index 0000000..bd1b563
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -0,0 +1,150 @@
+//==- SystemZRegisterInfo.td - SystemZ register definitions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Class definitions.
+//===----------------------------------------------------------------------===//
+
+class SystemZReg<string n> : Register<n> {
+ let Namespace = "SystemZ";
+}
+
+class SystemZRegWithSubregs<string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ let Namespace = "SystemZ";
+}
+
+let Namespace = "SystemZ" in {
+def subreg_32bit : SubRegIndex; // could also be known as "subreg_high32"
+def subreg_high : SubRegIndex;
+def subreg_low : SubRegIndex;
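+// subreg_low32 is a composed index (subreg_low then subreg_32bit): the low
+// 32 bits of the low register in a 128-bit pair.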
+def subreg_low32 : SubRegIndex<[subreg_low, subreg_32bit]>;
+}
+
+// Define a register class that contains values of type TYPE and an
+// associated operand called NAME. SIZE is the size and alignment
+// of the registers and REGLIST is the list of individual registers.
+multiclass SystemZRegClass<string name, ValueType type, int size, dag regList> {
+ def AsmOperand : AsmOperandClass {
+ let Name = name;
+ let ParserMethod = "parse"##name;
+ let RenderMethod = "addRegOperands";
+ }
+ def Bit : RegisterClass<"SystemZ", [type], size, regList> {
+ let Size = size;
+ }
+ def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> {
+ let ParserMatchClass = !cast<AsmOperandClass>(name##"AsmOperand");
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// General-purpose registers
+//===----------------------------------------------------------------------===//
+
+// Lower 32 bits of one of the 16 64-bit general-purpose registers
+class GPR32<bits<16> num, string n> : SystemZReg<n> {
+ let HWEncoding = num;
+}
+
+// One of the 16 64-bit general-purpose registers.
+class GPR64<bits<16> num, string n, GPR32 low>
+ : SystemZRegWithSubregs<n, [low]> {
+ let HWEncoding = num;
+ let SubRegIndices = [subreg_32bit];
+}
+
+// 8 even-odd pairs of GPR64s.
+class GPR128<bits<16> num, string n, GPR64 high, GPR64 low>
+ : SystemZRegWithSubregs<n, [high, low]> {
+ let HWEncoding = num;
+ let SubRegIndices = [subreg_high, subreg_low];
+}
+
+// General-purpose registers
+foreach I = 0-15 in {
+ def R#I#W : GPR32<I, "r"#I>;
+ def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"W")>, DwarfRegNum<[I]>;
+}
+
+foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in {
+ def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#I#"D"),
+ !cast<GPR64>("R"#!add(I, 1)#"D")>;
+}
+
+/// Allocate the callee-saved R6-R13 backwards. That way they can be saved
+/// together with R14 and R15 in one prolog instruction.
+defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uW", 0, 5),
+ (sequence "R%uW", 15, 6))>;
+defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5),
+ (sequence "R%uD", 15, 6))>;
+
+// The architecture doesn't really have any i128 support, so model the
+// register pairs as untyped instead.
+defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q,
+ R12Q, R10Q, R8Q, R6Q,
+ R14Q)>;
+
+// Base and index registers. Everything except R0, which in an address
+// context evaluates as 0.
+defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0W)>;
+defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>;
+
+// Not used directly, but needs to exist for ADDR32 and ADDR64 subregs
+// of a GR128.
+defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point registers
+//===----------------------------------------------------------------------===//
+
+// Lower 32 bits of one of the 16 64-bit floating-point registers
+class FPR32<bits<16> num, string n> : SystemZReg<n> {
+ let HWEncoding = num;
+}
+
+// One of the 16 64-bit floating-point registers
+class FPR64<bits<16> num, string n, FPR32 low>
+ : SystemZRegWithSubregs<n, [low]> {
+ let HWEncoding = num;
+ let SubRegIndices = [subreg_32bit];
+}
+
+// 8 pairs of FPR64s, with a one-register gap in between.
+class FPR128<bits<16> num, string n, FPR64 high, FPR64 low>
+ : SystemZRegWithSubregs<n, [high, low]> {
+ let HWEncoding = num;
+ let SubRegIndices = [subreg_high, subreg_low];
+}
+
+// Floating-point registers
+foreach I = 0-15 in {
+ def F#I#S : FPR32<I, "f"#I>;
+ def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>,
+ DwarfRegNum<[!add(I, 16)]>;
+}
+
+foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
+ def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#I#"D"),
+ !cast<FPR64>("F"#!add(I, 2)#"D")>;
+}
+
+// There's no store-multiple instruction for FPRs, so we're not fussy
+// about the order in which call-saved registers are allocated.
+defm FP32 : SystemZRegClass<"FP32", f32, 32, (sequence "F%uS", 0, 15)>;
+defm FP64 : SystemZRegClass<"FP64", f64, 64, (sequence "F%uD", 0, 15)>;
+defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q,
+ F8Q, F9Q, F12Q, F13Q)>;
+
+//===----------------------------------------------------------------------===//
+// Other registers
+//===----------------------------------------------------------------------===//
+
+// Status register
+def PSW : SystemZReg<"psw">;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
new file mode 100644
index 0000000..cfd3324
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -0,0 +1,56 @@
+//===-- SystemZSubtarget.cpp - SystemZ subtarget information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZSubtarget.h"
+#include "llvm/IR/GlobalValue.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SystemZGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+SystemZSubtarget::SystemZSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS)
+ : SystemZGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT) {
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "z10";
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+}
+
+// Return true if GV binds locally under reloc model RM.
+static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) {
+ // For non-PIC, all symbols bind locally.
+ if (RM == Reloc::Static)
+ return true;
+
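+ // Otherwise the symbol binds locally if it has local linkage or a
+ // non-default (hidden or protected) visibility.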
+ return GV->hasLocalLinkage() || !GV->hasDefaultVisibility();
+}
+
+bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
+ Reloc::Model RM,
+ CodeModel::Model CM) const {
+ // PC32DBL accesses require the low bit to be clear. Note that a zero
+ // value selects the default alignment and is therefore OK.
+ if (GV->getAlignment() == 1)
+ return false;
+
+ // For the small model, all locally-binding symbols are in range.
+ if (CM == CodeModel::Small)
+ return bindsLocally(GV, RM);
+
+ // For Medium and above, assume that the symbol is not within the 4GB range.
+ // Taking the address of locally-defined text would be OK, but that
+ // case isn't easy to detect.
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
new file mode 100644
index 0000000..8d4d450
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -0,0 +1,48 @@
+//===-- SystemZSubtarget.h - SystemZ subtarget information -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZSUBTARGET_H
+#define SYSTEMZSUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SystemZGenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+
+class SystemZSubtarget : public SystemZGenSubtargetInfo {
+private:
+ Triple TargetTriple;
+
+public:
+ SystemZSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+ // Automatically generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ // Return true if GV can be accessed using LARL for reloc model RM
+ // and code model CM.
+ bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
+ CodeModel::Model CM) const;
+
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
new file mode 100644
index 0000000..8c4c456
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -0,0 +1,60 @@
+//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeSystemZTarget() {
+ // Register the target.
+ RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
+}
+
+SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ // Make sure that global data has at least 16 bits of alignment by default,
+ // so that we can refer to it using LARL. We don't have any special
+ // requirements for stack variables though.
+ DL("E-p:64:64:64-i1:8:16-i8:8:16-i16:16-i32:32-i64:64"
+ "-f32:32-f64:64-f128:64-a0:8:16-n32:64"),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+ FrameLowering(*this, Subtarget) {
+}
+
+namespace {
+/// SystemZ Code Generator Pass Configuration Options.
+class SystemZPassConfig : public TargetPassConfig {
+public:
+ SystemZPassConfig(SystemZTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ SystemZTargetMachine &getSystemZTargetMachine() const {
+ return getTM<SystemZTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+};
+} // end anonymous namespace
+
+bool SystemZPassConfig::addInstSelector() {
+ addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel()));
+ return false;
+}
+
+TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new SystemZPassConfig(this, PM);
+}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
new file mode 100644
index 0000000..98614e7
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -0,0 +1,74 @@
+//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SYSTEMZTARGETMACHINE_H
+#define SYSTEMZTARGETMACHINE_H
+
+#include "SystemZFrameLowering.h"
+#include "SystemZISelLowering.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SystemZTargetMachine : public LLVMTargetMachine {
+ SystemZSubtarget Subtarget;
+ const DataLayout DL;
+ SystemZInstrInfo InstrInfo;
+ SystemZTargetLowering TLInfo;
+ TargetSelectionDAGInfo TSInfo;
+ SystemZFrameLowering FrameLowering;
+
+public:
+ SystemZTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ // Override TargetMachine.
+ virtual const TargetFrameLowering *getFrameLowering() const LLVM_OVERRIDE {
+ return &FrameLowering;
+ }
+ virtual const SystemZInstrInfo *getInstrInfo() const LLVM_OVERRIDE {
+ return &InstrInfo;
+ }
+ virtual const SystemZSubtarget *getSubtargetImpl() const LLVM_OVERRIDE {
+ return &Subtarget;
+ }
+ virtual const DataLayout *getDataLayout() const LLVM_OVERRIDE {
+ return &DL;
+ }
+ virtual const SystemZRegisterInfo *getRegisterInfo() const LLVM_OVERRIDE {
+ return &InstrInfo.getRegisterInfo();
+ }
+ virtual const SystemZTargetLowering *getTargetLowering() const LLVM_OVERRIDE {
+ return &TLInfo;
+ }
+ virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const
+ LLVM_OVERRIDE {
+ return &TSInfo;
+ }
+
+ // Override LLVMTargetMachine
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM) LLVM_OVERRIDE;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
new file mode 100644
index 0000000..8f9aa28
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- SystemZTargetInfo.cpp - SystemZ target implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::TheSystemZTarget;
+
+extern "C" void LLVMInitializeSystemZTargetInfo() {
+ RegisterTarget<Triple::systemz, /*HasJIT=*/true>
+ X(TheSystemZTarget, "systemz", "SystemZ");
+}
diff --git a/contrib/llvm/lib/Target/Target.cpp b/contrib/llvm/lib/Target/Target.cpp
index 9a78ebc..3d92f29 100644
--- a/contrib/llvm/lib/Target/Target.cpp
+++ b/contrib/llvm/lib/Target/Target.cpp
@@ -16,6 +16,7 @@
#include "llvm-c/Initialization.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetLibraryInfo.h"
@@ -23,6 +24,23 @@
using namespace llvm;
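+// Helpers that map the opaque C API handles to and from the underlying C++
+// objects.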
+inline DataLayout *unwrap(LLVMTargetDataRef P) {
+ return reinterpret_cast<DataLayout*>(P);
+}
+
+inline LLVMTargetDataRef wrap(const DataLayout *P) {
+ return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P));
+}
+
+inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) {
+ return reinterpret_cast<TargetLibraryInfo*>(P);
+}
+
+inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) {
+ TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P);
+ return reinterpret_cast<LLVMTargetLibraryInfoRef>(X);
+}
+
void llvm::initializeTarget(PassRegistry &Registry) {
initializeDataLayoutPass(Registry);
initializeTargetLibraryInfoPass(Registry);
diff --git a/contrib/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm/lib/Target/TargetMachineC.cpp
index 79f74bd..01d12e8 100644
--- a/contrib/llvm/lib/Target/TargetMachineC.cpp
+++ b/contrib/llvm/lib/Target/TargetMachineC.cpp
@@ -28,7 +28,36 @@
using namespace llvm;
+inline DataLayout *unwrap(LLVMTargetDataRef P) {
+ return reinterpret_cast<DataLayout*>(P);
+}
+
+inline LLVMTargetDataRef wrap(const DataLayout *P) {
+ return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P));
+}
+
+inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) {
+ return reinterpret_cast<TargetLibraryInfo*>(P);
+}
+
+inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) {
+ TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P);
+ return reinterpret_cast<LLVMTargetLibraryInfoRef>(X);
+}
+inline TargetMachine *unwrap(LLVMTargetMachineRef P) {
+ return reinterpret_cast<TargetMachine*>(P);
+}
+inline Target *unwrap(LLVMTargetRef P) {
+ return reinterpret_cast<Target*>(P);
+}
+inline LLVMTargetMachineRef wrap(const TargetMachine *P) {
+ return
+ reinterpret_cast<LLVMTargetMachineRef>(const_cast<TargetMachine*>(P));
+}
+inline LLVMTargetRef wrap(const Target * P) {
+ return reinterpret_cast<LLVMTargetRef>(const_cast<Target*>(P));
+}
LLVMTargetRef LLVMGetFirstTarget() {
const Target* target = &*TargetRegistry::begin();
@@ -77,29 +106,9 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple,
break;
}
- CodeModel::Model CM;
- switch (CodeModel) {
- case LLVMCodeModelJITDefault:
- CM = CodeModel::JITDefault;
- break;
- case LLVMCodeModelSmall:
- CM = CodeModel::Small;
- break;
- case LLVMCodeModelKernel:
- CM = CodeModel::Kernel;
- break;
- case LLVMCodeModelMedium:
- CM = CodeModel::Medium;
- break;
- case LLVMCodeModelLarge:
- CM = CodeModel::Large;
- break;
- default:
- CM = CodeModel::Default;
- break;
- }
- CodeGenOpt::Level OL;
+ CodeModel::Model CM = unwrap(CodeModel);
+ CodeGenOpt::Level OL;
switch (Level) {
case LLVMCodeGenLevelNone:
OL = CodeGenOpt::None;
@@ -149,8 +158,8 @@ LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
return wrap(unwrap(T)->getDataLayout());
}
-LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
- char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
+static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
+ formatted_raw_ostream &OS, LLVMCodeGenFileType codegen, char **ErrorMessage) {
TargetMachine* TM = unwrap(T);
Module* Mod = unwrap(M);
@@ -176,14 +185,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
ft = TargetMachine::CGFT_ObjectFile;
break;
}
- raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary);
- formatted_raw_ostream destf(dest);
- if (!error.empty()) {
- *ErrorMessage = strdup(error.c_str());
- return true;
- }
-
- if (TM->addPassesToEmitFile(pass, destf, ft)) {
+ if (TM->addPassesToEmitFile(pass, OS, ft)) {
error = "TargetMachine can't emit a file of this type";
*ErrorMessage = strdup(error.c_str());
return true;
@@ -191,7 +193,35 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
pass.run(*Mod);
- destf.flush();
- dest.flush();
+ OS.flush();
return false;
}
+
+LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
+ char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
+ std::string error;
+ raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary);
+ formatted_raw_ostream destf(dest);
+ if (!error.empty()) {
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+ bool Result = LLVMTargetMachineEmit(T, M, destf, codegen, ErrorMessage);
+ dest.flush();
+ return Result;
+}
+
+LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T,
+ LLVMModuleRef M, LLVMCodeGenFileType codegen, char** ErrorMessage,
+ LLVMMemoryBufferRef *OutMemBuf) {
+ std::string CodeString;
+ raw_string_ostream OStream(CodeString);
+ formatted_raw_ostream Out(OStream);
+ bool Result = LLVMTargetMachineEmit(T, M, Out, codegen, ErrorMessage);
+ OStream.flush();
+
+ std::string &Data = OStream.str();
+ *OutMemBuf = LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.c_str(),
+ Data.length(), "");
+ return Result;
+}
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index e462322..68908ab 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -33,17 +33,451 @@ using namespace llvm;
namespace {
struct X86Operand;
+static const char OpPrecedence[] = {
+ 0, // IC_PLUS
+ 0, // IC_MINUS
+ 1, // IC_MULTIPLY
+ 1, // IC_DIVIDE
+ 2, // IC_RPAREN
+ 3, // IC_LPAREN
+ 0, // IC_IMM
+ 0 // IC_REGISTER
+};
+
class X86AsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
ParseInstructionInfo *InstInfo;
private:
+ enum InfixCalculatorTok {
+ IC_PLUS = 0,
+ IC_MINUS,
+ IC_MULTIPLY,
+ IC_DIVIDE,
+ IC_RPAREN,
+ IC_LPAREN,
+ IC_IMM,
+ IC_REGISTER
+ };
+
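+ // A small shunting-yard style evaluator: pushOperator() reorders
+ // operators onto a postfix stack according to OpPrecedence, and execute()
+ // folds the postfix sequence into a single value.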
+ class InfixCalculator {
+ typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
+ SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
+ SmallVector<ICToken, 4> PostfixStack;
+
+ public:
+ int64_t popOperand() {
+ assert (!PostfixStack.empty() && "Popped an empty stack!");
+ ICToken Op = PostfixStack.pop_back_val();
+ assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
+ && "Expected and immediate or register!");
+ return Op.second;
+ }
+ void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
+ assert ((Op == IC_IMM || Op == IC_REGISTER) &&
+ "Unexpected operand!");
+ PostfixStack.push_back(std::make_pair(Op, Val));
+ }
+
+ void popOperator() { InfixOperatorStack.pop_back_val(); }
+ void pushOperator(InfixCalculatorTok Op) {
+ // Push the new operator if the stack is empty.
+ if (InfixOperatorStack.empty()) {
+ InfixOperatorStack.push_back(Op);
+ return;
+ }
+
+ // Push the new operator if it has a higher precedence than the operator
+ // on the top of the stack or the operator on the top of the stack is a
+ // left parenthesis.
+ unsigned Idx = InfixOperatorStack.size() - 1;
+ InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
+ if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
+ InfixOperatorStack.push_back(Op);
+ return;
+ }
+
+ // The operator on the top of the stack has equal or higher precedence
+ // than the new operator.
+ unsigned ParenCount = 0;
+ while (1) {
+ // Nothing to process.
+ if (InfixOperatorStack.empty())
+ break;
+
+ Idx = InfixOperatorStack.size() - 1;
+ StackOp = InfixOperatorStack[Idx];
+ if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
+ break;
+
+ // If the parenthesis count is zero and we see a left parenthesis,
+ // then stop processing.
+ if (!ParenCount && StackOp == IC_LPAREN)
+ break;
+
+ if (StackOp == IC_RPAREN) {
+ ++ParenCount;
+ InfixOperatorStack.pop_back_val();
+ } else if (StackOp == IC_LPAREN) {
+ --ParenCount;
+ InfixOperatorStack.pop_back_val();
+ } else {
+ InfixOperatorStack.pop_back_val();
+ PostfixStack.push_back(std::make_pair(StackOp, 0));
+ }
+ }
+ // Push the new operator.
+ InfixOperatorStack.push_back(Op);
+ }
+ int64_t execute() {
+ // Push any remaining operators onto the postfix stack.
+ while (!InfixOperatorStack.empty()) {
+ InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
+ if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
+ PostfixStack.push_back(std::make_pair(StackOp, 0));
+ }
+
+ if (PostfixStack.empty())
+ return 0;
+
+ SmallVector<ICToken, 16> OperandStack;
+ for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
+ ICToken Op = PostfixStack[i];
+ if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
+ OperandStack.push_back(Op);
+ } else {
+ assert (OperandStack.size() > 1 && "Too few operands.");
+ int64_t Val;
+ ICToken Op2 = OperandStack.pop_back_val();
+ ICToken Op1 = OperandStack.pop_back_val();
+ switch (Op.first) {
+ default:
+ report_fatal_error("Unexpected operator!");
+ break;
+ case IC_PLUS:
+ Val = Op1.second + Op2.second;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
+ case IC_MINUS:
+ Val = Op1.second - Op2.second;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
+ case IC_MULTIPLY:
+ assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
+ "Multiply operation with an immediate and a register!");
+ Val = Op1.second * Op2.second;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
+ case IC_DIVIDE:
+ assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
+ "Divide operation with an immediate and a register!");
+ assert (Op2.second != 0 && "Division by zero!");
+ Val = Op1.second / Op2.second;
+ OperandStack.push_back(std::make_pair(IC_IMM, Val));
+ break;
+ }
+ }
+ }
+ assert (OperandStack.size() == 1 && "Expected a single result.");
+ return OperandStack.pop_back_val().second;
+ }
+ };
+
+ enum IntelExprState {
+ IES_PLUS,
+ IES_MINUS,
+ IES_MULTIPLY,
+ IES_DIVIDE,
+ IES_LBRAC,
+ IES_RBRAC,
+ IES_LPAREN,
+ IES_RPAREN,
+ IES_REGISTER,
+ IES_INTEGER,
+ IES_IDENTIFIER,
+ IES_ERROR
+ };
+
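+ // State machine for scanning an Intel-syntax memory or immediate
+ // expression: it collects the base and index registers, scale and symbolic
+ // displacement, and feeds constant terms to an InfixCalculator.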
+ class IntelExprStateMachine {
+ IntelExprState State, PrevState;
+ unsigned BaseReg, IndexReg, TmpReg, Scale;
+ int64_t Imm;
+ const MCExpr *Sym;
+ StringRef SymName;
+ bool StopOnLBrac, AddImmPrefix;
+ InfixCalculator IC;
+ InlineAsmIdentifierInfo Info;
+ public:
+ IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
+ State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
+ Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
+ AddImmPrefix(addimmprefix) { Info.clear(); }
+
+ unsigned getBaseReg() { return BaseReg; }
+ unsigned getIndexReg() { return IndexReg; }
+ unsigned getScale() { return Scale; }
+ const MCExpr *getSym() { return Sym; }
+ StringRef getSymName() { return SymName; }
+ int64_t getImm() { return Imm + IC.execute(); }
+ bool isValidEndState() { return State == IES_RBRAC; }
+ bool getStopOnLBrac() { return StopOnLBrac; }
+ bool getAddImmPrefix() { return AddImmPrefix; }
+ bool hadError() { return State == IES_ERROR; }
+
+ InlineAsmIdentifierInfo &getIdentifierInfo() {
+ return Info;
+ }
+
+ void onPlus() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_RPAREN:
+ case IES_REGISTER:
+ State = IES_PLUS;
+ IC.pushOperator(IC_PLUS);
+ if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
+ // If we already have a BaseReg, then assume this is the IndexReg with
+ // a scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert (!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ }
+ break;
+ }
+ PrevState = CurrState;
+ }
+ void onMinus() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_PLUS:
+ case IES_MULTIPLY:
+ case IES_DIVIDE:
+ case IES_LPAREN:
+ case IES_RPAREN:
+ case IES_LBRAC:
+ case IES_RBRAC:
+ case IES_INTEGER:
+ case IES_REGISTER:
+ State = IES_MINUS;
+ // Only push the minus operator if it is not a unary operator.
+ if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
+ CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
+ CurrState == IES_LPAREN || CurrState == IES_LBRAC))
+ IC.pushOperator(IC_MINUS);
+ if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
+ // If we already have a BaseReg, then assume this is the IndexReg with
+ // a scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert (!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ }
+ break;
+ }
+ PrevState = CurrState;
+ }
+ void onRegister(unsigned Reg) {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_PLUS:
+ case IES_LPAREN:
+ State = IES_REGISTER;
+ TmpReg = Reg;
+ IC.pushOperand(IC_REGISTER);
+ break;
+ case IES_MULTIPLY:
+ // Index Register - Scale * Register
+ if (PrevState == IES_INTEGER) {
+ assert (!IndexReg && "IndexReg already set!");
+ State = IES_REGISTER;
+ IndexReg = Reg;
+ // Get the scale and replace the 'Scale * Register' with '0'.
+ Scale = IC.popOperand();
+ IC.pushOperand(IC_IMM);
+ IC.popOperator();
+ } else {
+ State = IES_ERROR;
+ }
+ break;
+ }
+ PrevState = CurrState;
+ }
+ void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
+ PrevState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_PLUS:
+ case IES_MINUS:
+ State = IES_INTEGER;
+ Sym = SymRef;
+ SymName = SymRefName;
+ IC.pushOperand(IC_IMM);
+ break;
+ }
+ }
+ void onInteger(int64_t TmpInt) {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_PLUS:
+ case IES_MINUS:
+ case IES_DIVIDE:
+ case IES_MULTIPLY:
+ case IES_LPAREN:
+ State = IES_INTEGER;
+ if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
+ // Index Register - Register * Scale
+ assert (!IndexReg && "IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = TmpInt;
+ // Get the scale and replace the 'Register * Scale' with '0'.
+ IC.popOperator();
+ } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
+ PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
+ PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
+ CurrState == IES_MINUS) {
+ // Unary minus. No need to pop the minus operator because it was never
+ // pushed.
+ IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
+ } else {
+ IC.pushOperand(IC_IMM, TmpInt);
+ }
+ break;
+ }
+ PrevState = CurrState;
+ }
+ void onStar() {
+ PrevState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_REGISTER:
+ case IES_RPAREN:
+ State = IES_MULTIPLY;
+ IC.pushOperator(IC_MULTIPLY);
+ break;
+ }
+ }
+ void onDivide() {
+ PrevState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_RPAREN:
+ State = IES_DIVIDE;
+ IC.pushOperator(IC_DIVIDE);
+ break;
+ }
+ }
+ void onLBrac() {
+ PrevState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_RBRAC:
+ State = IES_PLUS;
+ IC.pushOperator(IC_PLUS);
+ break;
+ }
+ }
+ void onRBrac() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_REGISTER:
+ case IES_RPAREN:
+ State = IES_RBRAC;
+ if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
+ // If we already have a BaseReg, then assume this is the IndexReg with
+ // a scale of 1.
+ if (!BaseReg) {
+ BaseReg = TmpReg;
+ } else {
+ assert (!IndexReg && "BaseReg/IndexReg already set!");
+ IndexReg = TmpReg;
+ Scale = 1;
+ }
+ }
+ break;
+ }
+ PrevState = CurrState;
+ }
+ void onLParen() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_PLUS:
+ case IES_MINUS:
+ case IES_MULTIPLY:
+ case IES_DIVIDE:
+ case IES_LPAREN:
+ // FIXME: We don't handle this type of unary minus, yet.
+ if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
+ PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
+ PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
+ CurrState == IES_MINUS) {
+ State = IES_ERROR;
+ break;
+ }
+ State = IES_LPAREN;
+ IC.pushOperator(IC_LPAREN);
+ break;
+ }
+ PrevState = CurrState;
+ }
+ void onRParen() {
+ PrevState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_INTEGER:
+ case IES_REGISTER:
+ case IES_RPAREN:
+ State = IES_RPAREN;
+ IC.pushOperator(IC_RPAREN);
+ break;
+ }
+ }
+ };
+
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
bool Error(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
+ ArrayRef<SMRange> Ranges = None,
bool MatchingInlineAsm = false) {
if (MatchingInlineAsm) return true;
return Parser.Error(L, Msg, Ranges);
@@ -57,21 +491,25 @@ private:
X86Operand *ParseOperand();
X86Operand *ParseATTOperand();
X86Operand *ParseIntelOperand();
- X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc);
- X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind);
- X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp,
+ X86Operand *ParseIntelOffsetOfOperator();
+ X86Operand *ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
+ X86Operand *ParseIntelOperator(unsigned OpKind);
+ X86Operand *ParseIntelMemOperand(unsigned SegReg, int64_t ImmDisp,
SMLoc StartLoc);
- X86Operand *ParseIntelBracExpression(unsigned SegReg, uint64_t ImmDisp,
- unsigned Size);
- X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp,
- SMLoc &IdentStart);
- X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+ X86Operand *ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
+ X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
+ int64_t ImmDisp, unsigned Size);
+ X86Operand *ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
+ InlineAsmIdentifierInfo &Info,
+ bool IsUnevaluatedOperand, SMLoc &End);
- X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End,
- SMLoc SizeDirLoc, unsigned Size);
+ X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
- bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
- SmallString<64> &Err);
+ X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
+ unsigned BaseReg, unsigned IndexReg,
+ unsigned Scale, SMLoc Start, SMLoc End,
+ unsigned Size, StringRef Identifier,
+ InlineAsmIdentifierInfo &Info);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
@@ -101,6 +539,10 @@ private:
setAvailableFeatures(FB);
}
+ bool isParsingIntelSyntax() {
+ return getParser().getAssemblerDialect();
+ }
+
/// @name Auto-generated Matcher Functions
/// {
@@ -123,10 +565,6 @@ public:
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
-
- bool isParsingIntelSyntax() {
- return getParser().getAssemblerDialect();
- }
};
} // end anonymous namespace
@@ -176,6 +614,8 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc StartLoc, EndLoc;
SMLoc OffsetOfLoc;
+ StringRef SymName;
+ void *OpDecl;
bool AddressOf;
struct TokOp {
@@ -210,6 +650,9 @@ struct X86Operand : public MCParsedAsmOperand {
X86Operand(KindTy K, SMLoc Start, SMLoc End)
: Kind(K), StartLoc(Start), EndLoc(End) {}
+ StringRef getSymName() { return SymName; }
+ void *getOpDecl() { return OpDecl; }
+
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
@@ -473,11 +916,15 @@ struct X86Operand : public MCParsedAsmOperand {
static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
bool AddressOf = false,
- SMLoc OffsetOfLoc = SMLoc()) {
+ SMLoc OffsetOfLoc = SMLoc(),
+ StringRef SymName = StringRef(),
+ void *OpDecl = 0) {
X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
Res->Reg.RegNo = RegNo;
Res->AddressOf = AddressOf;
Res->OffsetOfLoc = OffsetOfLoc;
+ Res->SymName = SymName;
+ Res->OpDecl = OpDecl;
return Res;
}
@@ -489,7 +936,8 @@ struct X86Operand : public MCParsedAsmOperand {
/// Create an absolute memory operand.
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0) {
+ unsigned Size = 0, StringRef SymName = StringRef(),
+ void *OpDecl = 0) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -497,7 +945,9 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
- Res->AddressOf = false;
+ Res->SymName = SymName;
+ Res->OpDecl = OpDecl;
+ Res->AddressOf = false;
return Res;
}
@@ -505,7 +955,9 @@ struct X86Operand : public MCParsedAsmOperand {
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0) {
+ unsigned Size = 0,
+ StringRef SymName = StringRef(),
+ void *OpDecl = 0) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -520,7 +972,9 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
- Res->AddressOf = false;
+ Res->SymName = SymName;
+ Res->OpDecl = OpDecl;
+ Res->AddressOf = false;
return Res;
}
};
@@ -676,306 +1130,104 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) {
return Size;
}
-enum IntelBracExprState {
- IBES_START,
- IBES_LBRAC,
- IBES_RBRAC,
- IBES_REGISTER,
- IBES_REGISTER_STAR,
- IBES_REGISTER_STAR_INTEGER,
- IBES_INTEGER,
- IBES_INTEGER_STAR,
- IBES_INDEX_REGISTER,
- IBES_IDENTIFIER,
- IBES_DISP_EXPR,
- IBES_MINUS,
- IBES_ERROR
-};
-
-class IntelBracExprStateMachine {
- IntelBracExprState State;
- unsigned BaseReg, IndexReg, Scale;
- int64_t Disp;
-
- unsigned TmpReg;
- int64_t TmpInteger;
-
- bool isPlus;
-
-public:
- IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) :
- State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(disp),
- TmpReg(0), TmpInteger(0), isPlus(true) {}
-
- unsigned getBaseReg() { return BaseReg; }
- unsigned getIndexReg() { return IndexReg; }
- unsigned getScale() { return Scale; }
- int64_t getDisp() { return Disp; }
- bool isValidEndState() { return State == IBES_RBRAC; }
-
- void onPlus() {
- switch (State) {
- default:
- State = IBES_ERROR;
- break;
- case IBES_INTEGER:
- State = IBES_START;
- if (isPlus)
- Disp += TmpInteger;
- else
- Disp -= TmpInteger;
- break;
- case IBES_REGISTER:
- State = IBES_START;
- // If we already have a BaseReg, then assume this is the IndexReg with a
- // scale of 1.
- if (!BaseReg) {
- BaseReg = TmpReg;
- } else {
- assert (!IndexReg && "BaseReg/IndexReg already set!");
- IndexReg = TmpReg;
- Scale = 1;
- }
- break;
- case IBES_INDEX_REGISTER:
- State = IBES_START;
- break;
- }
- isPlus = true;
- }
- void onMinus() {
- switch (State) {
- default:
- State = IBES_ERROR;
- break;
- case IBES_START:
- State = IBES_MINUS;
- break;
- case IBES_INTEGER:
- State = IBES_START;
- if (isPlus)
- Disp += TmpInteger;
- else
- Disp -= TmpInteger;
- break;
- case IBES_REGISTER:
- State = IBES_START;
- // If we already have a BaseReg, then assume this is the IndexReg with a
- // scale of 1.
- if (!BaseReg) {
- BaseReg = TmpReg;
- } else {
- assert (!IndexReg && "BaseReg/IndexReg already set!");
- IndexReg = TmpReg;
- Scale = 1;
- }
- break;
- case IBES_INDEX_REGISTER:
- State = IBES_START;
- break;
- }
- isPlus = false;
- }
- void onRegister(unsigned Reg) {
- switch (State) {
- default:
- State = IBES_ERROR;
- break;
- case IBES_START:
- State = IBES_REGISTER;
- TmpReg = Reg;
- break;
- case IBES_INTEGER_STAR:
- assert (!IndexReg && "IndexReg already set!");
- State = IBES_INDEX_REGISTER;
- IndexReg = Reg;
- Scale = TmpInteger;
- break;
- }
- }
- void onDispExpr() {
- switch (State) {
- default:
- State = IBES_ERROR;
- break;
- case IBES_START:
- State = IBES_DISP_EXPR;
- break;
+X86Operand *
+X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
+ unsigned BaseReg, unsigned IndexReg,
+ unsigned Scale, SMLoc Start, SMLoc End,
+ unsigned Size, StringRef Identifier,
+ InlineAsmIdentifierInfo &Info){
+ if (isa<MCSymbolRefExpr>(Disp)) {
+ // If this is not a VarDecl then assume it is a FuncDecl or some other label
+ // reference. We need an 'r' constraint here, so we need to create a register
+ // operand to ensure proper matching. Just pick a GPR based on the size of
+ // a pointer.
+ if (!Info.IsVarDecl) {
+ unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+ return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
+ SMLoc(), Identifier, Info.OpDecl);
}
- }
- void onInteger(int64_t TmpInt) {
- switch (State) {
- default:
- State = IBES_ERROR;
- break;
- case IBES_START:
- State = IBES_INTEGER;
- TmpInteger = TmpInt;
- break;
- case IBES_MINUS:
- State = IBES_INTEGER;
- TmpInteger = TmpInt;
- break;
- case IBES_REGISTER_STAR:
- assert (!IndexReg && "IndexReg already set!");
- State = IBES_INDEX_REGISTER;
- IndexReg = TmpReg;
- Scale = TmpInt;
- break;
- }
- }
- void onStar() {
- switch (State) {
- default:
- State = IBES_ERROR;
- break;
- case IBES_INTEGER:
- State = IBES_INTEGER_STAR;
- break;
- case IBES_REGISTER:
- State = IBES_REGISTER_STAR;
- break;
- }
- }
- void onLBrac() {
- switch (State) {
- default:
- State = IBES_ERROR;
- break;
- case IBES_RBRAC:
- State = IBES_START;
- isPlus = true;
- break;
- }
- }
- void onRBrac() {
- switch (State) {
- default:
- State = IBES_ERROR;
- break;
- case IBES_DISP_EXPR:
- State = IBES_RBRAC;
- break;
- case IBES_INTEGER:
- State = IBES_RBRAC;
- if (isPlus)
- Disp += TmpInteger;
- else
- Disp -= TmpInteger;
- break;
- case IBES_REGISTER:
- State = IBES_RBRAC;
- // If we already have a BaseReg, then assume this is the IndexReg with a
- // scale of 1.
- if (!BaseReg) {
- BaseReg = TmpReg;
- } else {
- assert (!IndexReg && "BaseReg/IndexReg already set!");
- IndexReg = TmpReg;
- Scale = 1;
- }
- break;
- case IBES_INDEX_REGISTER:
- State = IBES_RBRAC;
- break;
- }
- }
-};
-
-X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start,
- SMLoc End, SMLoc SizeDirLoc,
- unsigned Size) {
- bool NeedSizeDir = false;
- bool IsVarDecl = false;
- if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
- const MCSymbol &Sym = SymRef->getSymbol();
- // FIXME: The SemaLookup will fail if the name is anything other then an
- // identifier.
- // FIXME: Pass a valid SMLoc.
- unsigned tLength, tSize, tType;
- SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
- tSize, tType, IsVarDecl);
if (!Size) {
- Size = tType * 8; // Size is in terms of bits in this context.
- NeedSizeDir = Size > 0;
+ Size = Info.Type * 8; // Size is in terms of bits in this context.
+ if (Size)
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
+ /*Len=*/0, Size));
}
}
- // If this is not a VarDecl then assume it is a FuncDecl or some other label
- // reference. We need an 'r' constraint here, so we need to create register
- // operand to ensure proper matching. Just pick a GPR based on the size of
- // a pointer.
- if (!IsVarDecl) {
- unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
- return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
- }
-
- if (NeedSizeDir)
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc,
- /*Len*/0, Size));
-
// When parsing inline assembly we set the base register to a non-zero value
- // as we don't know the actual value at this time. This is necessary to
+ // if we don't know the actual value at this time. This is necessary to
// get the matching correct in some cases.
- return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
- /*Scale*/1, Start, End, Size);
+ BaseReg = BaseReg ? BaseReg : 1;
+ return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
+ End, Size, Identifier, Info.OpDecl);
}
-X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
- uint64_t ImmDisp,
- unsigned Size) {
- const AsmToken &Tok = Parser.getTok();
- SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
-
- // Eat '['
- if (getLexer().isNot(AsmToken::LBrac))
- return ErrorOperand(Start, "Expected '[' token!");
- Parser.Lex();
-
- unsigned TmpReg = 0;
-
- // Try to handle '[' 'Symbol' ']'
- if (getLexer().is(AsmToken::Identifier)) {
- if (ParseRegister(TmpReg, Start, End)) {
- const MCExpr *Disp;
- SMLoc IdentStart = Tok.getLoc();
- if (getParser().parseExpression(Disp, End))
- return 0;
-
- if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
- return Err;
-
- if (getLexer().isNot(AsmToken::RBrac))
- return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!");
-
- // FIXME: We don't handle 'ImmDisp' '[' 'Symbol' ']'.
- if (ImmDisp)
- return ErrorOperand(Start, "Unsupported immediate displacement!");
-
- // Adjust the EndLoc due to the ']'.
- End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1);
- Parser.Lex();
- if (!isParsingInlineAsm())
- return X86Operand::CreateMem(Disp, Start, End, Size);
-
- // We want the size directive before the '['.
- SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1);
- return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size);
+static void
+RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
+ StringRef SymName, int64_t ImmDisp,
+ int64_t FinalImmDisp, SMLoc &BracLoc,
+ SMLoc &StartInBrac, SMLoc &End) {
+ // Remove the '[' and ']' from the IR string.
+ AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
+ AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
+
+ // If ImmDisp is non-zero, then we parsed a displacement before the
+ // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]).
+ // If ImmDisp doesn't match the displacement computed by the state machine
+ // then we have an additional displacement in the bracketed expression.
+ if (ImmDisp != FinalImmDisp) {
+ if (ImmDisp) {
+ // We have an immediate displacement before the bracketed expression.
+ // Adjust this to match the final immediate displacement.
+ bool Found = false;
+ for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
+ E = AsmRewrites->end(); I != E; ++I) {
+ if ((*I).Loc.getPointer() > BracLoc.getPointer())
+ continue;
+ if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
+ assert (!Found && "ImmDisp already rewritten.");
+ (*I).Kind = AOK_Imm;
+ (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
+ (*I).Val = FinalImmDisp;
+ Found = true;
+ break;
+ }
+ }
+ assert (Found && "Unable to rewrite ImmDisp.");
+ } else {
+ // We have a symbolic and an immediate displacement, but no displacement
+ // before the bracketed expression. Put the immediate displacement
+ // before the bracketed expression.
+ AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
}
}
+ // Remove all the ImmPrefix rewrites within the brackets.
+ for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
+ E = AsmRewrites->end(); I != E; ++I) {
+ if ((*I).Loc.getPointer() < StartInBrac.getPointer())
+ continue;
+ if ((*I).Kind == AOK_ImmPrefix)
+ (*I).Kind = AOK_Delete;
+ }
+ const char *SymLocPtr = SymName.data();
+ // Skip everything before the symbol.
+ if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
+ assert(Len > 0 && "Expected a non-negative length.");
+ AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
+ }
+ // Skip everything after the symbol.
+ if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
+ SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
+ assert(Len > 0 && "Expected a non-negative length.");
+ AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
+ }
+}
- // Parse [ BaseReg + Scale*IndexReg + Disp ]. We may have already parsed an
- // immediate displacement before the bracketed expression.
- bool Done = false;
- IntelBracExprStateMachine SM(Parser, ImmDisp);
-
- // If we parsed a register, then the end loc has already been set and
- // the identifier has already been lexed. We also need to update the
- // state.
- if (TmpReg)
- SM.onRegister(TmpReg);
+X86Operand *
+X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
+ const AsmToken &Tok = Parser.getTok();
- const MCExpr *Disp = 0;
+ bool Done = false;
while (!Done) {
bool UpdateLocLex = true;
@@ -983,6 +1235,10 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
// identifier. Don't try to parse it as a register.
if (Tok.getString().startswith("."))
break;
+
+ // If we're parsing an immediate expression, we don't expect a '['.
+ if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
+ break;
switch (getLexer().getKind()) {
default: {
@@ -992,139 +1248,185 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
}
return ErrorOperand(Tok.getLoc(), "Unexpected token!");
}
+ case AsmToken::EndOfStatement: {
+ Done = true;
+ break;
+ }
case AsmToken::Identifier: {
- // This could be a register or a displacement expression.
- if(!ParseRegister(TmpReg, Start, End)) {
+ // This could be a register or a symbolic displacement.
+ unsigned TmpReg;
+ const MCExpr *Val;
+ SMLoc IdentLoc = Tok.getLoc();
+ StringRef Identifier = Tok.getString();
+ if(!ParseRegister(TmpReg, IdentLoc, End)) {
SM.onRegister(TmpReg);
UpdateLocLex = false;
break;
- } else if (!getParser().parseExpression(Disp, End)) {
- SM.onDispExpr();
+ } else {
+ if (!isParsingInlineAsm()) {
+ if (getParser().parsePrimaryExpr(Val, End))
+ return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
+ } else {
+ InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
+ if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated*/ false, End))
+ return Err;
+ }
+ SM.onIdentifierExpr(Val, Identifier);
UpdateLocLex = false;
break;
}
return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
}
- case AsmToken::Integer: {
- int64_t Val = Tok.getIntVal();
- SM.onInteger(Val);
+ case AsmToken::Integer:
+ if (isParsingInlineAsm() && SM.getAddImmPrefix())
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
+ Tok.getLoc()));
+ SM.onInteger(Tok.getIntVal());
break;
- }
case AsmToken::Plus: SM.onPlus(); break;
case AsmToken::Minus: SM.onMinus(); break;
case AsmToken::Star: SM.onStar(); break;
+ case AsmToken::Slash: SM.onDivide(); break;
case AsmToken::LBrac: SM.onLBrac(); break;
case AsmToken::RBrac: SM.onRBrac(); break;
+ case AsmToken::LParen: SM.onLParen(); break;
+ case AsmToken::RParen: SM.onRParen(); break;
}
+ if (SM.hadError())
+ return ErrorOperand(Tok.getLoc(), "Unexpected token!");
+
if (!Done && UpdateLocLex) {
End = Tok.getLoc();
Parser.Lex(); // Consume the token.
}
}
+ return 0;
+}
- if (!Disp)
- Disp = MCConstantExpr::Create(SM.getDisp(), getContext());
+X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
+ int64_t ImmDisp,
+ unsigned Size) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
+ if (getLexer().isNot(AsmToken::LBrac))
+ return ErrorOperand(BracLoc, "Expected '[' token!");
+ Parser.Lex(); // Eat '['
+
+ SMLoc StartInBrac = Tok.getLoc();
+ // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
+ // may have already parsed an immediate displacement before the bracketed
+ // expression.
+ IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
+ if (X86Operand *Err = ParseIntelExpression(SM, End))
+ return Err;
+
+ const MCExpr *Disp;
+ if (const MCExpr *Sym = SM.getSym()) {
+ // A symbolic displacement.
+ Disp = Sym;
+ if (isParsingInlineAsm())
+ RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
+ ImmDisp, SM.getImm(), BracLoc, StartInBrac,
+ End);
+ } else {
+ // An immediate displacement only.
+ Disp = MCConstantExpr::Create(SM.getImm(), getContext());
+ }
// Parse the dot operator (e.g., [ebx].foo.bar).
if (Tok.getString().startswith(".")) {
- SmallString<64> Err;
const MCExpr *NewDisp;
- if (ParseIntelDotOperator(Disp, &NewDisp, Err))
- return ErrorOperand(Tok.getLoc(), Err);
+ if (X86Operand *Err = ParseIntelDotOperator(Disp, NewDisp))
+ return Err;
- End = Parser.getTok().getEndLoc();
+ End = Tok.getEndLoc();
Parser.Lex(); // Eat the field.
Disp = NewDisp;
}
int BaseReg = SM.getBaseReg();
int IndexReg = SM.getIndexReg();
-
- // handle [-42]
- if (!BaseReg && !IndexReg) {
- if (!SegReg)
- return X86Operand::CreateMem(Disp, Start, End);
- else
- return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
+ int Scale = SM.getScale();
+ if (!isParsingInlineAsm()) {
+ // handle [-42]
+ if (!BaseReg && !IndexReg) {
+ if (!SegReg)
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+ else
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
+ }
+ return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
+ End, Size);
}
- int Scale = SM.getScale();
- return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
- Start, End, Size);
+ InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
+ return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
+ End, Size, SM.getSymName(), Info);
}
// Inline assembly may use variable names with namespace alias qualifiers.
-X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp,
- SMLoc &IdentStart) {
- // We should only see Foo::Bar if we're parsing inline assembly.
- if (!isParsingInlineAsm())
- return 0;
-
- // If we don't see a ':' then there can't be a qualifier.
- if (getLexer().isNot(AsmToken::Colon))
- return 0;
+X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
+ StringRef &Identifier,
+ InlineAsmIdentifierInfo &Info,
+ bool IsUnevaluatedOperand,
+ SMLoc &End) {
+ assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
+ Val = 0;
+ StringRef LineBuf(Identifier.data());
+ SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
- bool Done = false;
const AsmToken &Tok = Parser.getTok();
- SMLoc IdentEnd = Tok.getEndLoc();
- while (!Done) {
- switch (getLexer().getKind()) {
- default:
- Done = true;
- break;
- case AsmToken::Colon:
- getLexer().Lex(); // Consume ':'.
- if (getLexer().isNot(AsmToken::Colon))
- return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
- getLexer().Lex(); // Consume second ':'.
- if (getLexer().isNot(AsmToken::Identifier))
- return ErrorOperand(Tok.getLoc(), "Expected an identifier token!");
- break;
- case AsmToken::Identifier:
- IdentEnd = Tok.getEndLoc();
- getLexer().Lex(); // Consume the identifier.
- break;
- }
+
+ // Advance the token stream until the end of the current token is
+ // after the end of what the frontend claimed.
+ const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
+ while (true) {
+ End = Tok.getEndLoc();
+ getLexer().Lex();
+
+ assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
+ if (End.getPointer() == EndPtr) break;
}
- size_t Len = IdentEnd.getPointer() - IdentStart.getPointer();
- StringRef Identifier(IdentStart.getPointer(), Len);
+
+ // Create the symbol reference.
+ Identifier = LineBuf;
MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
+ Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
return 0;
}
/// ParseIntelMemOperand - Parse intel style memory operand.
X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
- uint64_t ImmDisp,
+ int64_t ImmDisp,
SMLoc Start) {
const AsmToken &Tok = Parser.getTok();
SMLoc End;
unsigned Size = getIntelMemOperandSize(Tok.getString());
if (Size) {
- Parser.Lex();
- assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
- "Unexpected token!");
- Parser.Lex();
+ Parser.Lex(); // Eat operand size (e.g., byte, word).
+ if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
+ return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
+ Parser.Lex(); // Eat ptr.
}
// Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
if (getLexer().is(AsmToken::Integer)) {
- const AsmToken &IntTok = Parser.getTok();
if (isParsingInlineAsm())
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
- IntTok.getLoc()));
- uint64_t ImmDisp = IntTok.getIntVal();
+ Tok.getLoc()));
+ int64_t ImmDisp = Tok.getIntVal();
Parser.Lex(); // Eat the integer.
if (getLexer().isNot(AsmToken::LBrac))
return ErrorOperand(Start, "Expected '[' token!");
- return ParseIntelBracExpression(SegReg, ImmDisp, Size);
+ return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
}
if (getLexer().is(AsmToken::LBrac))
- return ParseIntelBracExpression(SegReg, ImmDisp, Size);
+ return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
if (!ParseRegister(SegReg, Start, End)) {
// Handle SegReg : [ ... ]
@@ -1133,37 +1435,37 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
Parser.Lex(); // Eat :
if (getLexer().isNot(AsmToken::LBrac))
return ErrorOperand(Start, "Expected '[' token!");
- return ParseIntelBracExpression(SegReg, ImmDisp, Size);
+ return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
}
- const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
- SMLoc IdentStart = Tok.getLoc();
- if (getParser().parseExpression(Disp, End))
- return 0;
+ const MCExpr *Val;
+ if (!isParsingInlineAsm()) {
+ if (getParser().parsePrimaryExpr(Val, End))
+ return ErrorOperand(Tok.getLoc(), "Unexpected token!");
- if (!isParsingInlineAsm())
- return X86Operand::CreateMem(Disp, Start, End, Size);
+ return X86Operand::CreateMem(Val, Start, End, Size);
+ }
- if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
+ InlineAsmIdentifierInfo Info;
+ StringRef Identifier = Tok.getString();
+ if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated*/ false, End))
return Err;
-
- return CreateMemForInlineAsm(Disp, Start, End, Start, Size);
+ return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
+ /*Scale=*/1, Start, End, Size, Identifier, Info);
}
/// Parse the '.' operator.
-bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
- const MCExpr **NewDisp,
- SmallString<64> &Err) {
- AsmToken Tok = *&Parser.getTok();
- uint64_t OrigDispVal, DotDispVal;
+X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
+ const MCExpr *&NewDisp) {
+ const AsmToken &Tok = Parser.getTok();
+ int64_t OrigDispVal, DotDispVal;
// FIXME: Handle non-constant expressions.
- if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
+ if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
OrigDispVal = OrigDisp->getValue();
- } else {
- Err = "Non-constant offsets are not supported!";
- return true;
- }
+ else
+ return ErrorOperand(Tok.getLoc(), "Non-constant offsets are not supported!");
// Drop the '.'.
StringRef DotDispStr = Tok.getString().drop_front(1);
@@ -1173,23 +1475,15 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
APInt DotDisp;
DotDispStr.getAsInteger(10, DotDisp);
DotDispVal = DotDisp.getZExtValue();
- } else if (Tok.is(AsmToken::Identifier)) {
- // We should only see an identifier when parsing the original inline asm.
- // The front-end should rewrite this in terms of immediates.
- assert (isParsingInlineAsm() && "Unexpected field name!");
-
+ } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
unsigned DotDisp;
std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
- DotDisp)) {
- Err = "Unable to lookup field reference!";
- return true;
- }
+ DotDisp))
+ return ErrorOperand(Tok.getLoc(), "Unable to lookup field reference!");
DotDispVal = DotDisp;
- } else {
- Err = "Unexpected token type!";
- return true;
- }
+ } else
+ return ErrorOperand(Tok.getLoc(), "Unexpected token type!");
if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
@@ -1199,22 +1493,24 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
Val));
}
- *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
- return false;
+ NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
+ return 0;
}
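A worked (hypothetical) trace of the dot-operator path for MS-style inline asm; the struct layout and the offset returned by the frontend are assumptions:

// Assume the operand is "[ebx].foo.bar" and the frontend resolves the field
// chain foo.bar to offset 8:
//   Disp (on entry)  = MCConstantExpr 0      // OrigDispVal = 0
//   Tok.getString()  = ".foo.bar"
//   DotDispStr       = "foo.bar"             // leading '.' dropped
//   BaseMember       = ("foo", "bar")        // passed to LookupInlineAsmField
//   DotDispVal       = 8                     // offset supplied by the callback
//   NewDisp          = MCConstantExpr 8      // OrigDispVal + DotDispVal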
/// Parse the 'offset' operator. This operator is used to specify the
/// location rather than the content of a variable.
-X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
- SMLoc OffsetOfLoc = Start;
+X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc OffsetOfLoc = Tok.getLoc();
Parser.Lex(); // Eat offset.
- Start = Parser.getTok().getLoc();
- assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
- SMLoc End;
const MCExpr *Val;
- if (getParser().parseExpression(Val, End))
- return ErrorOperand(Start, "Unable to parse expression!");
+ InlineAsmIdentifierInfo Info;
+ SMLoc Start = Tok.getLoc(), End;
+ StringRef Identifier = Tok.getString();
+ if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated*/ false, End))
+ return Err;
// Don't emit the offset operator.
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
@@ -1224,7 +1520,7 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
// the size of a pointer.
unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
- OffsetOfLoc);
+ OffsetOfLoc, Identifier, Info.OpDecl);
}
enum IntelOperatorKind {
@@ -1239,34 +1535,25 @@ enum IntelOperatorKind {
/// variable. A variable's size is the product of its LENGTH and TYPE. The
/// TYPE operator returns the size of a C or C++ type or variable. If the
/// variable is an array, TYPE returns the size of a single element.
-X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
- SMLoc TypeLoc = Start;
- Parser.Lex(); // Eat offset.
- Start = Parser.getTok().getLoc();
- assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
-
- SMLoc End;
- const MCExpr *Val;
- if (getParser().parseExpression(Val, End))
- return 0;
+X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc TypeLoc = Tok.getLoc();
+ Parser.Lex(); // Eat operator.
+
+ const MCExpr *Val = 0;
+ InlineAsmIdentifierInfo Info;
+ SMLoc Start = Tok.getLoc(), End;
+ StringRef Identifier = Tok.getString();
+ if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated*/ true, End))
+ return Err;
- unsigned Length = 0, Size = 0, Type = 0;
- if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
- const MCSymbol &Sym = SymRef->getSymbol();
- // FIXME: The SemaLookup will fail if the name is anything other then an
- // identifier.
- // FIXME: Pass a valid SMLoc.
- bool IsVarDecl;
- if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
- Size, Type, IsVarDecl))
- return ErrorOperand(Start, "Unable to lookup expr!");
- }
- unsigned CVal;
+ unsigned CVal = 0;
switch(OpKind) {
default: llvm_unreachable("Unexpected operand kind!");
- case IOK_LENGTH: CVal = Length; break;
- case IOK_SIZE: CVal = Size; break;
- case IOK_TYPE: CVal = Type; break;
+ case IOK_LENGTH: CVal = Info.Length; break;
+ case IOK_SIZE: CVal = Info.Size; break;
+ case IOK_TYPE: CVal = Info.Type; break;
}
// Rewrite the type operator and the C or C++ type or variable in terms of an
@@ -1279,44 +1566,54 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
}
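To make the operator semantics concrete, a hypothetical example; the array, its element size, and the values in Info are assumptions, not taken from the patch:

// Assume the enclosing C code declares "int arr[10]" and the inline asm says:
//   mov eax, LENGTH arr   ; Info.Length == 10 -> immediate 10
//   mov eax, TYPE arr     ; Info.Type   == 4  -> immediate 4 (element size)
//   mov eax, SIZE arr     ; Info.Size   == 40 -> immediate 40 (LENGTH * TYPE)
// In each case CVal comes from the InlineAsmIdentifierInfo filled in by
// ParseIntelIdentifier, and the operand is rewritten as a plain immediate.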
X86Operand *X86AsmParser::ParseIntelOperand() {
- SMLoc Start = Parser.getTok().getLoc(), End;
- StringRef AsmTokStr = Parser.getTok().getString();
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc Start = Tok.getLoc(), End;
// Offset, length, type and size operators.
if (isParsingInlineAsm()) {
+ StringRef AsmTokStr = Tok.getString();
if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
- return ParseIntelOffsetOfOperator(Start);
+ return ParseIntelOffsetOfOperator();
if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
- return ParseIntelOperator(Start, IOK_LENGTH);
+ return ParseIntelOperator(IOK_LENGTH);
if (AsmTokStr == "size" || AsmTokStr == "SIZE")
- return ParseIntelOperator(Start, IOK_SIZE);
+ return ParseIntelOperator(IOK_SIZE);
if (AsmTokStr == "type" || AsmTokStr == "TYPE")
- return ParseIntelOperator(Start, IOK_TYPE);
+ return ParseIntelOperator(IOK_TYPE);
}
// Immediate.
- if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
- getLexer().is(AsmToken::Minus)) {
- const MCExpr *Val;
- bool isInteger = getLexer().is(AsmToken::Integer);
- if (!getParser().parseExpression(Val, End)) {
- if (isParsingInlineAsm())
+ if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
+ getLexer().is(AsmToken::LParen)) {
+ AsmToken StartTok = Tok;
+ IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
+ /*AddImmPrefix=*/false);
+ if (X86Operand *Err = ParseIntelExpression(SM, End))
+ return Err;
+
+ int64_t Imm = SM.getImm();
+ if (isParsingInlineAsm()) {
+ unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
+ if (StartTok.getString().size() == Len)
+ // Just add a prefix if this wasn't a complex immediate expression.
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
- // Immediate.
- if (getLexer().isNot(AsmToken::LBrac))
- return X86Operand::CreateImm(Val, Start, End);
-
- // Only positive immediates are valid.
- if (!isInteger) {
- Error(Parser.getTok().getLoc(), "expected a positive immediate "
- "displacement before bracketed expr.");
- return 0;
- }
+ else
+ // Otherwise, rewrite the complex expression as a single immediate.
+ InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
+ }
- // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
- if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue())
- return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start);
+ if (getLexer().isNot(AsmToken::LBrac)) {
+ const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
+ return X86Operand::CreateImm(ImmExpr, Start, End);
}
+
+ // Only positive immediates are valid.
+ if (Imm < 0)
+ return ErrorOperand(Start, "expected a positive immediate displacement "
+ "before bracketed expr.");
+
+ // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+ return ParseIntelMemOperand(/*SegReg=*/0, Imm, Start);
}
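Two small (hypothetical) inputs showing the difference between the rewrite kinds above; how AOK_ImmPrefix and AOK_Imm are rendered later is an assumption about the inline-asm rewriting stage:

//   mov eax, 4         ; single token: AOK_ImmPrefix only, rendered as "$$4"
//   mov eax, 2*4 + 1   ; multi-token: folded by the state machine and the
//                      ; whole span replaced via AOK_Imm, rendered as "$$9"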
// Register.
@@ -1907,7 +2204,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
assert(!Operands.empty() && "Unexpected empty operand list!");
X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
assert(Op->isToken() && "Leading operand should always be a mnemonic!");
- ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
+ ArrayRef<SMRange> EmptyRanges = None;
// First, handle aliases that expand to multiple instructions.
// FIXME: This should be replaced with a real .td file alias mechanism.
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 3669560..d8f7278 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -20,6 +20,7 @@
#include "X86MCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCInstrInfo.h"
namespace llvm {
@@ -41,7 +42,6 @@ namespace X86 {
AddrNumOperands = 5
};
} // end namespace X86;
-
/// X86II - This namespace holds all of the target specific flags that
/// instruction info tracks.
@@ -274,11 +274,12 @@ namespace X86II {
//// MRM_XX - A mod/rm byte of exactly 0xXX.
MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36,
- MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40,
- MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46,
- MRM_D4 = 47, MRM_D5 = 48, MRM_D6 = 49, MRM_D8 = 50,
- MRM_D9 = 51, MRM_DA = 52, MRM_DB = 53, MRM_DC = 54,
- MRM_DD = 55, MRM_DE = 56, MRM_DF = 57,
+ MRM_C8 = 37, MRM_C9 = 38, MRM_CA = 39, MRM_CB = 40,
+ MRM_E8 = 41, MRM_F0 = 42, MRM_F8 = 45, MRM_F9 = 46,
+ MRM_D0 = 47, MRM_D1 = 48, MRM_D4 = 49, MRM_D5 = 50,
+ MRM_D6 = 51, MRM_D8 = 52, MRM_D9 = 53, MRM_DA = 54,
+ MRM_DB = 55, MRM_DC = 56, MRM_DD = 57, MRM_DE = 58,
+ MRM_DF = 59,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
/// immediates, the first of which is a 16-bit immediate (specified by
@@ -521,6 +522,26 @@ namespace X86II {
}
}
+ /// getOperandBias - compute any additional adjustment needed to
+ /// the offset to the start of the memory operand
+ /// in this instruction.
+ /// If this is a two-address instruction, skip one of the register operands.
+ /// FIXME: This should be handled during MCInst lowering.
+ inline int getOperandBias(const MCInstrDesc& Desc)
+ {
+ unsigned NumOps = Desc.getNumOperands();
+ unsigned CurOp = 0;
+ if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
+ ++CurOp;
+ else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) {
+ assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
+ // Special case for GATHER with 2 TIED_TO operands
+ // Skip the first 2 operands: dst, mask_wb
+ CurOp += 2;
+ }
+ return CurOp;
+ }
+
/// getMemoryOperandNo - The function returns the MCInst operand # for the
/// first field of the memory operand. If the instruction doesn't have a
/// memory operand, this returns -1.
@@ -576,12 +597,13 @@ namespace X86II {
}
case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
- case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8:
- case X86II::MRM_F9: case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6:
- case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA:
- case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD:
- case X86II::MRM_DE: case X86II::MRM_DF:
+ case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_E8:
+ case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9:
+ case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
+ case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
+ case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
+ case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
+ case X86II::MRM_DF:
return -1;
}
}
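Both call sites the patch touches (the MC code emitter below and the new X86FixupLEAs pass) combine the two helpers in the same way; a minimal usage sketch, where MCII and Opcode are assumed to be an MCInstrInfo and an instruction opcode:

// Locate the first memory operand, adjusting for tied-to register operands.
const MCInstrDesc &Desc = MCII.get(Opcode);
int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, Opcode);
if (MemOpNo >= 0) {
  MemOpNo += X86II::getOperandBias(Desc);
  // Operands [MemOpNo, MemOpNo + X86::AddrNumOperands) now hold the base,
  // scale, index, displacement and segment of the memory reference.
}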
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 776cee1..016af71 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -237,6 +237,14 @@ StartsWithGlobalOffsetTable(const MCExpr *Expr) {
return GOT_Normal;
}
+static bool HasSecRelSymbolRef(const MCExpr *Expr) {
+ if (Expr->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ return Ref->getKind() == MCSymbolRefExpr::VK_SECREL;
+ }
+ return false;
+}
+
void X86MCCodeEmitter::
EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
@@ -268,8 +276,13 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
if (Kind == GOT_Normal)
ImmOffset = CurByte;
} else if (Expr->getKind() == MCExpr::SymbolRef) {
- const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
- if (Ref->getKind() == MCSymbolRefExpr::VK_SECREL) {
+ if (HasSecRelSymbolRef(Expr)) {
+ FixupKind = MCFixupKind(FK_SecRel_4);
+ }
+ } else if (Expr->getKind() == MCExpr::Binary) {
+ const MCBinaryExpr *Bin = static_cast<const MCBinaryExpr*>(Expr);
+ if (HasSecRelSymbolRef(Bin->getLHS())
+ || HasSecRelSymbolRef(Bin->getRHS())) {
FixupKind = MCFixupKind(FK_SecRel_4);
}
}
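The new MCExpr::Binary case catches displacements that are a SECREL symbol reference plus a constant, which the old code missed; a sketch of the expression shape involved, where Foo and Ctx are an assumed MCSymbol* and MCContext:

const MCExpr *Sym  = MCSymbolRefExpr::Create(Foo, MCSymbolRefExpr::VK_SECREL, Ctx);
const MCExpr *Off  = MCConstantExpr::Create(16, Ctx);
const MCExpr *Disp = MCBinaryExpr::CreateAdd(Sym, Off, Ctx);  // foo@SECREL32 + 16
// EmitImmediate now probes both sides of the add with HasSecRelSymbolRef and
// selects FK_SecRel_4, just as it already did for a bare VK_SECREL reference.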
@@ -979,18 +992,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
return;
- // If this is a two-address instruction, skip one of the register operands.
- // FIXME: This should be handled during MCInst lowering.
unsigned NumOps = Desc.getNumOperands();
- unsigned CurOp = 0;
- if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
- ++CurOp;
- else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) {
- assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
- // Special case for GATHER with 2 TIED_TO operands
- // Skip the first 2 operands: dst, mask_wb
- CurOp += 2;
- }
+ unsigned CurOp = X86II::getOperandBias(Desc);
// Keep track of the current byte being emitted.
unsigned CurByte = 0;
@@ -1138,12 +1141,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
break;
case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
- case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
- case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
- case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
- case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
- case X86II::MRM_DF: case X86II::MRM_E8: case X86II::MRM_F0:
- case X86II::MRM_F8: case X86II::MRM_F9:
+ case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_D0:
+ case X86II::MRM_D1: case X86II::MRM_D4: case X86II::MRM_D5:
+ case X86II::MRM_D6: case X86II::MRM_D8: case X86II::MRM_D9:
+ case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC:
+ case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF:
+ case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8:
+ case X86II::MRM_F9:
EmitByte(BaseOpcode, CurByte, OS);
unsigned char MRM;
@@ -1155,6 +1159,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM_C4: MRM = 0xC4; break;
case X86II::MRM_C8: MRM = 0xC8; break;
case X86II::MRM_C9: MRM = 0xC9; break;
+ case X86II::MRM_CA: MRM = 0xCA; break;
+ case X86II::MRM_CB: MRM = 0xCB; break;
case X86II::MRM_D0: MRM = 0xD0; break;
case X86II::MRM_D1: MRM = 0xD1; break;
case X86II::MRM_D4: MRM = 0xD4; break;
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index bc272ef..ed64a32 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -9,6 +9,8 @@
#include "MCTargetDesc/X86FixupKinds.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -27,7 +29,9 @@ namespace {
X86WinCOFFObjectWriter(bool Is64Bit_);
~X86WinCOFFObjectWriter();
- virtual unsigned getRelocType(unsigned FixupKind) const;
+ virtual unsigned getRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsCrossSection) const LLVM_OVERRIDE;
};
}
@@ -38,7 +42,14 @@ X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit_)
X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {}
-unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const {
+unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsCrossSection) const {
+ unsigned FixupKind = IsCrossSection ? FK_PCRel_4 : Fixup.getKind();
+
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
switch (FixupKind) {
case FK_PCRel_4:
case X86::reloc_riprel_4byte:
@@ -46,6 +57,9 @@ unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const {
return Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 : COFF::IMAGE_REL_I386_REL32;
case FK_Data_4:
case X86::reloc_signed_4byte:
+ if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32)
+ return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32NB :
+ COFF::IMAGE_REL_I386_DIR32NB;
return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32 : COFF::IMAGE_REL_I386_DIR32;
case FK_Data_8:
if (Is64Bit)
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
index 1f9919f..947002f 100644
--- a/contrib/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -69,6 +69,11 @@ ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM);
/// createX86PadShortFunctions - Return a pass that pads short functions
/// with NOOPs. This will prevent a stall when returning on the Atom.
FunctionPass *createX86PadShortFunctions();
+/// createX86FixupLEAs - Return a pass that selectively replaces
+/// certain instructions (like add, sub, inc, dec, some shifts,
+/// and some multiplies) by equivalent LEA instructions, in order
+/// to eliminate execution delays in some Atom processors.
+FunctionPass *createX86FixupLEAs();
} // End llvm namespace
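The header only exposes the factory; presumably the pass is scheduled from the X86 pass configuration, gated on the new LEAUsesAG subtarget bit. A hedged sketch of that wiring (the hook and accessor names are assumptions, not shown in this diff):

// Hypothetical: somewhere in X86TargetMachine's pass config, late in codegen.
if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().LEAUsesAG())
  addPass(createX86FixupLEAs());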
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index 1dcc344..c865500 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -139,6 +139,8 @@ def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
"CallRegIndirect", "true",
"Call register indirect">;
+def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
+ "LEA instruction needs inputs at AG stage">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -188,6 +190,7 @@ def : ProcessorModel<"atom", AtomModel,
FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
FeatureSlowDivide,
FeatureCallRegIndirect,
+ FeatureLEAUsesAG,
FeaturePadShortFunctions]>;
// "Arrandale" along with corei3 and corei5
@@ -252,11 +255,16 @@ def : Proc<"amdfam10", [FeatureSSE4A,
// Bobcat
def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
FeatureLZCNT, FeaturePOPCNT]>;
+// Jaguar
+def : Proc<"btver2", [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureAES, FeaturePCLMUL, FeatureBMI,
+ FeatureF16C, FeatureMOVBE, FeatureLZCNT,
+ FeaturePOPCNT]>;
// Bulldozer
def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePCLMUL,
FeatureLZCNT, FeaturePOPCNT]>;
-// Enhanced Bulldozer
+// Piledriver
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePCLMUL,
FeatureF16C, FeatureLZCNT,
@@ -300,6 +308,9 @@ def ATTAsmParser : AsmParser {
def ATTAsmParserVariant : AsmParserVariant {
int Variant = 0;
+ // Variant name.
+ string Name = "att";
+
// Discard comments in assembly strings.
string CommentDelimiter = "#";
@@ -310,6 +321,9 @@ def ATTAsmParserVariant : AsmParserVariant {
def IntelAsmParserVariant : AsmParserVariant {
int Variant = 1;
+ // Variant name.
+ string Name = "intel";
+
// Discard comments in assembly strings.
string CommentDelimiter = ";";
diff --git a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
index 2518e02..8fea6ed 100644
--- a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
@@ -1451,6 +1451,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
MCE.emitByte(BaseOpcode);
MCE.emitByte(0xC9);
break;
+ case X86II::MRM_CA:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xCA);
+ break;
+ case X86II::MRM_CB:
+ MCE.emitByte(BaseOpcode);
+ MCE.emitByte(0xCB);
+ break;
case X86II::MRM_E8:
MCE.emitByte(BaseOpcode);
MCE.emitByte(0xE8);
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index cadec68..cf44bd0 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -68,12 +68,12 @@ public:
virtual bool TargetSelectInstruction(const Instruction *I);
- /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+ /// \brief The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// possible.
- virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
- const LoadInst *LI);
+ virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
virtual bool FastLowerArguments();
@@ -107,6 +107,8 @@ private:
bool X86SelectShift(const Instruction *I);
+ bool X86SelectDivRem(const Instruction *I);
+
bool X86SelectSelect(const Instruction *I);
bool X86SelectTrunc(const Instruction *I);
@@ -691,11 +693,6 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
if (S->isAtomic())
return false;
- unsigned SABIAlignment =
- TD.getABITypeAlignment(S->getValueOperand()->getType());
- if (S->getAlignment() != 0 && S->getAlignment() < SABIAlignment)
- return false;
-
MVT VT;
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
return false;
@@ -1235,6 +1232,124 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
return true;
}
+bool X86FastISel::X86SelectDivRem(const Instruction *I) {
+ const static unsigned NumTypes = 4; // i8, i16, i32, i64
+ const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
+ const static bool S = true; // IsSigned
+ const static bool U = false; // !IsSigned
+ const static unsigned Copy = TargetOpcode::COPY;
+ // For the X86 DIV/IDIV instruction, in most cases the dividend
+ // (numerator) must be in a specific register pair highreg:lowreg,
+ // producing the quotient in lowreg and the remainder in highreg.
+ // For most data types, to set up the instruction, the dividend is
+ // copied into lowreg, and lowreg is sign-extended or zero-extended
+ // into highreg. The exception is i8, where the dividend is defined
+ // as a single register rather than a register pair, and we
+ // therefore directly sign-extend or zero-extend the dividend into
+ // lowreg, instead of copying, and ignore the highreg.
+ const static struct DivRemEntry {
+ // The following portion depends only on the data type.
+ const TargetRegisterClass *RC;
+ unsigned LowInReg; // low part of the register pair
+ unsigned HighInReg; // high part of the register pair
+ // The following portion depends on both the data type and the operation.
+ struct DivRemResult {
+ unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
+ unsigned OpSignExtend; // Opcode for sign-extending lowreg into
+ // highreg, or copying a zero into highreg.
+ unsigned OpCopy; // Opcode for copying dividend into lowreg, or
+ // zero/sign-extending into lowreg for i8.
+ unsigned DivRemResultReg; // Register containing the desired result.
+ bool IsOpSigned; // Whether to use signed or unsigned form.
+ } ResultTable[NumOps];
+ } OpTable[NumTypes] = {
+ { &X86::GR8RegClass, X86::AX, 0, {
+ { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv
+ { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem
+ { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv
+ { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem
+ }
+ }, // i8
+ { &X86::GR16RegClass, X86::AX, X86::DX, {
+ { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv
+ { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem
+ { X86::DIV16r, X86::MOV16r0, Copy, X86::AX, U }, // UDiv
+ { X86::DIV16r, X86::MOV16r0, Copy, X86::DX, U }, // URem
+ }
+ }, // i16
+ { &X86::GR32RegClass, X86::EAX, X86::EDX, {
+ { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv
+ { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem
+ { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv
+ { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem
+ }
+ }, // i32
+ { &X86::GR64RegClass, X86::RAX, X86::RDX, {
+ { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
+ { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
+ { X86::DIV64r, X86::MOV64r0, Copy, X86::RAX, U }, // UDiv
+ { X86::DIV64r, X86::MOV64r0, Copy, X86::RDX, U }, // URem
+ }
+ }, // i64
+ };
+
+ MVT VT;
+ if (!isTypeLegal(I->getType(), VT))
+ return false;
+
+ unsigned TypeIndex, OpIndex;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::i8: TypeIndex = 0; break;
+ case MVT::i16: TypeIndex = 1; break;
+ case MVT::i32: TypeIndex = 2; break;
+ case MVT::i64: TypeIndex = 3;
+ if (!Subtarget->is64Bit())
+ return false;
+ break;
+ }
+
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unexpected div/rem opcode");
+ case Instruction::SDiv: OpIndex = 0; break;
+ case Instruction::SRem: OpIndex = 1; break;
+ case Instruction::UDiv: OpIndex = 2; break;
+ case Instruction::URem: OpIndex = 3; break;
+ }
+
+ const DivRemEntry &TypeEntry = OpTable[TypeIndex];
+ const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
+ unsigned Op0Reg = getRegForValue(I->getOperand(0));
+ if (Op0Reg == 0)
+ return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0)
+ return false;
+
+ // Move op0 into low-order input register.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
+ // Zero-extend or sign-extend into high-order input register.
+ if (OpEntry.OpSignExtend) {
+ if (OpEntry.IsOpSigned)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(OpEntry.OpSignExtend));
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg);
+ }
+ // Generate the DIV/IDIV instruction.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
+ // Copy output register into result register.
+ unsigned ResultReg = createResultReg(TypeEntry.RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg);
+ UpdateValueMap(I, ResultReg);
+
+ return true;
+}
+
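A worked example of the table lookup above: for a 32-bit signed division, TypeIndex is 2 and OpIndex is 0, so the selected entry expands to roughly this sequence:

//   COPY     EAX <- Op0Reg     ; OpCopy: dividend into the low register
//   CDQ                        ; OpSignExtend: sign-extend EAX into EDX
//   IDIV32r  Op1Reg            ; OpDivRem: divide EDX:EAX by the divisor
//   COPY     ResultReg <- EAX  ; quotient (SRem would copy EDX instead)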
bool X86FastISel::X86SelectSelect(const Instruction *I) {
MVT VT;
if (!isTypeLegal(I->getType(), VT))
@@ -2084,6 +2199,11 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
case Instruction::AShr:
case Instruction::Shl:
return X86SelectShift(I);
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ return X86SelectDivRem(I);
case Instruction::Select:
return X86SelectSelect(I);
case Instruction::Trunc:
@@ -2275,12 +2395,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
}
-/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
-/// vreg is being provided by the specified load instruction. If possible,
-/// try to fold the load as an operand to the instruction, returning true if
-/// possible.
-bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
- const LoadInst *LI) {
+bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
X86AddressMode AM;
if (!X86SelectAddress(LI->getOperand(0), AM))
return false;
diff --git a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
new file mode 100644
index 0000000..0dd034c
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -0,0 +1,253 @@
+//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which will find instructions which
+// can be re-written as LEA instructions in order to reduce pipeline
+// delays for some models of the Intel Atom family.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-fixup-LEAs"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumLEAs, "Number of LEA instructions created");
+
+namespace {
+ class FixupLEAPass : public MachineFunctionPass {
+ enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
+ static char ID;
+ /// \brief Loop over all of the instructions in the basic block
+ /// replacing applicable instructions with LEA instructions,
+ /// where appropriate.
+ bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
+
+ virtual const char *getPassName() const { return "X86 Atom LEA Fixup";}
+
+ /// \brief Given a machine register, look for the instruction
+ /// which writes it in the current basic block. If found,
+ /// try to replace it with an equivalent LEA instruction.
+ /// If replacement succeeds, then also process the newly created
+ /// instruction.
+ void seekLEAFixup(MachineOperand& p, MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI);
+
+ /// \brief Given a memory access or LEA instruction
+ /// whose address mode uses a base and/or index register, look for
+ /// an opportunity to replace the instruction which sets the base or index
+ /// register with an equivalent LEA instruction.
+ void processInstruction(MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI);
+
+ /// \brief Determine if an instruction references a machine register
+ /// and, if so, whether it reads or writes the register.
+ RegUsageState usesRegister(MachineOperand& p,
+ MachineBasicBlock::iterator I);
+
+ /// \brief Step backwards through a basic block, looking
+ /// for an instruction which writes a register within
+ /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
+ MachineBasicBlock::iterator searchBackwards(MachineOperand& p,
+ MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI);
+
+ /// \brief If an instruction can be converted to an
+ /// equivalent LEA, insert the new instruction into the basic block
+ /// and return a pointer to it. Otherwise, return zero.
+ MachineInstr* postRAConvertToLEA(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI) const;
+
+ public:
+ FixupLEAPass() : MachineFunctionPass(ID) {}
+
+ /// \brief Loop over all of the basic blocks,
+ /// replacing instructions by equivalent LEA instructions
+ /// if needed and when possible.
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ MachineFunction *MF;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII; // Machine instruction info.
+
+ };
+ char FixupLEAPass::ID = 0;
+}
+
+MachineInstr *
+FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI) const {
+ MachineInstr* MI = MBBI;
+ MachineInstr* NewMI;
+ switch (MI->getOpcode()) {
+ case X86::MOV32rr:
+ case X86::MOV64rr: {
+ const MachineOperand& Src = MI->getOperand(1);
+ const MachineOperand& Dest = MI->getOperand(0);
+ NewMI = BuildMI(*MF, MI->getDebugLoc(),
+ TII->get( MI->getOpcode() == X86::MOV32rr ? X86::LEA32r : X86::LEA64r))
+ .addOperand(Dest)
+ .addOperand(Src).addImm(1).addReg(0).addImm(0).addReg(0);
+ MFI->insert(MBBI, NewMI); // Insert the new inst
+ return NewMI;
+ }
+ case X86::ADD64ri32:
+ case X86::ADD64ri8:
+ case X86::ADD64ri32_DB:
+ case X86::ADD64ri8_DB:
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ case X86::ADD32ri_DB:
+ case X86::ADD32ri8_DB:
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16ri_DB:
+ case X86::ADD16ri8_DB:
+ if (!MI->getOperand(2).isImm()) {
+ // convertToThreeAddress will call getImm()
+ // which requires isImm() to be true
+ return 0;
+ }
+ }
+ return TII->convertToThreeAddress(MFI, MBBI, 0);
+}
+
+FunctionPass *llvm::createX86FixupLEAs() {
+ return new FixupLEAPass();
+}
+
+bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ TII = Func.getTarget().getInstrInfo();
+ TM = &MF->getTarget();
+
+ DEBUG(dbgs() << "Start X86FixupLEAs\n";);
+ // Process all basic blocks.
+ for (MachineFunction::iterator I = Func.begin(), E = Func.end(); I != E; ++I)
+ processBasicBlock(Func, I);
+ DEBUG(dbgs() << "End X86FixupLEAs\n";);
+
+ return true;
+}
+
+FixupLEAPass::RegUsageState FixupLEAPass::usesRegister(MachineOperand& p,
+ MachineBasicBlock::iterator I) {
+ RegUsageState RegUsage = RU_NotUsed;
+ MachineInstr* MI = I;
+
+ for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
+ MachineOperand& opnd = MI->getOperand(i);
+ if (opnd.isReg() && opnd.getReg() == p.getReg()){
+ if (opnd.isDef())
+ return RU_Write;
+ RegUsage = RU_Read;
+ }
+ }
+ return RegUsage;
+}
+
+/// getPreviousInstr - Given a reference to an instruction in a basic
+/// block, return a reference to the previous instruction in the block,
+/// wrapping around to the last instruction of the block if the block
+/// branches to itself.
+static inline bool getPreviousInstr(MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI) {
+ if (I == MFI->begin()) {
+ if (MFI->isPredecessor(MFI)) {
+ I = --MFI->end();
+ return true;
+ }
+ else
+ return false;
+ }
+ --I;
+ return true;
+}
+
+MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p,
+ MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI) {
+ int InstrDistance = 1;
+ MachineBasicBlock::iterator CurInst;
+ static const int INSTR_DISTANCE_THRESHOLD = 5;
+
+ CurInst = I;
+ bool Found;
+ Found = getPreviousInstr(CurInst, MFI);
+ while( Found && I != CurInst) {
+ if (CurInst->isCall() || CurInst->isInlineAsm())
+ break;
+ if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
+ break; // too far back to make a difference
+ if (usesRegister(p, CurInst) == RU_Write){
+ return CurInst;
+ }
+ InstrDistance += TII->getInstrLatency(TM->getInstrItineraryData(), CurInst);
+ Found = getPreviousInstr(CurInst, MFI);
+ }
+ return 0;
+}
+
+void FixupLEAPass::processInstruction(MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI) {
+ // Process a load, store, or LEA instruction.
+ MachineInstr *MI = I;
+ int opcode = MI->getOpcode();
+ const MCInstrDesc& Desc = MI->getDesc();
+ int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags, opcode);
+ if (AddrOffset >= 0) {
+ AddrOffset += X86II::getOperandBias(Desc);
+ MachineOperand& p = MI->getOperand(AddrOffset + X86::AddrBaseReg);
+ if (p.isReg() && p.getReg() != X86::ESP) {
+ seekLEAFixup(p, I, MFI);
+ }
+ MachineOperand& q = MI->getOperand(AddrOffset + X86::AddrIndexReg);
+ if (q.isReg() && q.getReg() != X86::ESP) {
+ seekLEAFixup(q, I, MFI);
+ }
+ }
+}
+
+void FixupLEAPass::seekLEAFixup(MachineOperand& p,
+ MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI) {
+ MachineBasicBlock::iterator MBI = searchBackwards(p, I, MFI);
+ if (MBI) {
+ MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI);
+ if (NewMI) {
+ ++NumLEAs;
+ DEBUG(dbgs() << "Candidate to replace:"; MBI->dump(););
+ // now to replace with an equivalent LEA...
+ DEBUG(dbgs() << "Replaced by: "; NewMI->dump(););
+ MFI->erase(MBI);
+ MachineBasicBlock::iterator J =
+ static_cast<MachineBasicBlock::iterator> (NewMI);
+ processInstruction(J, MFI);
+ }
+ }
+}
+
+bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
+ MachineFunction::iterator MFI) {
+
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+ processInstruction(I, MFI);
+ return false;
+}
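An end-to-end illustration of what the pass does (the input block is assumed, not taken from the patch): with FeatureLEAUsesAG set, a register copy that feeds a nearby memory access is turned into an LEA so the address is available at the AG stage:

// Before:
//   %EBX<def> = MOV32rr %EAX
//   %ECX<def> = MOV32rm %EBX, 1, %noreg, 8, %noreg
// After:
//   %EBX<def> = LEA32r  %EAX, 1, %noreg, 0, %noreg
//   %ECX<def> = MOV32rm %EBX, 1, %noreg, 8, %noreg
// The load's base register is written within INSTR_DISTANCE_THRESHOLD latency
// cycles by an instruction postRAConvertToLEA knows how to convert.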
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index 54cbd40..42b4e73 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -369,7 +369,14 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
/// getCompactUnwindRegNum - Get the compact unwind number for a given
/// register. The number corresponds to the enum lists in
/// compact_unwind_encoding.h.
-static int getCompactUnwindRegNum(const uint16_t *CURegs, unsigned Reg) {
+static int getCompactUnwindRegNum(unsigned Reg, bool is64Bit) {
+ static const uint16_t CU32BitRegs[] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const uint16_t CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const uint16_t *CURegs = is64Bit ? CU64BitRegs : CU32BitRegs;
for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
if (*CURegs == Reg)
return Idx;
@@ -398,16 +405,8 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
// 4 3
// 5 3
//
- static const uint16_t CU32BitRegs[] = {
- X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
- };
- static const uint16_t CU64BitRegs[] = {
- X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
- };
- const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
-
for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
- int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
+ int CUReg = getCompactUnwindRegNum(SavedRegs[i], Is64Bit);
if (CUReg == -1) return ~0U;
SavedRegs[i] = CUReg;
}
@@ -466,14 +465,6 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
static uint32_t
encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
bool Is64Bit) {
- static const uint16_t CU32BitRegs[] = {
- X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
- };
- static const uint16_t CU64BitRegs[] = {
- X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
- };
- const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
-
// Encode the registers in the order they were saved, 3-bits per register. The
// registers are numbered from 1 to CU_NUM_SAVED_REGS.
uint32_t RegEnc = 0;
@@ -481,7 +472,7 @@ encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
unsigned Reg = SavedRegs[I];
if (Reg == 0) continue;
- int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
+ int CURegNum = getCompactUnwindRegNum(Reg, Is64Bit);
if (CURegNum == -1) return ~0U;
// Encode the 3-bit register number in order, skipping over 3-bits for each
@@ -528,11 +519,17 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
if (!MI.getFlag(MachineInstr::FrameSetup)) break;
// We don't expect any more prolog instructions.
- if (ExpectEnd) return 0;
+ if (ExpectEnd) return CU::UNWIND_MODE_DWARF;
if (Opc == PushInstr) {
// If there are too many saved registers, we cannot use compact encoding.
- if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0;
+ if (SavedRegIdx >= CU_NUM_SAVED_REGS) return CU::UNWIND_MODE_DWARF;
+
+ unsigned Reg = MI.getOperand(0).getReg();
+ if (Reg == (Is64Bit ? X86::RAX : X86::EAX)) {
+ ExpectEnd = true;
+ continue;
+ }
SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg();
StackAdjust += OffsetSize;
@@ -542,7 +539,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
unsigned DstReg = MI.getOperand(0).getReg();
if (DstReg != FramePtr || SrcReg != StackPtr)
- return 0;
+ return CU::UNWIND_MODE_DWARF;
StackAdjust = 0;
memset(SavedRegs, 0, sizeof(SavedRegs));
@@ -552,7 +549,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
Opc == X86::SUB32ri || Opc == X86::SUB32ri8) {
if (StackSize)
// We already have a stack size.
- return 0;
+ return CU::UNWIND_MODE_DWARF;
if (!MI.getOperand(0).isReg() ||
MI.getOperand(0).getReg() != MI.getOperand(1).getReg() ||
@@ -560,7 +557,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// We need this to be a stack adjustment pointer. Something like:
//
// %RSP<def> = SUB64ri8 %RSP, 48
- return 0;
+ return CU::UNWIND_MODE_DWARF;
StackSize = MI.getOperand(2).getImm() / StackDivide;
SubtractInstrIdx += InstrOffset;
@@ -574,31 +571,31 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
if (HasFP) {
if ((StackAdjust & 0xFF) != StackAdjust)
// Offset was too big for compact encoding.
- return 0;
+ return CU::UNWIND_MODE_DWARF;
// Get the encoding of the saved registers when we have a frame pointer.
uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
- if (RegEnc == ~0U) return 0;
+ if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
- CompactUnwindEncoding |= 0x01000000;
+ CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
- CompactUnwindEncoding |= RegEnc & 0x7FFF;
+ CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
} else {
++StackAdjust;
uint32_t TotalStackSize = StackAdjust + StackSize;
if ((TotalStackSize & 0xFF) == TotalStackSize) {
// Frameless stack with a small stack size.
- CompactUnwindEncoding |= 0x02000000;
+ CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
// Encode the stack size.
CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16;
} else {
if ((StackAdjust & 0x7) != StackAdjust)
// The extra stack adjustments are too big for us to handle.
- return 0;
+ return CU::UNWIND_MODE_DWARF;
// Frameless stack with an offset too large for us to encode compactly.
- CompactUnwindEncoding |= 0x03000000;
+ CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
// Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
// instruction.
@@ -616,10 +613,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
uint32_t RegEnc =
encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx,
Is64Bit);
- if (RegEnc == ~0U) return 0;
+ if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
// Encode the register encoding.
- CompactUnwindEncoding |= RegEnc & 0x3FF;
+ CompactUnwindEncoding |=
+ RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
}
return CompactUnwindEncoding;
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
index 3f08b9a..6e309d8 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
@@ -19,8 +19,35 @@
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
- class MCSymbol;
- class X86TargetMachine;
+
+namespace CU {
+
+ /// Compact unwind encoding values.
+ enum CompactUnwindEncodings {
+ /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
+ /// the return address, then [RE]SP is moved to [RE]BP.
+ UNWIND_MODE_BP_FRAME = 0x01000000,
+
+ /// A frameless function with a small constant stack size.
+ UNWIND_MODE_STACK_IMMD = 0x02000000,
+
+ /// A frameless function with a large constant stack size.
+ UNWIND_MODE_STACK_IND = 0x03000000,
+
+ /// No compact unwind encoding is available.
+ UNWIND_MODE_DWARF = 0x04000000,
+
+ /// Mask for encoding the frame registers.
+ UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,
+
+ /// Mask for encoding the frameless registers.
+ UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
+ };
+
+} // end CU namespace
+
+class MCSymbol;
+class X86TargetMachine;
class X86FrameLowering : public TargetFrameLowering {
const X86TargetMachine &TM;
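A small consumer-side sketch (not from the patch) of how the masks above partition the encoding; treating 0x0F000000 as the mode mask is an assumption about the compact unwind layout:

static bool hasCompactEncoding(uint32_t Encoding) {
  // UNWIND_MODE_DWARF means "fall back to DWARF CFI"; anything else is one of
  // the compact modes (BP_FRAME, STACK_IMMD, STACK_IND).
  return (Encoding & 0x0F000000) != CU::UNWIND_MODE_DWARF;
}
static uint32_t bpFrameRegisters(uint32_t Encoding) {
  // Saved-register field, meaningful for UNWIND_MODE_BP_FRAME encodings.
  return Encoding & CU::UNWIND_BP_FRAME_REGISTERS;
}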
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 6041669..968b358 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1503,8 +1503,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
- MVT::i32, MVT::i32, MVT::Other, Ops,
- array_lengthof(Ops));
+ MVT::i32, MVT::i32, MVT::Other, Ops);
cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
return ResNode;
}
@@ -1720,7 +1719,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
Op = ADD;
break;
}
-
+
Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
bool isUnOp = !Val.getNode();
bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
@@ -1772,12 +1771,10 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
if (isUnOp) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
- Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
- array_lengthof(Ops)), 0);
+ Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
} else {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
- Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
- array_lengthof(Ops)), 0);
+ Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
}
cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
SDValue RetVals[] = { Undef, Ret };
@@ -1971,8 +1968,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
SDValue Segment = CurDAG->getRegister(0, MVT::i32);
const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
Disp, Segment, VMask, Chain};
- SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
- VTs, Ops, array_lengthof(Ops));
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), VTs, Ops);
// Node has 2 outputs: VDst and MVT::Other.
// ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
// We replace VDst of Node with VDst of ResNode, and Other of Node with Other
@@ -2186,7 +2182,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
SDValue Ops[] = {N1, InFlag};
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
@@ -2267,16 +2263,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
InFlag };
if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
- SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
- array_lengthof(Ops));
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
ResHi = SDValue(CNode, 0);
ResLo = SDValue(CNode, 1);
Chain = SDValue(CNode, 2);
InFlag = SDValue(CNode, 3);
} else {
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
- SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
- array_lengthof(Ops));
+ SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
Chain = SDValue(CNode, 0);
InFlag = SDValue(CNode, 1);
}
@@ -2287,15 +2281,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Ops[] = { N1, InFlag };
if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
- array_lengthof(Ops));
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
ResHi = SDValue(CNode, 0);
ResLo = SDValue(CNode, 1);
InFlag = SDValue(CNode, 2);
} else {
SDVTList VTs = CurDAG->getVTList(MVT::Glue);
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
- array_lengthof(Ops));
+ SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
InFlag = SDValue(CNode, 0);
}
}
@@ -2343,6 +2335,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
}
+ // Propagate ordering to the last node, for now.
+ CurDAG->AssignOrdering(InFlag.getNode(), CurDAG->GetOrdering(Node));
+
return NULL;
}
@@ -2409,8 +2404,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
Move =
SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
- MVT::Other, Ops,
- array_lengthof(Ops)), 0);
+ MVT::Other, Ops), 0);
Chain = Move.getValue(1);
ReplaceUses(N0.getValue(1), Chain);
} else {
@@ -2441,8 +2435,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
SDNode *CNode =
- CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
- array_lengthof(Ops));
+ CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
InFlag = SDValue(CNode, 1);
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
@@ -2674,8 +2667,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
Node->getDebugLoc(),
- MVT::i32, MVT::Other, Ops,
- array_lengthof(Ops));
+ MVT::i32, MVT::Other, Ops);
Result->setMemRefs(MemOp, MemOp + 2);
ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6934186..f69f5d8 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -163,10 +163,28 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
-
RegInfo = TM.getRegisterInfo();
TD = getDataLayout();
+ resetOperationActions();
+}
+
+void X86TargetLowering::resetOperationActions() {
+ const TargetMachine &TM = getTargetMachine();
+ static bool FirstTimeThrough = true;
+
+ // If none of the target options have changed, then we don't need to reset the
+ // operation actions.
+ if (!FirstTimeThrough && TO == TM.Options) return;
+
+ if (!FirstTimeThrough) {
+ // Reinitialize the actions.
+ initActions();
+ FirstTimeThrough = false;
+ }
+
+ TO = TM.Options;
+
// Set up the TargetLowering object.
static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
@@ -508,16 +526,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
- setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
- // On X86 and X86-64, atomic operations are lowered to locked instructions.
- // Locked instructions, in turn, have implicit fence semantics (all memory
- // operations are flushed before issuing the locked instruction, and they
- // are not buffered), so we can fold away the common pattern of
- // fence-atomic-fence.
- setShouldFoldAtomicFences(true);
-
// Expand certain atomics
for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
MVT VT = IntVTs[i];
@@ -1785,7 +1795,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
SDValue Ops[] = { Chain, InFlag };
Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
- MVT::Other, MVT::Glue, Ops, 2), 1);
+ MVT::Other, MVT::Glue, Ops), 1);
Val = Chain.getValue(0);
// Round the f80 to the right size, which also moves it to the appropriate
@@ -4404,13 +4414,15 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
if (Subtarget->hasInt256()) { // AVX2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops,
+ array_lengthof(Ops));
} else {
// 256-bit logic and arithmetic instructions in AVX are all
// floating-point, no support for integer ops. Emit fp zeroed vectors.
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops,
+ array_lengthof(Ops));
}
} else
llvm_unreachable("Unexpected vector type");
@@ -4431,7 +4443,8 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
if (VT.is256BitVector()) {
if (HasInt256) { // AVX2
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops,
+ array_lengthof(Ops));
} else { // AVX
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
@@ -5101,7 +5114,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
SDValue ResNode =
- DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2, MVT::i64,
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops,
+ array_lengthof(Ops), MVT::i64,
LDBase->getPointerInfo(),
LDBase->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
@@ -7624,10 +7638,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
if (InFlag) {
SDValue Ops[] = { Chain, TGA, *InFlag };
- Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3);
+ Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops));
} else {
SDValue Ops[] = { Chain, TGA };
- Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2);
+ Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops));
}
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
@@ -7937,7 +7951,7 @@ SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
}
SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
}
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
@@ -8220,8 +8234,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
- SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3,
- MVT::i64, MMO);
+ SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
+ array_lengthof(Ops), MVT::i64, MMO);
APInt FF(32, 0x5F800000ULL);
@@ -8313,8 +8327,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOLoad, MemSize, MemSize);
- Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3,
- DstTy, MMO);
+ Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops,
+ array_lengthof(Ops), DstTy, MMO);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
@@ -8328,7 +8342,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
- Ops, 3, DstTy, MMO);
+ Ops, array_lengthof(Ops), DstTy,
+ MMO);
return std::make_pair(FIST, StackSlot);
} else {
SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
@@ -8340,8 +8355,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
MVT::i32, eax.getValue(2));
SDValue Ops[] = { eax, edx };
SDValue pair = IsReplace
- ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, 2)
- : DAG.getMergeValues(Ops, 2, DL);
+ ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, array_lengthof(Ops))
+ : DAG.getMergeValues(Ops, array_lengthof(Ops), DL);
return std::make_pair(pair, SDValue());
}
}
@@ -9165,14 +9180,6 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
}
if (LHS.getNode()) {
- // If the LHS is of the form (x ^ -1) then replace the LHS with x and flip
- // the condition code later.
- bool Invert = false;
- if (LHS.getOpcode() == ISD::XOR && isAllOnes(LHS.getOperand(1))) {
- Invert = true;
- LHS = LHS.getOperand(0);
- }
-
// If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
// instruction. Since the shift amount is in-range-or-undefined, we know
// that doing a bittest on the i32 value is ok. We extend to i32 because
@@ -9189,9 +9196,6 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
- // Flip the condition if the LHS was a not instruction
- if (Invert)
- Cond = X86::GetOppositeBranchCondition(Cond);
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(Cond, MVT::i8), BT);
}
@@ -9335,14 +9339,54 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
if (VT == MVT::v2i64) {
- if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
- return SDValue();
+ if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) {
+ assert(Subtarget->hasSSE2() && "Don't know how to lower!");
+
+ // First cast everything to the right type.
+ Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+
+ // Since SSE has no unsigned integer comparisons, we need to flip the sign
+ // bits of the inputs before performing those operations. The lower
+ // compare is always unsigned.
+ SDValue SB;
+ if (FlipSigns) {
+ SB = DAG.getConstant(0x80000000U, MVT::v4i32);
+ } else {
+ SDValue Sign = DAG.getConstant(0x80000000U, MVT::i32);
+ SDValue Zero = DAG.getConstant(0x00000000U, MVT::i32);
+ SB = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ Sign, Zero, Sign, Zero);
+ }
+ Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB);
+ Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB);
+
+ // Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2))
+ SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
+ SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
+
+ // Create masks for only the low parts/high parts of the 64 bit integers.
+ const int MaskHi[] = { 1, 1, 3, 3 };
+ const int MaskLo[] = { 0, 0, 2, 2 };
+ SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
+ SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
+ SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
+
+ SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
+ Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi);
+
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, MVT::v4i32);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+
if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
// If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
// pcmpeqd + pshufd + pand.
assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
- // First cast everything to the right type,
+ // First cast everything to the right type.
Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
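The PCMPGTQ emulation added above is the usual hi/lo decomposition of a 64-bit signed compare; the sign bit of the low dwords is flipped first because the low-half compare must be unsigned while PCMPGTD compares signed. A scalar C++ sketch of the identity:

    #include <cstdint>

    // (a > b) for signed 64-bit values using only 32-bit compares:
    // high halves compared signed, low halves compared unsigned.
    int sgt64(int64_t a, int64_t b) {
      int32_t  ah = (int32_t)(a >> 32), bh = (int32_t)(b >> 32);
      uint32_t al = (uint32_t)a,        bl = (uint32_t)b;
      return (ah > bh) | ((ah == bh) & (al > bl));
    }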
@@ -9361,17 +9405,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
}
}
- // Since SSE has no unsigned integer comparisons, we need to flip the sign
+ // Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
EVT EltVT = VT.getVectorElementType();
- SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
- EltVT);
- std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
- SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
- SignBits.size());
- Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
- Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
+ SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), VT);
+ Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB);
+ Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB);
}
SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
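The FlipSigns path uses the standard trick for doing unsigned compares with signed compare instructions: toggle the sign bit of both operands first. A scalar sketch:

    #include <cstdint>

    // a >u b  <==>  (a ^ 0x80000000) >s (b ^ 0x80000000)
    int ugt32(uint32_t a, uint32_t b) {
      int32_t as = (int32_t)(a ^ 0x80000000u);
      int32_t bs = (int32_t)(b ^ 0x80000000u);
      return as > bs;
    }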
@@ -10937,7 +10977,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
SDValue(Result.getNode(), 1) };
SDValue isValid = DAG.getNode(X86ISD::CMOV, dl,
DAG.getVTList(Op->getValueType(1), MVT::Glue),
- Ops, 4);
+ Ops, array_lengthof(Ops));
// Return { result, isValid, chain }.
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
@@ -10990,7 +11030,10 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
+ unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
+ assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
+ (FrameReg == X86::EBP && VT == MVT::i32)) &&
+ "Invalid Frame Register!");
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
@@ -11010,21 +11053,23 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Handler = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
- SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
- Subtarget->is64Bit() ? X86::RBP : X86::EBP,
- getPointerTy());
- unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
-
- SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
- DAG.getIntPtrConstant(RegInfo->getSlotSize()));
- StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
+ EVT PtrVT = getPointerTy();
+ unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
+ assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
+ (FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
+ "Invalid Frame Register!");
+ SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
+ unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;
+
+ SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
+ DAG.getIntPtrConstant(RegInfo->getSlotSize()));
+ StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
- return DAG.getNode(X86ISD::EH_RETURN, dl,
- MVT::Other,
- Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
+ return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain,
+ DAG.getRegister(StoreAddrReg, PtrVT));
}
SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
@@ -11235,7 +11280,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
DAG.getVTList(MVT::Other),
- Ops, 2, MVT::i16, MMO);
+ Ops, array_lengthof(Ops), MVT::i16,
+ MMO);
// Load FP Control Word from stack slot
SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
@@ -12075,52 +12121,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
}
}
-static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- DebugLoc dl = Op.getDebugLoc();
-
- // Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
- // There isn't any reason to disable it if the target processor supports it.
- if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
- SDValue Chain = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0, MVT::i32);
- SDValue Ops[] = {
- DAG.getRegister(X86::ESP, MVT::i32), // Base
- DAG.getTargetConstant(1, MVT::i8), // Scale
- DAG.getRegister(0, MVT::i32), // Index
- DAG.getTargetConstant(0, MVT::i32), // Disp
- DAG.getRegister(0, MVT::i32), // Segment.
- Zero,
- Chain
- };
- SDNode *Res =
- DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
- array_lengthof(Ops));
- return SDValue(Res, 0);
- }
-
- unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
- if (!isDev)
- return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
-
- unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
-
- // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
- if (!Op1 && !Op2 && !Op3 && Op4)
- return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
-
- // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
- if (Op1 && !Op2 && !Op3 && !Op4)
- return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
-
- // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
- // (MFENCE)>;
- return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
-}
-
static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
@@ -12149,9 +12149,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
Zero,
Chain
};
- SDNode *Res =
- DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
- array_lengthof(Ops));
+ SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops);
return SDValue(Res, 0);
}
@@ -12185,7 +12183,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
- Ops, 5, T, MMO);
+ Ops, array_lengthof(Ops), T, MMO);
SDValue cpOut =
DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
return cpOut;
@@ -12207,7 +12205,7 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
rdx.getValue(1)
};
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
}
SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
@@ -12301,7 +12299,8 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
// For MacOSX, we want to call an alternative entry point: __sincos_stret,
- // which returns the values in two XMM registers.
+ // which returns the values as { float, float } (in XMM0) or
+ // { double, double } (which is returned in XMM0, XMM1).
DebugLoc dl = Op.getDebugLoc();
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
@@ -12316,14 +12315,16 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
Entry.isZExt = false;
Args.push_back(Entry);
+ bool isF64 = ArgVT == MVT::f64;
// Only optimize x86_64 for now. i386 is a bit messy. For f32,
// the small struct {f32, f32} is returned in (eax, edx). For f64,
// the results are returned via SRet in memory.
- const char *LibcallName = (ArgVT == MVT::f64)
- ? "__sincos_stret" : "__sincosf_stret";
+ const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret";
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
- StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ Type *RetTy = isF64
+ ? (Type*)StructType::get(ArgTy, ArgTy, NULL)
+ : (Type*)VectorType::get(ArgTy, 4);
TargetLowering::
CallLoweringInfo CLI(DAG.getEntryNode(), RetTy,
false, false, false, false, 0,
@@ -12331,7 +12332,18 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
/*doesNotRet=*/false, /*isReturnValueUsed*/true,
Callee, Args, DAG, dl);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
- return CallResult.first;
+
+ if (isF64)
+ // Returned in xmm0 and xmm1.
+ return CallResult.first;
+
+ // Returned in bits 0:31 and 32:63 of xmm0.
+ SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
+ CallResult.first, DAG.getIntPtrConstant(0));
+ SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
+ CallResult.first, DAG.getIntPtrConstant(1));
+ SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
}
/// LowerOperation - Provide custom lowering hooks for some operations.
@@ -12340,7 +12352,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
- case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, Subtarget, DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op, Subtarget, DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
@@ -12457,7 +12468,7 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
SDValue Ops[] = { Chain, In1, In2L, In2H };
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue Result =
- DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64,
+ DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, array_lengthof(Ops), MVT::i64,
cast<MemSDNode>(Node)->getMemOperand());
SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
@@ -12537,7 +12548,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
eax.getValue(2));
// Use a buildpair to merge the two 32-bit values into a 64-bit one.
SDValue Ops[] = { eax, edx };
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2));
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops,
+ array_lengthof(Ops)));
Results.push_back(edx.getValue(1));
return;
}
@@ -12576,7 +12588,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG :
X86ISD::LCMPXCHG8_DAG;
SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys,
- Ops, 3, T, MMO);
+ Ops, array_lengthof(Ops), T, MMO);
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
HalfT, Result.getValue(1));
@@ -15063,7 +15075,8 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
SDValue ResNode =
- DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2,
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+ array_lengthof(Ops),
Ld->getMemoryVT(),
Ld->getPointerInfo(),
Ld->getAlignment(),
@@ -15755,6 +15768,51 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget))
return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS);
+ // Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
+ if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT &&
+ Cond.getOpcode() == ISD::SETCC) {
+
+ assert(Cond.getValueType().isVector() &&
+ "vector select expects a vector selector!");
+
+ EVT IntVT = Cond.getValueType();
+ bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
+ bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+ if (!TValIsAllOnes && !FValIsAllZeros) {
+ // Try to invert the condition if the true value is not all 1s and the
+ // false value is not all 0s.
+ bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
+ bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
+
+ if (TValIsAllZeros || FValIsAllOnes) {
+ SDValue CC = Cond.getOperand(2);
+ ISD::CondCode NewCC =
+ ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ Cond.getOperand(0).getValueType().isInteger());
+ Cond = DAG.getSetCC(DL, IntVT, Cond.getOperand(0), Cond.getOperand(1), NewCC);
+ std::swap(LHS, RHS);
+ TValIsAllOnes = FValIsAllOnes;
+ FValIsAllZeros = TValIsAllZeros;
+ }
+ }
+
+ if (TValIsAllOnes || FValIsAllZeros) {
+ SDValue Ret;
+
+ if (TValIsAllOnes && FValIsAllZeros)
+ Ret = Cond;
+ else if (TValIsAllOnes)
+ Ret = DAG.getNode(ISD::OR, DL, IntVT, Cond,
+ DAG.getNode(ISD::BITCAST, DL, IntVT, RHS));
+ else if (FValIsAllZeros)
+ Ret = DAG.getNode(ISD::AND, DL, IntVT, Cond,
+ DAG.getNode(ISD::BITCAST, DL, IntVT, LHS));
+
+ return DAG.getNode(ISD::BITCAST, DL, VT, Ret);
+ }
+ }
+
// If we know that this node is legal then we know that it is going to be
// matched by one of the SSE/AVX BLEND instructions. These instructions only
// depend on the highest bit in each word. Try to use SimplifyDemandedBits
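The vector-select fold added above works because the selector produced by a vector compare is all-ones or all-zeros in each lane, so the blend degenerates into a single OR or AND. A per-lane scalar sketch of the identities:

    #include <cstdint>

    // cond is 0 or ~0 per lane.
    uint32_t vselect_lane(uint32_t cond, uint32_t t, uint32_t f) {
      return (cond & t) | (~cond & f);   // generic blend
    }
    // Special cases exploited by the combine:
    //   t == ~0:            (cond & ~0) | (~cond & f) == cond | f
    //   f == 0:             (cond & t)  | (~cond & 0) == cond & t
    //   t == ~0 && f == 0:  the result is cond itself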
@@ -15815,6 +15873,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
SDValue SetCC;
const ConstantSDNode* C = 0;
bool needOppositeCond = (CC == X86::COND_E);
+ bool checkAgainstTrue = false; // Is it a comparison against 1?
if ((C = dyn_cast<ConstantSDNode>(Op1)))
SetCC = Op2;
@@ -15823,18 +15882,46 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
else // Quit if all operands are not constants.
return SDValue();
- if (C->getZExtValue() == 1)
+ if (C->getZExtValue() == 1) {
needOppositeCond = !needOppositeCond;
- else if (C->getZExtValue() != 0)
+ checkAgainstTrue = true;
+ } else if (C->getZExtValue() != 0)
// Quit if the constant is neither 0 or 1.
return SDValue();
- // Skip 'zext' or 'trunc' node.
- if (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
- SetCC.getOpcode() == ISD::TRUNCATE)
- SetCC = SetCC.getOperand(0);
+ bool truncatedToBoolWithAnd = false;
+ // Skip (zext $x), (trunc $x), or (and $x, 1) node.
+ while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
+ SetCC.getOpcode() == ISD::TRUNCATE ||
+ SetCC.getOpcode() == ISD::AND) {
+ if (SetCC.getOpcode() == ISD::AND) {
+ int OpIdx = -1;
+ ConstantSDNode *CS;
+ if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) &&
+ CS->getZExtValue() == 1)
+ OpIdx = 1;
+ if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) &&
+ CS->getZExtValue() == 1)
+ OpIdx = 0;
+ if (OpIdx == -1)
+ break;
+ SetCC = SetCC.getOperand(OpIdx);
+ truncatedToBoolWithAnd = true;
+ } else
+ SetCC = SetCC.getOperand(0);
+ }
switch (SetCC.getOpcode()) {
+ case X86ISD::SETCC_CARRY:
+ // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to
+ // simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1,
+ // i.e. it's a comparison against true but the result of SETCC_CARRY is not
+ // truncated to i1 using 'and'.
+ if (checkAgainstTrue && !truncatedToBoolWithAnd)
+ break;
+ assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
+ "Invalid use of SETCC_CARRY!");
+ // FALL THROUGH
case X86ISD::SETCC:
// Set the condition code or opposite one if necessary.
CC = X86::CondCode(SetCC.getConstantOperandVal(0));
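A small sketch of why the and-with-1 tracking above matters: SETCC_CARRY materializes its "true" result as all-ones rather than 1, so a comparison against 1 is only meaningful once the value has been masked down to a single bit:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t carry_true = ~0u;        // what SETCC_CARRY yields for "true"
      assert(!(carry_true == 1));       // compared against 1 directly: false
      assert((carry_true & 1) == 1);    // after 'and $x, 1': true as expected
      return 0;
    }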
@@ -16165,8 +16252,7 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
-/// when possible.
+/// PerformShiftCombine - Combine shifts.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index 5725f7a..2727e22 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -723,6 +723,9 @@ namespace llvm {
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG) const;
+ /// \brief Reset the operation actions based on target options.
+ virtual void resetOperationActions();
+
protected:
std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(MVT VT) const;
@@ -734,6 +737,10 @@ namespace llvm {
const X86RegisterInfo *RegInfo;
const DataLayout *TD;
+ /// Used to store the TargetOptions so that we don't waste time resetting
+ /// the operation actions unless we have to.
+ TargetOptions TO;
+
/// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
/// floating point ops.
/// When SSE is available, use it for f32 operations.
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
index 0ef9491..a71e024 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
@@ -35,25 +35,27 @@ def MRM_C3 : Format<35>;
def MRM_C4 : Format<36>;
def MRM_C8 : Format<37>;
def MRM_C9 : Format<38>;
-def MRM_E8 : Format<39>;
-def MRM_F0 : Format<40>;
-def MRM_F8 : Format<41>;
-def MRM_F9 : Format<42>;
+def MRM_CA : Format<39>;
+def MRM_CB : Format<40>;
+def MRM_E8 : Format<41>;
+def MRM_F0 : Format<42>;
def RawFrmImm8 : Format<43>;
def RawFrmImm16 : Format<44>;
-def MRM_D0 : Format<45>;
-def MRM_D1 : Format<46>;
-def MRM_D4 : Format<47>;
-def MRM_D5 : Format<48>;
-def MRM_D6 : Format<49>;
-def MRM_D8 : Format<50>;
-def MRM_D9 : Format<51>;
-def MRM_DA : Format<52>;
-def MRM_DB : Format<53>;
-def MRM_DC : Format<54>;
-def MRM_DD : Format<55>;
-def MRM_DE : Format<56>;
-def MRM_DF : Format<57>;
+def MRM_F8 : Format<45>;
+def MRM_F9 : Format<46>;
+def MRM_D0 : Format<47>;
+def MRM_D1 : Format<48>;
+def MRM_D4 : Format<49>;
+def MRM_D5 : Format<50>;
+def MRM_D6 : Format<51>;
+def MRM_D8 : Format<52>;
+def MRM_D9 : Format<53>;
+def MRM_DA : Format<54>;
+def MRM_DB : Format<55>;
+def MRM_DC : Format<56>;
+def MRM_DD : Format<57>;
+def MRM_DE : Format<58>;
+def MRM_DF : Format<59>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index 7ba542c..7c0423f 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4281,7 +4281,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
- VT, MVT::Other, &AddrOps[0], AddrOps.size());
+ VT, MVT::Other, AddrOps);
NewNodes.push_back(Load);
// Preserve memory reference information.
@@ -4303,8 +4303,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
if (Load)
BeforeOps.push_back(SDValue(Load, 0));
std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
- SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0],
- BeforeOps.size());
+ SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
NewNodes.push_back(NewNode);
// Emit the store instruction.
@@ -4326,8 +4325,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
(*MMOs.first)->getAlignment() >= Alignment;
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
isAligned, TM),
- dl, MVT::Other,
- &AddrOps[0], AddrOps.size());
+ dl, MVT::Other, AddrOps);
NewNodes.push_back(Store);
// Preserve memory reference information.
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index ccc1aa2..3380d8c 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -1833,90 +1833,90 @@ include "X86InstrCompiler.td"
// Assembler Mnemonic Aliases
//===----------------------------------------------------------------------===//
-def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"call", "calll", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"call", "callq", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"cbw", "cbtw">;
-def : MnemonicAlias<"cwde", "cwtl">;
-def : MnemonicAlias<"cwd", "cwtd">;
-def : MnemonicAlias<"cdq", "cltd">;
-def : MnemonicAlias<"cdqe", "cltq">;
-def : MnemonicAlias<"cqo", "cqto">;
+def : MnemonicAlias<"cbw", "cbtw", "att">;
+def : MnemonicAlias<"cwde", "cwtl", "att">;
+def : MnemonicAlias<"cwd", "cwtd", "att">;
+def : MnemonicAlias<"cdq", "cltd", "att">;
+def : MnemonicAlias<"cdqe", "cltq", "att">;
+def : MnemonicAlias<"cqo", "cqto", "att">;
// lret maps to lretl, it is not ambiguous with lretq.
-def : MnemonicAlias<"lret", "lretl">;
+def : MnemonicAlias<"lret", "lretl", "att">;
-def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"leavel", "leave", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"loopz", "loope">;
-def : MnemonicAlias<"loopnz", "loopne">;
+def : MnemonicAlias<"loopz", "loope", "att">;
+def : MnemonicAlias<"loopnz", "loopne", "att">;
-def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"popfd", "popfl">;
+def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pop", "popq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popf", "popfl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popf", "popfq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popfd", "popfl", "att">;
// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in
// all modes. However: "push (addr)" and "push $42" should default to
// pushl/pushq depending on the current mode. Similar for "pop %bx"
-def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"pushfd", "pushfl">;
+def : MnemonicAlias<"push", "pushl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"push", "pushq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushf", "pushfl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushf", "pushfq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushfd", "pushfl", "att">;
-def : MnemonicAlias<"repe", "rep">;
-def : MnemonicAlias<"repz", "rep">;
-def : MnemonicAlias<"repnz", "repne">;
+def : MnemonicAlias<"repe", "rep", "att">;
+def : MnemonicAlias<"repz", "rep", "att">;
+def : MnemonicAlias<"repnz", "repne", "att">;
-def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"retl", "ret", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"retq", "ret", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"salb", "shlb">;
-def : MnemonicAlias<"salw", "shlw">;
-def : MnemonicAlias<"sall", "shll">;
-def : MnemonicAlias<"salq", "shlq">;
+def : MnemonicAlias<"salb", "shlb", "att">;
+def : MnemonicAlias<"salw", "shlw", "att">;
+def : MnemonicAlias<"sall", "shll", "att">;
+def : MnemonicAlias<"salq", "shlq", "att">;
-def : MnemonicAlias<"smovb", "movsb">;
-def : MnemonicAlias<"smovw", "movsw">;
-def : MnemonicAlias<"smovl", "movsl">;
-def : MnemonicAlias<"smovq", "movsq">;
+def : MnemonicAlias<"smovb", "movsb", "att">;
+def : MnemonicAlias<"smovw", "movsw", "att">;
+def : MnemonicAlias<"smovl", "movsl", "att">;
+def : MnemonicAlias<"smovq", "movsq", "att">;
-def : MnemonicAlias<"ud2a", "ud2">;
-def : MnemonicAlias<"verrw", "verr">;
+def : MnemonicAlias<"ud2a", "ud2", "att">;
+def : MnemonicAlias<"verrw", "verr", "att">;
// System instruction aliases.
-def : MnemonicAlias<"iret", "iretl">;
-def : MnemonicAlias<"sysret", "sysretl">;
-def : MnemonicAlias<"sysexit", "sysexitl">;
+def : MnemonicAlias<"iret", "iretl", "att">;
+def : MnemonicAlias<"sysret", "sysretl", "att">;
+def : MnemonicAlias<"sysexit", "sysexitl", "att">;
-def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lgdtl", "lgdt", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lgdtq", "lgdt", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lidtl", "lidt", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lidtq", "lidt", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sgdtl", "sgdt", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sgdtq", "sgdt", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sidtl", "sidt", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sidtq", "sidt", "att">, Requires<[In64BitMode]>;
// Floating point stack aliases.
-def : MnemonicAlias<"fcmovz", "fcmove">;
-def : MnemonicAlias<"fcmova", "fcmovnbe">;
-def : MnemonicAlias<"fcmovnae", "fcmovb">;
-def : MnemonicAlias<"fcmovna", "fcmovbe">;
-def : MnemonicAlias<"fcmovae", "fcmovnb">;
-def : MnemonicAlias<"fcomip", "fcompi">;
-def : MnemonicAlias<"fildq", "fildll">;
-def : MnemonicAlias<"fistpq", "fistpll">;
-def : MnemonicAlias<"fisttpq", "fisttpll">;
-def : MnemonicAlias<"fldcww", "fldcw">;
-def : MnemonicAlias<"fnstcww", "fnstcw">;
-def : MnemonicAlias<"fnstsww", "fnstsw">;
-def : MnemonicAlias<"fucomip", "fucompi">;
-def : MnemonicAlias<"fwait", "wait">;
+def : MnemonicAlias<"fcmovz", "fcmove", "att">;
+def : MnemonicAlias<"fcmova", "fcmovnbe", "att">;
+def : MnemonicAlias<"fcmovnae", "fcmovb", "att">;
+def : MnemonicAlias<"fcmovna", "fcmovbe", "att">;
+def : MnemonicAlias<"fcmovae", "fcmovnb", "att">;
+def : MnemonicAlias<"fcomip", "fcompi", "att">;
+def : MnemonicAlias<"fildq", "fildll", "att">;
+def : MnemonicAlias<"fistpq", "fistpll", "att">;
+def : MnemonicAlias<"fisttpq", "fisttpll", "att">;
+def : MnemonicAlias<"fldcww", "fldcw", "att">;
+def : MnemonicAlias<"fnstcww", "fnstcw", "att">;
+def : MnemonicAlias<"fnstsww", "fnstsw", "att">;
+def : MnemonicAlias<"fucomip", "fucompi", "att">;
+def : MnemonicAlias<"fwait", "wait", "att">;
class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond>
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index 3842387..cce938b 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4462,12 +4462,12 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
// Move Packed Doubleword Int first element to Doubleword Int
//
let SchedRW = [WriteMove] in {
-def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "vmov{d|q}\t{$src, $dst|$dst, $src}",
+def VMOVPQIto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>,
- TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>;
+ VEX;
def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
@@ -5094,6 +5094,16 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
Sched<[WriteVecALULd]>;
}
+// Helper fragments to match sext vXi1 to vXiY.
+def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
+ VR128:$src))>;
+def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i32 15)))>;
+def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i32 31)))>;
+def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
+ VR256:$src))>;
+def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i32 15)))>;
+def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i32 31)))>;
+
let Predicates = [HasAVX] in {
defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
int_x86_ssse3_pabs_b_128>, VEX;
@@ -5101,6 +5111,19 @@ let Predicates = [HasAVX] in {
int_x86_ssse3_pabs_w_128>, VEX;
defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
int_x86_ssse3_pabs_d_128>, VEX;
+
+ def : Pat<(xor
+ (bc_v2i64 (v16i1sextv16i8)),
+ (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
+ (VPABSBrr128 VR128:$src)>;
+ def : Pat<(xor
+ (bc_v2i64 (v8i1sextv8i16)),
+ (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
+ (VPABSWrr128 VR128:$src)>;
+ def : Pat<(xor
+ (bc_v2i64 (v4i1sextv4i32)),
+ (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
+ (VPABSDrr128 VR128:$src)>;
}
let Predicates = [HasAVX2] in {
@@ -5110,6 +5133,19 @@ let Predicates = [HasAVX2] in {
int_x86_avx2_pabs_w>, VEX, VEX_L;
defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
int_x86_avx2_pabs_d>, VEX, VEX_L;
+
+ def : Pat<(xor
+ (bc_v4i64 (v32i1sextv32i8)),
+ (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
+ (VPABSBrr256 VR256:$src)>;
+ def : Pat<(xor
+ (bc_v4i64 (v16i1sextv16i16)),
+ (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
+ (VPABSWrr256 VR256:$src)>;
+ def : Pat<(xor
+ (bc_v4i64 (v8i1sextv8i32)),
+ (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
+ (VPABSDrr256 VR256:$src)>;
}
defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
@@ -5119,6 +5155,21 @@ defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
int_x86_ssse3_pabs_d_128>;
+let Predicates = [HasSSSE3] in {
+ def : Pat<(xor
+ (bc_v2i64 (v16i1sextv16i8)),
+ (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
+ (PABSBrr128 VR128:$src)>;
+ def : Pat<(xor
+ (bc_v2i64 (v8i1sextv8i16)),
+ (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
+ (PABSWrr128 VR128:$src)>;
+ def : Pat<(xor
+ (bc_v2i64 (v4i1sextv4i32)),
+ (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
+ (PABSDrr128 VR128:$src)>;
+}
+
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//
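The new PABS patterns recognize the classic branchless absolute-value sequence (sign mask, add, xor). A scalar sketch of the identity being matched:

    #include <cstdint>

    int32_t abs32(int32_t x) {
      int32_t mask = x >> 31;     // 0 for x >= 0, ~0 for x < 0 (arithmetic shift)
      return (x + mask) ^ mask;   // == x when mask is 0, == -x when mask is ~0
    }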
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
index 5b6298b..89c1a68 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -34,7 +34,7 @@ def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
-
+
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"shl{w}\t{$src2, $dst|$dst, $src2}",
@@ -43,7 +43,7 @@ def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"shl{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>;
-def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
+def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"shl{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))],
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
index 053417c..bab3cdd 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
@@ -449,15 +449,15 @@ let Uses = [RDX, RAX] in {
def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
"xsave\t$dst", []>, TB;
def XSAVE64 : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
- "xsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+ "xsave{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
"xrstor\t$dst", []>, TB;
def XRSTOR64 : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
- "xrstorq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+ "xrstor{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
"xsaveopt\t$dst", []>, TB;
def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
- "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+ "xsaveopt{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
}
} // SchedRW
@@ -515,8 +515,15 @@ let Predicates = [HasFSGSBase, In64BitMode] in {
//===----------------------------------------------------------------------===//
// INVPCID Instruction
def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
- "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In32BitMode]>;
def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// SMAP Instruction
+let Defs = [EFLAGS], Uses = [EFLAGS] in {
+ def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB;
+ def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
index 7de6791..84c9203 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -18,7 +18,7 @@ def HaswellModel : SchedMachineModel {
let IssueWidth = 4;
let MinLatency = 0; // 0 = Out-of-order execution.
let LoadLatency = 4;
- let ILPWindow = 40;
+ let ILPWindow = 30;
let MispredictPenalty = 16;
}
diff --git a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 74d5f1b..b36b3ad 100644
--- a/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -19,7 +19,7 @@ def SandyBridgeModel : SchedMachineModel {
let IssueWidth = 4;
let MinLatency = 0; // 0 = Out-of-order execution.
let LoadLatency = 4;
- let ILPWindow = 30;
+ let ILPWindow = 20;
let MispredictPenalty = 16;
}
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index 14619b6..74da2a9 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -170,6 +170,26 @@ bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
}
+static bool OSHasAVXSupport() {
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+#if defined(__GNUC__)
+ // Check xgetbv; this uses a .byte sequence instead of the instruction
+ // directly because older assemblers do not include support for xgetbv and
+ // there is no easy way to conditionally compile based on the assembler used.
+ int rEAX, rEDX;
+ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
+#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
+ unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+#else
+ int rEAX = 0; // Ensures we return false
+#endif
+ return (rEAX & 6) == 6;
+#else
+ return false;
+#endif
+}
+
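For reference (not part of the patch): in XCR0, bit 1 indicates the OS saves SSE (XMM) state and bit 2 that it saves AVX (YMM) state, so the (rEAX & 6) == 6 test above requires both before reporting AVX support. A trivial sketch of that predicate:

    // xcr0 is the value read with xgetbv(0).
    static bool osEnablesAVX(unsigned long long xcr0) {
      return (xcr0 & 0x6) == 0x6;   // XMM state (bit 1) and YMM state (bit 2)
    }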
void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
unsigned MaxLevel;
@@ -192,7 +212,9 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);}
if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);}
if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);}
- if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); }
+ if (((ECX >> 27) & 1) && ((ECX >> 28) & 1) && OSHasAVXSupport()) {
+ X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX);
+ }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
@@ -467,6 +489,7 @@ void X86Subtarget::initializeEnvironment() {
PostRAScheduler = false;
PadShortFunctions = false;
CallRegIndirect = false;
+ LEAUsesAG = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index 6fbdb1d..66832b9 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -165,6 +165,9 @@ protected:
/// CallRegIndirect - True if the Calls with memory reference should be converted
/// to a register-based indirect call.
bool CallRegIndirect;
+ /// LEAUsesAG - True if the LEA instruction inputs have to be ready at
+ /// address generation (AG) time.
+ bool LEAUsesAG;
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
@@ -278,6 +281,7 @@ public:
bool hasSlowDivide() const { return HasSlowDivide; }
bool padShortFunctions() const { return PadShortFunctions; }
bool callRegIndirect() const { return CallRegIndirect; }
+ bool LEAusesAG() const { return LEAUsesAG; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index 8aa58a2..00fa47f 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -215,6 +215,11 @@ bool X86PassConfig::addPreEmitPass() {
addPass(createX86PadShortFunctions());
ShouldPrint = true;
}
+ if (getOptLevel() != CodeGenOpt::None &&
+ getX86Subtarget().LEAusesAG()){
+ addPass(createX86FixupLEAs());
+ ShouldPrint = true;
+ }
return ShouldPrint;
}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index a98c699..eba9d78 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -334,9 +334,44 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
+
+ static const TypeConversionCostTblEntry<MVT> SSE2ConvTbl[] = {
+ // These are somewhat magic numbers justified by looking at the output of
+ // Intel's IACA, running some kernels and making sure when we take
+ // legalization into account the throughput will be overestimated.
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
+ // There are faster sequences for float conversions.
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+ };
+
+ if (ST->hasSSE2() && !ST->hasAVX()) {
+ int Idx = ConvertCostTableLookup<MVT>(SSE2ConvTbl,
+ array_lengthof(SSE2ConvTbl),
+ ISD, LTDest.second, LTSrc.second);
+ if (Idx != -1)
+ return LTSrc.first * SSE2ConvTbl[Idx].Cost;
+ }
+
EVT SrcTy = TLI->getValueType(Src);
EVT DstTy = TLI->getValueType(Dst);
+ // The function getSimpleVT only handles simple value types.
if (!SrcTy.isSimple() || !DstTy.isSimple())
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
diff --git a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 7b99967..a2ae40c 100644
--- a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -97,8 +97,8 @@ static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeNegImmOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
static DecodeStatus Decode2RInstruction(MCInst &Inst,
unsigned Insn,
@@ -242,10 +242,9 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(Val));
- Inst.addOperand(MCOperand::CreateImm(0));
+static DecodeStatus DecodeNegImmOperand(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(-(int64_t)Val));
return MCDisassembler::Success;
}
diff --git a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
index 1592351..9ae8c0d 100644
--- a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
@@ -84,14 +84,3 @@ printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
assert(Op.isExpr() && "unknown operand kind in printOperand");
printExpr(Op.getExpr(), O);
}
-
-void XCoreInstPrinter::
-printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
- printOperand(MI, opNum, O);
-
- if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
- return;
-
- O << "+";
- printOperand(MI, opNum+1, O);
-}
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index b5b072d..c177365 100644
--- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -66,6 +66,9 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
+ if (RM == Reloc::Default) {
+ RM = Reloc::Static;
+ }
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/contrib/llvm/lib/Target/XCore/XCore.h b/contrib/llvm/lib/Target/XCore/XCore.h
index 08f091e..2f375fc 100644
--- a/contrib/llvm/lib/Target/XCore/XCore.h
+++ b/contrib/llvm/lib/Target/XCore/XCore.h
@@ -20,12 +20,16 @@
namespace llvm {
class FunctionPass;
+ class ModulePass;
class TargetMachine;
class XCoreTargetMachine;
class formatted_raw_ostream;
+ void initializeXCoreLowerThreadLocalPass(PassRegistry &p);
+
FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM,
CodeGenOpt::Level OptLevel);
+ ModulePass *createXCoreLowerThreadLocalPass();
} // end namespace llvm;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index 0d146ba..e177ad3 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -36,7 +36,6 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -46,12 +45,6 @@
#include <cctype>
using namespace llvm;
-static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
- cl::desc("Maximum number of threads (for emulation thread-local storage)"),
- cl::Hidden,
- cl::value_desc("number"),
- cl::init(8));
-
namespace {
class XCoreAsmPrinter : public AsmPrinter {
const XCoreSubtarget &Subtarget;
@@ -152,10 +145,10 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
EmitAlignment(Align > 2 ? Align : 2, GV);
- unsigned Size = TD->getTypeAllocSize(C->getType());
if (GV->isThreadLocal()) {
- Size *= MaxThreads;
+ report_fatal_error("TLS is not supported by this target!");
}
+ unsigned Size = TD->getTypeAllocSize(C->getType());
if (MAI->hasDotTypeDotSizeDirective()) {
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," +
@@ -164,10 +157,6 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer.EmitLabel(GVSym);
EmitGlobalConstant(C);
- if (GV->isThreadLocal()) {
- for (unsigned i = 1; i < MaxThreads; ++i)
- EmitGlobalConstant(C);
- }
// The ABI requires that unsigned scalar types smaller than 32 bits
// are padded to 32 bits.
if (Size < 4)
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index fbf86c5..eb29b50 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -68,8 +68,6 @@ namespace {
// Complex Pattern Selectors.
bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRdpii(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRcpii(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual const char *getPassName() const {
return "XCore DAG->DAG Pattern Instruction Selection";
@@ -110,48 +108,6 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
return false;
}
-bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
- }
- if (Addr.getOpcode() == ISD::ADD) {
- ConstantSDNode *CN = 0;
- if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper)
- && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
- // Constant word offset from a object in the data region
- Base = Addr.getOperand(0).getOperand(0);
- Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
- return true;
- }
- }
- return false;
-}
-
-bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
- }
- if (Addr.getOpcode() == ISD::ADD) {
- ConstantSDNode *CN = 0;
- if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper)
- && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
- // Constant word offset from a object in the data region
- Base = Addr.getOperand(0).getOperand(0);
- Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
- return true;
- }
- }
- return false;
-}
-
SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
@@ -185,36 +141,36 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
N->getOperand(2) };
return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
- Ops, 3);
+ Ops);
}
case XCoreISD::LSUB: {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
N->getOperand(2) };
return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
- Ops, 3);
+ Ops);
}
case XCoreISD::MACCU: {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3) };
return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32,
- Ops, 4);
+ Ops);
}
case XCoreISD::MACCS: {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3) };
return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32,
- Ops, 4);
+ Ops);
}
case XCoreISD::LMUL: {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3) };
return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
- Ops, 4);
+ Ops);
}
case XCoreISD::CRC8: {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32,
- Ops, 3);
+ Ops);
}
case ISD::BRIND:
if (SDNode *ResNode = SelectBRIND(N))
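
The `Ops, 3` → `Ops` changes above drop the explicit operand count because getMachineNode now takes an ArrayRef-style operand list whose length is deduced from the C array itself. A minimal standalone sketch of that mechanism; SimpleArrayRef is a hypothetical stand-in, not llvm::ArrayRef:

    #include <cstddef>

    template <typename T> class SimpleArrayRef {
      const T *Data;
      std::size_t Length;
    public:
      template <std::size_t N>
      SimpleArrayRef(const T (&Arr)[N]) : Data(Arr), Length(N) {}
      std::size_t size() const { return Length; }
      const T *data() const { return Data; }
    };

    int main() {
      int Ops[] = {1, 2, 3};
      SimpleArrayRef<int> Ref(Ops);            // length 3 deduced, no trailing count
      return static_cast<int>(Ref.size()) - 3; // 0
    }
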
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index a5d2be8..2d27f1a 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -36,6 +36,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
using namespace llvm;
const char *XCoreTargetLowering::
@@ -120,9 +122,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32 , Custom);
- // Thread Local Storage
- setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
-
// Conversion of i64 -> double produces constantpool nodes
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
@@ -172,7 +171,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode())
{
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
@@ -245,9 +243,20 @@ getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
SDValue XCoreTargetLowering::
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
{
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32);
- return getGlobalAddressWrapper(GA, GV, DAG);
+ DebugLoc DL = Op.getDebugLoc();
+ const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GN->getGlobal();
+ int64_t Offset = GN->getOffset();
+ // We can only fold positive offsets that are a multiple of the word size.
+ int64_t FoldedOffset = std::max(Offset & ~3, (int64_t)0);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, FoldedOffset);
+ GA = getGlobalAddressWrapper(GA, GV, DAG);
+ // Handle the rest of the offset.
+ if (Offset != FoldedOffset) {
+ SDValue Remaining = DAG.getConstant(Offset - FoldedOffset, MVT::i32);
+ GA = DAG.getNode(ISD::ADD, DL, MVT::i32, GA, Remaining);
+ }
+ return GA;
}
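
LowerGlobalAddress now folds only the positive, word-aligned part of a global's offset into the target address and adds the remainder back with an explicit ADD node. A small self-contained check of that split, reusing the same expressions as the code above:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t Tests[] = {-5, 0, 3, 4, 6, 11};
      for (int64_t Offset : Tests) {
        int64_t FoldedOffset = std::max(Offset & ~3, (int64_t)0); // folded into the address
        int64_t Remaining = Offset - FoldedOffset;                // added back explicitly
        assert(FoldedOffset >= 0 && FoldedOffset % 4 == 0);       // always a word offset
        assert(FoldedOffset + Remaining == Offset);               // nothing is lost
      }
      return 0;
    }
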
static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
@@ -255,44 +264,6 @@ static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
}
-static inline bool isZeroLengthArray(Type *Ty) {
- ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty);
- return AT && (AT->getNumElements() == 0);
-}
-
-SDValue XCoreTargetLowering::
-LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
-{
- // FIXME there isn't really debug info here
- DebugLoc dl = Op.getDebugLoc();
- // transform to label + getid() * size
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar) {
- // If GV is an alias then use the aliasee to determine size
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
- }
- if (!GVar) {
- llvm_unreachable("Thread local object not a GlobalVariable?");
- }
- Type *Ty = cast<PointerType>(GV->getType())->getElementType();
- if (!Ty->isSized() || isZeroLengthArray(Ty)) {
-#ifndef NDEBUG
- errs() << "Size of thread local object " << GVar->getName()
- << " is unknown\n";
-#endif
- llvm_unreachable(0);
- }
- SDValue base = getGlobalAddressWrapper(GA, GV, DAG);
- const DataLayout *TD = TM.getDataLayout();
- unsigned Size = TD->getTypeAllocSize(Ty);
- SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl),
- DAG.getConstant(Size, MVT::i32));
- return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset);
-}
-
SDValue XCoreTargetLowering::
LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const
{
@@ -350,55 +321,58 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
ScaledIndex);
}
-static bool
-IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
- int64_t &Offset)
+SDValue XCoreTargetLowering::
+lowerLoadWordFromAlignedBasePlusOffset(DebugLoc DL, SDValue Chain, SDValue Base,
+ int64_t Offset, SelectionDAG &DAG) const
{
- if (Addr.getOpcode() != ISD::ADD) {
- return false;
+ if ((Offset & 0x3) == 0) {
+ return DAG.getLoad(getPointerTy(), DL, Chain, Base, MachinePointerInfo(),
+ false, false, false, 0);
}
- ConstantSDNode *CN = 0;
- if (!(CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
- return false;
- }
- int64_t off = CN->getSExtValue();
- const SDValue &Base = Addr.getOperand(0);
- const SDValue *Root = &Base;
- if (Base.getOpcode() == ISD::ADD &&
- Base.getOperand(1).getOpcode() == ISD::SHL) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Base.getOperand(1)
- .getOperand(1));
- if (CN && (CN->getSExtValue() >= 2)) {
- Root = &Base.getOperand(0);
- }
- }
- if (isa<FrameIndexSDNode>(*Root)) {
- // All frame indicies are word aligned
- AlignedBase = Base;
- Offset = off;
- return true;
- }
- if (Root->getOpcode() == XCoreISD::DPRelativeWrapper ||
- Root->getOpcode() == XCoreISD::CPRelativeWrapper) {
- // All dp / cp relative addresses are word aligned
- AlignedBase = Base;
- Offset = off;
- return true;
- }
- // Check for an aligned global variable.
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(*Root)) {
- const GlobalValue *GV = GA->getGlobal();
- if (GA->getOffset() == 0 && GV->getAlignment() >= 4) {
- AlignedBase = Base;
- Offset = off;
- return true;
- }
+ // Lower to pair of consecutive word aligned loads plus some bit shifting.
+ int32_t HighOffset = RoundUpToAlignment(Offset, 4);
+ int32_t LowOffset = HighOffset - 4;
+ SDValue LowAddr, HighAddr;
+ if (GlobalAddressSDNode *GASD =
+ dyn_cast<GlobalAddressSDNode>(Base.getNode())) {
+ LowAddr = DAG.getGlobalAddress(GASD->getGlobal(), DL, Base.getValueType(),
+ LowOffset);
+ HighAddr = DAG.getGlobalAddress(GASD->getGlobal(), DL, Base.getValueType(),
+ HighOffset);
+ } else {
+ LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base,
+ DAG.getConstant(LowOffset, MVT::i32));
+ HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base,
+ DAG.getConstant(HighOffset, MVT::i32));
}
- return false;
+ SDValue LowShift = DAG.getConstant((Offset - LowOffset) * 8, MVT::i32);
+ SDValue HighShift = DAG.getConstant((HighOffset - Offset) * 8, MVT::i32);
+
+ SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
+ LowAddr, MachinePointerInfo(),
+ false, false, false, 0);
+ SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
+ HighAddr, MachinePointerInfo(),
+ false, false, false, 0);
+ SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
+ SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
+ High.getValue(1));
+ SDValue Ops[] = { Result, Chain };
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+static bool isWordAligned(SDValue Value, SelectionDAG &DAG)
+{
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(Value, KnownZero, KnownOne);
+ return KnownZero.countTrailingOnes() >= 2;
}
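
lowerLoadWordFromAlignedBasePlusOffset rebuilds a misaligned 32-bit load from the two surrounding aligned words, shifted and OR'd together, and isWordAligned uses ComputeMaskedBits to prove the base has at least two known-zero low bits. A scalar model of the lowering is sketched below; it assumes little-endian byte order, which is what the shift/OR recombination above relies on:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint32_t loadWordFromAlignedBasePlusOffset(const uint8_t *Base, int64_t Offset) {
      if ((Offset & 0x3) == 0) {                // already aligned: one word load
        uint32_t Word;
        std::memcpy(&Word, Base + Offset, 4);
        return Word;
      }
      int64_t HighOffset = (Offset + 3) & ~3;   // RoundUpToAlignment(Offset, 4)
      int64_t LowOffset = HighOffset - 4;
      uint32_t Low, High;
      std::memcpy(&Low, Base + LowOffset, 4);   // two aligned word loads
      std::memcpy(&High, Base + HighOffset, 4);
      unsigned LowShift = unsigned(Offset - LowOffset) * 8;
      unsigned HighShift = unsigned(HighOffset - Offset) * 8;
      return (Low >> LowShift) | (High << HighShift);
    }

    int main() {
      uint8_t Buf[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
      for (int64_t Offset = 0; Offset <= 8; ++Offset) {
        uint32_t Expected;
        std::memcpy(&Expected, Buf + Offset, 4); // reference (possibly unaligned) load
        assert(loadWordFromAlignedBasePlusOffset(Buf, Offset) == Expected);
      }
      return 0;
    }
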
SDValue XCoreTargetLowering::
LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
LoadSDNode *LD = cast<LoadSDNode>(Op);
assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
"Unexpected extension type");
@@ -416,45 +390,23 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue BasePtr = LD->getBasePtr();
DebugLoc DL = Op.getDebugLoc();
- SDValue Base;
- int64_t Offset;
- if (!LD->isVolatile() &&
- IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) {
- if (Offset % 4 == 0) {
- // We've managed to infer better alignment information than the load
- // already has. Use an aligned load.
- //
- return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr,
- MachinePointerInfo(),
- false, false, false, 0);
+ if (!LD->isVolatile()) {
+ const GlobalValue *GV;
+ int64_t Offset = 0;
+ if (DAG.isBaseWithConstantOffset(BasePtr) &&
+ isWordAligned(BasePtr->getOperand(0), DAG)) {
+ SDValue NewBasePtr = BasePtr->getOperand(0);
+ Offset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
+ return lowerLoadWordFromAlignedBasePlusOffset(DL, Chain, NewBasePtr,
+ Offset, DAG);
+ }
+ if (TLI.isGAPlusOffset(BasePtr.getNode(), GV, Offset) &&
+ MinAlign(GV->getAlignment(), 4) == 4) {
+ SDValue NewBasePtr = DAG.getGlobalAddress(GV, DL,
+ BasePtr->getValueType(0));
+ return lowerLoadWordFromAlignedBasePlusOffset(DL, Chain, NewBasePtr,
+ Offset, DAG);
}
- // Lower to
- // ldw low, base[offset >> 2]
- // ldw high, base[(offset >> 2) + 1]
- // shr low_shifted, low, (offset & 0x3) * 8
- // shl high_shifted, high, 32 - (offset & 0x3) * 8
- // or result, low_shifted, high_shifted
- SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32);
- SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
- SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
- SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
-
- SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset);
- SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
-
- SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
- LowAddr, MachinePointerInfo(),
- false, false, false, 0);
- SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
- HighAddr, MachinePointerInfo(),
- false, false, false, 0);
- SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
- SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
- SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
- High.getValue(1));
- SDValue Ops[] = { Result, Chain };
- return DAG.getMergeValues(Ops, 2, DL);
}
if (LD->getAlignment() == 2) {
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
index 8d258f5..c7dfa26 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -133,6 +133,9 @@ namespace llvm {
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
SDValue getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
SelectionDAG &DAG) const;
+ SDValue lowerLoadWordFromAlignedBasePlusOffset(DebugLoc DL, SDValue Chain,
+ SDValue Base, int64_t Offset,
+ SelectionDAG &DAG) const;
// Lower Operand specifics
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
index 03653cb..587166c 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
@@ -168,21 +168,20 @@ def ldawb : PatFrag<(ops node:$addr, node:$offset),
(sub node:$addr, (shl node:$offset, 2))>;
// Instruction operand types
-def calltarget : Operand<i32>;
+def pcrel_imm : Operand<i32>;
+def pcrel_imm_neg : Operand<i32> {
+ let DecoderMethod = "DecodeNegImmOperand";
+}
def brtarget : Operand<OtherVT>;
-def pclabel : Operand<i32>;
+def brtarget_neg : Operand<OtherVT> {
+ let DecoderMethod = "DecodeNegImmOperand";
+}
// Addressing modes
def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
-def ADDRdpii : ComplexPattern<i32, 2, "SelectADDRdpii", [add, dprelwrapper],
- []>;
-def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper],
- []>;
// Address operands
def MEMii : Operand<i32> {
- let PrintMethod = "printMemOperand";
- let DecoderMethod = "DecodeMEMiiOperand";
let MIOperandInfo = (ops i32imm, i32imm);
}
@@ -274,10 +273,10 @@ multiclass FRU6_LRU6_branch<bits<6> opc, string OpcStr> {
}
multiclass FRU6_LRU6_backwards_branch<bits<6> opc, string OpcStr> {
- def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
- !strconcat(OpcStr, " $a, -$b"), []>;
- def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
- !strconcat(OpcStr, " $a, -$b"), []>;
+ def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget_neg:$b),
+ !strconcat(OpcStr, " $a, $b"), []>;
+ def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget_neg:$b),
+ !strconcat(OpcStr, " $a, $b"), []>;
}
multiclass FRU6_LRU6_cp<bits<6> opc, string OpcStr> {
@@ -515,29 +514,29 @@ def LMUL_l6r : _FL6R<
//let Uses = [DP] in ...
let neverHasSideEffects = 1, isReMaterializable = 1 in
-def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b),
+def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins i32imm:$b),
"ldaw $a, dp[$b]", []>;
let isReMaterializable = 1 in
-def LDAWDP_lru6: _FLRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b),
+def LDAWDP_lru6: _FLRU6<0b011000, (outs RRegs:$a), (ins i32imm:$b),
"ldaw $a, dp[$b]",
- [(set RRegs:$a, ADDRdpii:$b)]>;
+ [(set RRegs:$a, (dprelwrapper tglobaladdr:$b))]>;
let mayLoad=1 in
-def LDWDP_ru6: _FRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b),
+def LDWDP_ru6: _FRU6<0b010110, (outs RRegs:$a), (ins i32imm:$b),
"ldw $a, dp[$b]", []>;
-def LDWDP_lru6: _FLRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b),
+def LDWDP_lru6: _FLRU6<0b010110, (outs RRegs:$a), (ins i32imm:$b),
"ldw $a, dp[$b]",
- [(set RRegs:$a, (load ADDRdpii:$b))]>;
+ [(set RRegs:$a, (load (dprelwrapper tglobaladdr:$b)))]>;
let mayStore=1 in
-def STWDP_ru6 : _FRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b),
+def STWDP_ru6 : _FRU6<0b010100, (outs), (ins RRegs:$a, i32imm:$b),
"stw $a, dp[$b]", []>;
-def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b),
+def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, i32imm:$b),
"stw $a, dp[$b]",
- [(store RRegs:$a, ADDRdpii:$b)]>;
+ [(store RRegs:$a, (dprelwrapper tglobaladdr:$b))]>;
//let Uses = [CP] in ..
let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in
@@ -615,9 +614,9 @@ let Uses = [R11], isCall=1 in
defm BLAT : FU6_LU6_np<0b0111001101, "blat">;
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
-def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>;
+def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget_neg:$a), "bu $a", []>;
-def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>;
+def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget_neg:$a), "bu $a", []>;
def BRFU_u6 : _FU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>;
@@ -626,12 +625,12 @@ def BRFU_lu6 : _FLU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>;
//let Uses = [CP] in ...
let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in
-def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]",
+def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins i32imm:$a), "ldaw r11, cp[$a]",
[]>;
let Defs = [R11], isReMaterializable = 1 in
-def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]",
- [(set R11, ADDRcpii:$a)]>;
+def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins i32imm:$a), "ldaw r11, cp[$a]",
+ [(set R11, (cprelwrapper tglobaladdr:$a))]>;
let Defs = [R11] in
defm GETSR : FU6_LU6_np<0b0111111100, "getsr r11,">;
@@ -658,16 +657,26 @@ defm KRESTSP : FU6_LU6_np<0b0111101111, "krestsp">;
// U10
-let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in
-def LDAPF_u10 : _FU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", []>;
+let Defs = [R11], isReMaterializable = 1 in {
+let neverHasSideEffects = 1 in
+def LDAPF_u10 : _FU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a", []>;
+
+def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a",
+ [(set R11, (pcrelwrapper tglobaladdr:$a))]>;
-let Defs = [R11], isReMaterializable = 1 in
-def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a",
+let neverHasSideEffects = 1 in
+def LDAPB_u10 : _FU10<0b110111, (outs), (ins pcrel_imm_neg:$a), "ldap r11, $a",
+ []>;
+
+let neverHasSideEffects = 1 in
+def LDAPB_lu10 : _FLU10<0b110111, (outs), (ins pcrel_imm_neg:$a),
+ "ldap r11, $a",
[(set R11, (pcrelwrapper tglobaladdr:$a))]>;
-let Defs = [R11], isReMaterializable = 1, isCodeGenOnly = 1 in
-def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a",
+let isCodeGenOnly = 1 in
+def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a",
[(set R11, (pcrelwrapper tblockaddress:$a))]>;
+}
let isCall=1,
// All calls clobber the link register and the non-callee-saved registers:
@@ -676,11 +685,15 @@ def BLACP_u10 : _FU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>;
def BLACP_lu10 : _FLU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>;
-def BLRF_u10 : _FU10<0b110100, (outs), (ins calltarget:$a), "bl $a",
+def BLRF_u10 : _FU10<0b110100, (outs), (ins pcrel_imm:$a), "bl $a",
[(XCoreBranchLink immU10:$a)]>;
-def BLRF_lu10 : _FLU10<0b110100, (outs), (ins calltarget:$a), "bl $a",
+def BLRF_lu10 : _FLU10<0b110100, (outs), (ins pcrel_imm:$a), "bl $a",
[(XCoreBranchLink immU20:$a)]>;
+
+def BLRB_u10 : _FU10<0b110101, (outs), (ins pcrel_imm_neg:$a), "bl $a", []>;
+
+def BLRB_lu10 : _FLU10<0b110101, (outs), (ins pcrel_imm_neg:$a), "bl $a", []>;
}
let Defs = [R11], mayLoad = 1, isReMaterializable = 1,
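
The new pcrel_imm_neg and brtarget_neg operands move the minus sign out of the asm strings ("bu -$a" becomes "bu $a") and into the operand value via the DecodeNegImmOperand hook. A toy model of what such a decoder is assumed to do, not the actual TableGen-generated or disassembler code:

    #include <cassert>
    #include <cstdint>

    // Assumed behaviour: the instruction encodes the magnitude of a backwards
    // displacement, and the decoder materialises it as a negative immediate.
    static int64_t decodeNegImm(uint64_t EncodedField) {
      return -static_cast<int64_t>(EncodedField);
    }

    int main() {
      // A backwards branch encoded with a field value of 12 is a -12 displacement.
      assert(decodeNegImm(12) == -12);
      return 0;
    }
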
diff --git a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
new file mode 100644
index 0000000..2e328b4
--- /dev/null
+++ b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -0,0 +1,145 @@
+//===-- XCoreLowerThreadLocal - Lower thread local variables --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains a pass that lowers thread local variables on the
+/// XCore.
+///
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+
+#define DEBUG_TYPE "xcore-lower-thread-local"
+
+using namespace llvm;
+
+static cl::opt<unsigned> MaxThreads(
+ "xcore-max-threads", cl::Optional,
+ cl::desc("Maximum number of threads (for emulation thread-local storage)"),
+    cl::desc("Maximum number of threads (for emulating thread-local storage)"),
+ cl::Hidden, cl::value_desc("number"), cl::init(8));
+
+namespace {
+ /// Lowers thread local variables on the XCore. Each thread local variable is
+ /// expanded to an array of n elements indexed by the thread ID where n is the
+  /// fixed number of hardware threads supported by the device.
+ struct XCoreLowerThreadLocal : public ModulePass {
+ static char ID;
+
+ XCoreLowerThreadLocal() : ModulePass(ID) {
+ initializeXCoreLowerThreadLocalPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool lowerGlobal(GlobalVariable *GV);
+
+ bool runOnModule(Module &M);
+ };
+}
+
+char XCoreLowerThreadLocal::ID = 0;
+
+INITIALIZE_PASS(XCoreLowerThreadLocal, "xcore-lower-thread-local",
+ "Lower thread local variables", false, false)
+
+ModulePass *llvm::createXCoreLowerThreadLocalPass() {
+ return new XCoreLowerThreadLocal();
+}
+
+static ArrayType *createLoweredType(Type *OriginalType) {
+ return ArrayType::get(OriginalType, MaxThreads);
+}
+
+static Constant *
+createLoweredInitializer(ArrayType *NewType, Constant *OriginalInitializer) {
+ SmallVector<Constant *, 8> Elements(MaxThreads);
+ for (unsigned i = 0; i != MaxThreads; ++i) {
+ Elements[i] = OriginalInitializer;
+ }
+ return ConstantArray::get(NewType, Elements);
+}
+
+static bool hasNonInstructionUse(GlobalVariable *GV) {
+ for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
+ ++UI)
+ if (!isa<Instruction>(*UI))
+ return true;
+
+ return false;
+}
+
+static bool isZeroLengthArray(Type *Ty) {
+ ArrayType *AT = dyn_cast<ArrayType>(Ty);
+ return AT && (AT->getNumElements() == 0);
+}
+
+bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) {
+ Module *M = GV->getParent();
+ LLVMContext &Ctx = M->getContext();
+ if (!GV->isThreadLocal())
+ return false;
+
+  // Skip globals that we can't lower, leaving them for the backend to diagnose.
+ if (hasNonInstructionUse(GV) ||
+ !GV->getType()->isSized() || isZeroLengthArray(GV->getType()))
+ return false;
+
+ // Create replacement global.
+ ArrayType *NewType = createLoweredType(GV->getType()->getElementType());
+ Constant *NewInitializer = createLoweredInitializer(NewType,
+ GV->getInitializer());
+ GlobalVariable *NewGV =
+ new GlobalVariable(*M, NewType, GV->isConstant(), GV->getLinkage(),
+ NewInitializer, "", 0, GlobalVariable::NotThreadLocal,
+ GV->getType()->getAddressSpace(),
+ GV->isExternallyInitialized());
+
+ // Update uses.
+ SmallVector<User *, 16> Users(GV->use_begin(), GV->use_end());
+ for (unsigned I = 0, E = Users.size(); I != E; ++I) {
+ User *U = Users[I];
+ Instruction *Inst = cast<Instruction>(U);
+ IRBuilder<> Builder(Inst);
+ Function *GetID = Intrinsic::getDeclaration(GV->getParent(),
+ Intrinsic::xcore_getid);
+ Value *ThreadID = Builder.CreateCall(GetID);
+ SmallVector<Value *, 2> Indices;
+ Indices.push_back(Constant::getNullValue(Type::getInt64Ty(Ctx)));
+ Indices.push_back(ThreadID);
+ Value *Addr = Builder.CreateInBoundsGEP(NewGV, Indices);
+ U->replaceUsesOfWith(GV, Addr);
+ }
+
+ // Remove old global.
+ NewGV->takeName(GV);
+ GV->eraseFromParent();
+ return true;
+}
+
+bool XCoreLowerThreadLocal::runOnModule(Module &M) {
+ // Find thread local globals.
+ bool MadeChange = false;
+ SmallVector<GlobalVariable *, 16> ThreadLocalGlobals;
+ for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+ GVI != E; ++GVI) {
+ GlobalVariable *GV = GVI;
+ if (GV->isThreadLocal())
+ ThreadLocalGlobals.push_back(GV);
+ }
+ for (unsigned I = 0, E = ThreadLocalGlobals.size(); I != E; ++I) {
+ MadeChange |= lowerGlobal(ThreadLocalGlobals[I]);
+ }
+ return MadeChange;
+}
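
The new pass replaces each thread-local global with an array of MaxThreads copies and rewrites every use to index that array with the hardware thread ID returned by the xcore_getid intrinsic. A host-side C++ model of the resulting layout; the names are illustrative, not taken from the pass:

    #include <cassert>

    static const unsigned MaxThreads = 8;     // mirrors -xcore-max-threads
    static int Counter[MaxThreads];           // was conceptually: __thread int Counter;

    static int &counterForThread(unsigned ThreadID) {
      assert(ThreadID < MaxThreads && "thread id out of range");
      return Counter[ThreadID];               // GEP [0, getid()] in the lowered IR
    }

    int main() {
      counterForThread(0) = 1;
      counterForThread(3) = 7;
      return counterForThread(0) + counterForThread(3) - 8; // 0
    }
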
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 28c3d12..07e5fff 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -46,6 +46,7 @@ public:
return getTM<XCoreTargetMachine>();
}
+ virtual bool addPreISel();
virtual bool addInstSelector();
};
} // namespace
@@ -54,6 +55,11 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
return new XCorePassConfig(this, PM);
}
+bool XCorePassConfig::addPreISel() {
+ addPass(createXCoreLowerThreadLocalPass());
+ return false;
+}
+
bool XCorePassConfig::addInstSelector() {
addPass(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel()));
return false;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
index 8203899..88e3bfd 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -57,9 +57,4 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
ELF::SHF_ALLOC |
ELF::XCORE_SHF_CP_SECTION,
SectionKind::getReadOnlyWithRel());
-
- // Dynamic linking is not supported. Data with relocations is placed in the
- // same section as data without relocations.
- DataRelSection = DataRelLocalSection = DataSection;
- DataRelROSection = DataRelROLocalSection = ReadOnlySection;
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 8336d3a..a7bf188 100644
--- a/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -66,13 +67,13 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
static void FindUsedValues(GlobalVariable *LLVMUsed,
SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
if (LLVMUsed == 0) return;
- ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
- if (Inits == 0) return;
-
- for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
- if (GlobalValue *GV =
- dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
- UsedValues.insert(GV);
+ ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
+ Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases();
+ GlobalValue *GV = cast<GlobalValue>(Operand);
+ UsedValues.insert(GV);
+ }
}
// True if A is better than B.
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index dc99492..201f320 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -42,6 +42,7 @@ namespace {
private:
SmallPtrSet<GlobalValue*, 32> AliveGlobals;
+ SmallPtrSet<Constant *, 8> SeenConstants;
/// GlobalIsNeeded - mark the specific global value as needed, and
/// recursively mark anything that it uses as also needed.
@@ -151,6 +152,7 @@ bool GlobalDCE::runOnModule(Module &M) {
// Make sure that all memory is released
AliveGlobals.clear();
+ SeenConstants.clear();
return Changed;
}
@@ -190,12 +192,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
return GlobalIsNeeded(GV);
-
+
// Loop over all of the operands of the constant, adding any globals they
// use to the list of needed globals.
- for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
- if (Constant *OpC = dyn_cast<Constant>(*I))
- MarkUsedGlobalsAsNeeded(OpC);
+ for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) {
+ // If we've already processed this constant there's no need to do it again.
+ Constant *Op = dyn_cast<Constant>(*I);
+ if (Op && SeenConstants.insert(Op))
+ MarkUsedGlobalsAsNeeded(Op);
+ }
}
// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index b035a82..0ef900e 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -3041,6 +3041,105 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
return true;
}
+static Value::use_iterator getFirst(Value *V, SmallPtrSet<Use*, 8> &Tried) {
+ for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
+ Use *U = &I.getUse();
+ if (Tried.count(U))
+ continue;
+
+ User *Usr = *I;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(Usr);
+ if (!GV || !GV->hasName()) {
+ Tried.insert(U);
+ return I;
+ }
+
+ StringRef Name = GV->getName();
+ if (Name != "llvm.used" && Name != "llvm.compiler_used") {
+ Tried.insert(U);
+ return I;
+ }
+ }
+ return V->use_end();
+}
+
+static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New);
+
+static bool replaceUsesOfWithOnConstant(ConstantArray *CA, Value *From,
+ Value *ToV, Use *U) {
+ Constant *To = cast<Constant>(ToV);
+
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
+ Constant *Op = CA->getOperand(i);
+ NewOps.push_back(Op == From ? To : Op);
+ }
+
+ Constant *Replacement = ConstantArray::get(CA->getType(), NewOps);
+ assert(Replacement != CA && "CA didn't contain From!");
+
+ bool Ret = replaceAllNonLLVMUsedUsesWith(CA, Replacement);
+ if (Replacement->use_empty())
+ Replacement->destroyConstant();
+ if (CA->use_empty())
+ CA->destroyConstant();
+ return Ret;
+}
+
+static bool replaceUsesOfWithOnConstant(ConstantExpr *CE, Value *From,
+ Value *ToV, Use *U) {
+ Constant *To = cast<Constant>(ToV);
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
+ Constant *Op = CE->getOperand(i);
+ NewOps.push_back(Op == From ? To : Op);
+ }
+
+ Constant *Replacement = CE->getWithOperands(NewOps);
+ assert(Replacement != CE && "CE didn't contain From!");
+
+ bool Ret = replaceAllNonLLVMUsedUsesWith(CE, Replacement);
+ if (Replacement->use_empty())
+ Replacement->destroyConstant();
+ if (CE->use_empty())
+ CE->destroyConstant();
+ return Ret;
+}
+
+static bool replaceUsesOfWithOnConstant(Constant *C, Value *From, Value *To,
+ Use *U) {
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
+ return replaceUsesOfWithOnConstant(CA, From, To, U);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return replaceUsesOfWithOnConstant(CE, From, To, U);
+ C->replaceUsesOfWithOnConstant(From, To, U);
+ return true;
+}
+
+static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New) {
+ SmallPtrSet<Use*, 8> Tried;
+ bool Ret = false;
+ for (;;) {
+ Value::use_iterator I = getFirst(Old, Tried);
+ if (I == Old->use_end())
+ break;
+ Use &U = I.getUse();
+
+      // Constants must be handled specially; we cannot call replaceUsesOfWith on a
+      // constant because constants are uniqued.
+ if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ if (!isa<GlobalValue>(C)) {
+ Ret |= replaceUsesOfWithOnConstant(C, Old, New, &U);
+ continue;
+ }
+ }
+
+ U.set(New);
+ Ret = true;
+ }
+ return Ret;
+}
+
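
replaceAllNonLLVMUsedUsesWith has to rebuild constant users rather than mutate them, because constants are uniqued; the rebuilt constant is then substituted for the old one and the dead original destroyed. A toy, non-LLVM illustration of that rebuild-and-swap pattern (InternedTuple is purely hypothetical):

    #include <cassert>
    #include <string>
    #include <vector>

    struct InternedTuple {                    // stands in for a uniqued ConstantArray
      std::vector<std::string> Ops;
    };

    static InternedTuple replaceOperand(const InternedTuple &CA,
                                        const std::string &From,
                                        const std::string &To) {
      InternedTuple New;
      for (const std::string &Op : CA.Ops)
        New.Ops.push_back(Op == From ? To : Op); // same loop shape as the code above
      return New;                                // caller swaps uses over to New
    }

    int main() {
      InternedTuple CA{{"alias", "other"}};
      InternedTuple R = replaceOperand(CA, "alias", "aliasee");
      assert(R.Ops[0] == "aliasee" && R.Ops[1] == "other");
      return 0;
    }
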
bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
bool Changed = false;
@@ -3060,11 +3159,12 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
// Make all users of the alias use the aliasee instead.
- if (!J->use_empty()) {
- J->replaceAllUsesWith(Aliasee);
+ if (replaceAllNonLLVMUsedUsesWith(J, Aliasee)) {
++NumAliasesResolved;
Changed = true;
}
+ if (!J->use_empty())
+ continue;
// If the alias is externally visible, we may still be able to simplify it.
if (!J->hasLocalLinkage()) {
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 892100f..4ce749c 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -72,6 +72,15 @@ STATISTIC(NumThunksWritten, "Number of thunks generated");
STATISTIC(NumAliasesWritten, "Number of aliases generated");
STATISTIC(NumDoubleWeak, "Number of new functions created");
+/// Returns the type id for a type to be hashed. We turn pointer types into
+/// integers here because the actual compare logic below considers pointers and
+/// integers of the same size as equal.
+static Type::TypeID getTypeIDForHash(Type *Ty) {
+ if (Ty->isPointerTy())
+ return Type::IntegerTyID;
+ return Ty->getTypeID();
+}
+
/// Creates a hash-code for the function which is the same for any two
/// functions that will compare equal, without looking at the instructions
/// inside the function.
@@ -83,9 +92,9 @@ static unsigned profileFunction(const Function *F) {
ID.AddInteger(F->getCallingConv());
ID.AddBoolean(F->hasGC());
ID.AddBoolean(FTy->isVarArg());
- ID.AddInteger(FTy->getReturnType()->getTypeID());
+ ID.AddInteger(getTypeIDForHash(FTy->getReturnType()));
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- ID.AddInteger(FTy->getParamType(i)->getTypeID());
+ ID.AddInteger(getTypeIDForHash(FTy->getParamType(i)));
return ID.ComputeHash();
}
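
getTypeIDForHash hashes pointer types as if they were integers, so functions that differ only in pointer-versus-intptr signatures fall into the same bucket and reach the detailed comparator, which already treats such types as equivalent. A minimal sketch of the bucketing effect; the enum values here are illustrative, not LLVM's:

    #include <cassert>

    enum TypeID { IntegerTyID = 1, PointerTyID = 2, FloatTyID = 3 };

    static TypeID getTypeIDForHash(TypeID Ty) {
      if (Ty == PointerTyID)
        return IntegerTyID;   // fold pointers into the integer bucket
      return Ty;
    }

    int main() {
      assert(getTypeIDForHash(PointerTyID) == getTypeIDForHash(IntegerTyID));
      assert(getTypeIDForHash(FloatTyID) != getTypeIDForHash(IntegerTyID));
      return 0;
    }
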
@@ -200,8 +209,7 @@ private:
// Any two pointers in the same address space are equivalent, intptr_t and
// pointers are equivalent. Otherwise, standard type equivalence rules apply.
-bool FunctionComparator::isEquivalentType(Type *Ty1,
- Type *Ty2) const {
+bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
if (Ty1 == Ty2)
return true;
if (Ty1->getTypeID() != Ty2->getTypeID()) {
@@ -740,7 +748,13 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
if (NewG->getReturnType()->isVoidTy()) {
Builder.CreateRetVoid();
} else {
- Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
+ Type *RetTy = NewG->getReturnType();
+ if (CI->getType()->isIntegerTy() && RetTy->isPointerTy())
+ Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy));
+ else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy())
+ Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy));
+ else
+ Builder.CreateRet(Builder.CreateBitCast(CI, RetTy));
}
NewG->copyAttributesFrom(G);
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 027a9f2..986c0b8 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -33,7 +33,12 @@ RunLoopVectorization("vectorize-loops",
cl::desc("Run the Loop vectorization passes"));
static cl::opt<bool>
-RunBBVectorization("vectorize", cl::desc("Run the BB vectorization passes"));
+RunSLPVectorization("vectorize-slp",
+ cl::desc("Run the SLP vectorization passes"));
+
+static cl::opt<bool>
+RunBBVectorization("vectorize-slp-aggressive",
+ cl::desc("Run the BB vectorization passes"));
static cl::opt<bool>
UseGVNAfterVectorization("use-gvn-after-vectorization",
@@ -52,7 +57,8 @@ PassManagerBuilder::PassManagerBuilder() {
DisableSimplifyLibCalls = false;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
- Vectorize = RunBBVectorization;
+ BBVectorize = RunBBVectorization;
+ SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
}
@@ -207,7 +213,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
- if (Vectorize) {
+ if (SLPVectorize)
+ MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
+ if (BBVectorize) {
MPM.add(createBBVectorizePass());
MPM.add(createInstructionCombiningPass());
if (OptLevel > 1 && UseGVNAfterVectorization)
@@ -321,6 +330,14 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createGlobalDCEPass());
}
+inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
+ return reinterpret_cast<PassManagerBuilder*>(P);
+}
+
+inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
+ return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
+}
+
LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
PassManagerBuilder *PMB = new PassManagerBuilder();
return wrap(PMB);
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 5f8681f..3396f79 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -195,10 +195,9 @@ static void findUsedValues(GlobalVariable *LLVMUsed,
SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
if (LLVMUsed == 0) return;
UsedValues.insert(LLVMUsed);
-
- ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
- if (Inits == 0) return;
-
+
+ ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+
for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
if (GlobalValue *GV =
dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
index 1f6a3a5..2a36074 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
@@ -233,6 +233,7 @@ private:
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
Value *EmitGEPOffset(User *GEP);
+ Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
public:
// InsertNewInstBefore - insert an instruction New before instruction Old
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 7595da0..166f8df 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -24,9 +24,9 @@ namespace {
/// Class representing coefficient of floating-point addend.
/// This class needs to be highly efficient, which is especially true for
/// the constructor. As of I write this comment, the cost of the default
- /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
+ /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
/// perform write-merging).
- ///
+ ///
class FAddendCoef {
public:
// The constructor has to initialize a APFloat, which is uncessary for
@@ -37,31 +37,31 @@ namespace {
//
FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
~FAddendCoef();
-
+
void set(short C) {
assert(!insaneIntVal(C) && "Insane coefficient");
IsFp = false; IntVal = C;
}
-
+
void set(const APFloat& C);
void negate();
-
+
bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
Value *getValue(Type *) const;
-
+
// If possible, don't define operator+/operator- etc because these
// operators inevitably call FAddendCoef's constructor which is not cheap.
void operator=(const FAddendCoef &A);
void operator+=(const FAddendCoef &A);
void operator-=(const FAddendCoef &A);
void operator*=(const FAddendCoef &S);
-
+
bool isOne() const { return isInt() && IntVal == 1; }
bool isTwo() const { return isInt() && IntVal == 2; }
bool isMinusOne() const { return isInt() && IntVal == -1; }
bool isMinusTwo() const { return isInt() && IntVal == -2; }
-
+
private:
bool insaneIntVal(int V) { return V > 4 || V < -4; }
APFloat *getFpValPtr(void)
@@ -74,26 +74,28 @@ namespace {
return *getFpValPtr();
}
- APFloat &getFpVal(void)
- { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); }
-
+ APFloat &getFpVal(void) {
+ assert(IsFp && BufHasFpVal && "Incorret state");
+ return *getFpValPtr();
+ }
+
bool isInt() const { return !IsFp; }
// If the coefficient is represented by an integer, promote it to a
- // floating point.
+ // floating point.
void convertToFpType(const fltSemantics &Sem);
// Construct an APFloat from a signed integer.
// TODO: We should get rid of this function when APFloat can be constructed
- // from an *SIGNED* integer.
+ // from an *SIGNED* integer.
APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
private:
bool IsFp;
-
+
// True iff FpValBuf contains an instance of APFloat.
bool BufHasFpVal;
-
+
// The integer coefficient of an individual addend is either 1 or -1,
// and we try to simplify at most 4 addends from neighboring at most
// two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
@@ -102,7 +104,7 @@ namespace {
AlignedCharArrayUnion<APFloat> FpValBuf;
};
-
+
/// FAddend is used to represent floating-point addend. An addend is
/// represented as <C, V>, where the V is a symbolic value, and C is a
/// constant coefficient. A constant addend is represented as <C, 0>.
@@ -110,10 +112,10 @@ namespace {
class FAddend {
public:
FAddend() { Val = 0; }
-
+
Value *getSymVal (void) const { return Val; }
const FAddendCoef &getCoef(void) const { return Coeff; }
-
+
bool isConstant() const { return Val == 0; }
bool isZero() const { return Coeff.isZero(); }
@@ -122,17 +124,17 @@ namespace {
{ Coeff.set(Coefficient); Val = V; }
void set(const ConstantFP* Coefficient, Value *V)
{ Coeff.set(Coefficient->getValueAPF()); Val = V; }
-
+
void negate() { Coeff.negate(); }
-
+
/// Drill down the U-D chain one step to find the definition of V, and
/// try to break the definition into one or two addends.
static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
-
+
/// Similar to FAddend::drillDownOneStep() except that the value being
/// splitted is the addend itself.
unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
-
+
void operator+=(const FAddend &T) {
assert((Val == T.Val) && "Symbolic-values disagree");
Coeff += T.Coeff;
@@ -140,12 +142,12 @@ namespace {
private:
void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
-
+
// This addend has the value of "Coeff * Val".
Value *Val;
FAddendCoef Coeff;
};
-
+
/// FAddCombine is the class for optimizing an unsafe fadd/fsub along
/// with its neighboring at most two instructions.
///
@@ -153,17 +155,17 @@ namespace {
public:
FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
Value *simplify(Instruction *FAdd);
-
+
private:
typedef SmallVector<const FAddend*, 4> AddendVect;
-
+
Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
Value *performFactorization(Instruction *I);
/// Convert given addend to a Value
Value *createAddendVal(const FAddend &A, bool& NeedNeg);
-
+
/// Return the number of instructions needed to emit the N-ary addition.
unsigned calcInstrNumber(const AddendVect& Vect);
Value *createFSub(Value *Opnd0, Value *Opnd1);
@@ -173,10 +175,10 @@ namespace {
Value *createFNeg(Value *V);
Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
void createInstPostProc(Instruction *NewInst);
-
+
InstCombiner::BuilderTy *Builder;
Instruction *Instr;
-
+
private:
// Debugging stuff are clustered here.
#ifndef NDEBUG
@@ -188,7 +190,7 @@ namespace {
void incCreateInstNum() {}
#endif
};
-}
+}
//===----------------------------------------------------------------------===//
//
@@ -211,7 +213,7 @@ void FAddendCoef::set(const APFloat& C) {
} else
*P = C;
- IsFp = BufHasFpVal = true;
+ IsFp = BufHasFpVal = true;
}
void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
@@ -225,7 +227,7 @@ void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
new(P) APFloat(Sem, 0 - IntVal);
P->changeSign();
}
- IsFp = BufHasFpVal = true;
+ IsFp = BufHasFpVal = true;
}
APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
@@ -254,14 +256,14 @@ void FAddendCoef::operator+=(const FAddendCoef &That) {
getFpVal().add(That.getFpVal(), RndMode);
return;
}
-
+
if (isInt()) {
const APFloat &T = That.getFpVal();
convertToFpType(T.getSemantics());
getFpVal().add(T, RndMode);
return;
}
-
+
APFloat &T = getFpVal();
T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
}
@@ -275,7 +277,7 @@ void FAddendCoef::operator-=(const FAddendCoef &That) {
getFpVal().subtract(That.getFpVal(), RndMode);
return;
}
-
+
if (isInt()) {
const APFloat &T = That.getFpVal();
convertToFpType(T.getSemantics());
@@ -303,7 +305,7 @@ void FAddendCoef::operator*=(const FAddendCoef &That) {
return;
}
- const fltSemantics &Semantic =
+ const fltSemantics &Semantic =
isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
if (isInt())
@@ -338,11 +340,11 @@ Value *FAddendCoef::getValue(Type *Ty) const {
// A - B <1, A>, <1,B>
// 0 - B <-1, B>
// C * A, <C, A>
-// A + C <1, A> <C, NULL>
+// A + C <1, A> <C, NULL>
// 0 +/- 0 <0, NULL> (corner case)
//
// Legend: A and B are not constant, C is constant
-//
+//
unsigned FAddend::drillValueDownOneStep
(Value *Val, FAddend &Addend0, FAddend &Addend1) {
Instruction *I = 0;
@@ -413,7 +415,7 @@ unsigned FAddend::drillAddendDownOneStep
return 0;
unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
- if (!BreakNum || Coeff.isOne())
+ if (!BreakNum || Coeff.isOne())
return BreakNum;
Addend0.Scale(Coeff);
@@ -435,10 +437,10 @@ unsigned FAddend::drillAddendDownOneStep
Value *FAddCombine::performFactorization(Instruction *I) {
assert((I->getOpcode() == Instruction::FAdd ||
I->getOpcode() == Instruction::FSub) && "Expect add/sub");
-
+
Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
-
+
if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
return 0;
@@ -453,14 +455,14 @@ Value *FAddCombine::performFactorization(Instruction *I) {
Value *Opnd1_0 = I1->getOperand(0);
Value *Opnd1_1 = I1->getOperand(1);
- // Input Instr I Factor AddSub0 AddSub1
+ // Input Instr I Factor AddSub0 AddSub1
// ----------------------------------------------
// (x*y) +/- (x*z) x y z
// (y/x) +/- (z/x) x y z
//
Value *Factor = 0;
Value *AddSub0 = 0, *AddSub1 = 0;
-
+
if (isMpy) {
if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
Factor = Opnd0_0;
@@ -492,7 +494,7 @@ Value *FAddCombine::performFactorization(Instruction *I) {
if (isMpy)
return createFMul(Factor, NewAddSub);
-
+
return createFDiv(NewAddSub, Factor);
}
@@ -506,7 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) {
assert((I->getOpcode() == Instruction::FAdd ||
I->getOpcode() == Instruction::FSub) && "Expect add/sub");
- // Save the instruction before calling other member-functions.
+ // Save the instruction before calling other member-functions.
Instr = I;
FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
@@ -517,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) {
unsigned Opnd0_ExpNum = 0;
unsigned Opnd1_ExpNum = 0;
- if (!Opnd0.isConstant())
+ if (!Opnd0.isConstant())
Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
// Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
@@ -539,7 +541,7 @@ Value *FAddCombine::simplify(Instruction *I) {
Value *V0 = I->getOperand(0);
Value *V1 = I->getOperand(1);
- InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
+ InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
(!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
@@ -579,7 +581,7 @@ Value *FAddCombine::simplify(Instruction *I) {
return R;
}
- // step 6: Try factorization as the last resort,
+ // step 6: Try factorization as the last resort,
return performFactorization(I);
}
@@ -588,7 +590,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
unsigned AddendNum = Addends.size();
assert(AddendNum <= 4 && "Too many addends");
- // For saving intermediate results;
+ // For saving intermediate results;
unsigned NextTmpIdx = 0;
FAddend TmpResult[3];
@@ -604,7 +606,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
AddendVect SimpVect;
// The outer loop works on one symbolic-value at a time. Suppose the input
- // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
+ // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
// The symbolic-values will be processed in this order: x, y, z.
//
for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
@@ -631,7 +633,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
if (T && T->getSymVal() == Val) {
// Set null such that next iteration of the outer loop will not process
// this addend again.
- Addends[SameSymIdx] = 0;
+ Addends[SameSymIdx] = 0;
SimpVect.push_back(T);
}
}
@@ -644,7 +646,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
R += *SimpVect[Idx];
// Pop all addends being folded and push the resulting folded addend.
- SimpVect.resize(StartIdx);
+ SimpVect.resize(StartIdx);
if (Val != 0) {
if (!R.isZero()) {
SimpVect.push_back(&R);
@@ -657,7 +659,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
}
}
- assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
+ assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
"out-of-bound access");
if (ConstAdd)
@@ -679,7 +681,7 @@ Value *FAddCombine::createNaryFAdd
assert(!Opnds.empty() && "Expect at least one addend");
// Step 1: Check if the # of instructions needed exceeds the quota.
- //
+ //
unsigned InstrNeeded = calcInstrNumber(Opnds);
if (InstrNeeded > InstrQuota)
return 0;
@@ -700,7 +702,7 @@ Value *FAddCombine::createNaryFAdd
// Iterate the addends, creating fadd/fsub using adjacent two addends.
for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
I != E; I++) {
- bool NeedNeg;
+ bool NeedNeg;
Value *V = createAddendVal(**I, NeedNeg);
if (!LastVal) {
LastVal = V;
@@ -726,7 +728,7 @@ Value *FAddCombine::createNaryFAdd
}
#ifndef NDEBUG
- assert(CreateInstrNum == InstrNeeded &&
+ assert(CreateInstrNum == InstrNeeded &&
"Inconsistent in instruction numbers");
#endif
@@ -784,8 +786,8 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
unsigned OpndNum = Opnds.size();
unsigned InstrNeeded = OpndNum - 1;
- // The number of addends in the form of "(-1)*x".
- unsigned NegOpndNum = 0;
+ // The number of addends in the form of "(-1)*x".
+ unsigned NegOpndNum = 0;
// Adjust the number of instructions needed to emit the N-ary add.
for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
@@ -972,6 +974,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
}
+ // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
+ // transform them into (X + (signbit ^ C))
+ if (XorRHS->getValue().isSignBit())
+ return BinaryOperator::CreateAdd(XorLHS,
+ ConstantExpr::getXor(XorRHS, CI));
}
}
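
The new visitAdd case relies on the identity (X ^ signbit) + C == X + (signbit ^ C) in modular arithmetic, since XOR-ing the sign bit is the same as adding it. An exhaustive 8-bit check of that identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned SignBit = 0x80;
      for (unsigned X = 0; X < 256; ++X)
        for (unsigned C = 0; C < 256; ++C) {
          uint8_t Lhs = static_cast<uint8_t>((X ^ SignBit) + C);
          uint8_t Rhs = static_cast<uint8_t>(X + (SignBit ^ C));
          assert(Lhs == Rhs);   // holds for every 8-bit X and C
        }
      return 0;
    }
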
@@ -1230,6 +1237,31 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
}
}
+ // select C, 0, B + select C, A, 0 -> select C, A, B
+ {
+ Value *A1, *B1, *C1, *A2, *B2, *C2;
+ if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) &&
+ match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) {
+ if (C1 == C2) {
+ Constant *Z1=0, *Z2=0;
+ Value *A, *B, *C=C1;
+ if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(A1); A = A2;
+ Z2 = dyn_cast<Constant>(B2); B = B1;
+ } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) {
+ Z1 = dyn_cast<Constant>(B1); B = B2;
+ Z2 = dyn_cast<Constant>(A2); A = A1;
+ }
+
+ if (Z1 && Z2 &&
+ (I.hasNoSignedZeros() ||
+ (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) {
+ return SelectInst::Create(C, A, B);
+ }
+ }
+ }
+ }
+
if (I.hasUnsafeAlgebra()) {
if (Value *V = FAddCombine(Builder).simplify(&I))
return ReplaceInstUsesWith(I, V);
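
The new fadd case folds select C, 0, B + select C, A, 0 into select C, A, B; the nsz / negative-zero checks exist because adding a positive zero is not always a no-op in floating point. A scalar spot check of the folded shape, ignoring the signed-zero subtleties the real code guards against:

    #include <cassert>

    static double foldedSelectAdd(bool C, double A, double B) {
      return (C ? 0.0 : B) + (C ? A : 0.0);   // select C, 0, B  +  select C, A, 0
    }

    int main() {
      assert(foldedSelectAdd(true, 2.5, 7.0) == 2.5);   // behaves like select C, A, B
      assert(foldedSelectAdd(false, 2.5, 7.0) == 7.0);
      return 0;
    }
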
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 990cbc3..ec75dd2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -266,9 +266,8 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
return 0;
}
-
-/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
-/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient
+/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
+/// (V < Lo || V >= Hi). In practice, we emit the more efficient
/// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
/// whether to treat the V, Lo and HI as signed or not. IB is the location to
/// insert new instructions.
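
The reworded comment describes the usual range-check strength reduction: for Lo <= Hi, V >= Lo && V < Hi is equivalent to the single unsigned comparison (V - Lo) <u (Hi - Lo). A quick standalone verification over a small range:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t Lo = 13, Hi = 15;
      for (int64_t V = -40; V <= 40; ++V) {
        bool Direct = (V >= Lo && V < Hi);
        bool Trick = (uint32_t)((int32_t)V - Lo) < (uint32_t)(Hi - Lo);
        assert(Direct == Trick);
      }
      return 0;
    }
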
@@ -935,6 +934,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+ if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType())
+ return 0;
+
// (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
@@ -1545,14 +1547,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ:
- if (LHSCst == SubOne(RHSCst)) {
- // (X == 13 | X == 14) -> X-13 <u 2
- Constant *AddCST = ConstantExpr::getNeg(LHSCst);
- Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
- AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
- return Builder->CreateICmpULT(Add, AddCST);
- }
-
if (LHS->getOperand(0) == RHS->getOperand(0)) {
// if LHSCst and RHSCst differ only by one bit:
// (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1
@@ -1566,6 +1560,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
}
+ if (LHSCst == SubOne(RHSCst)) {
+ // (X == 13 | X == 14) -> X-13 <u 2
+ Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+ Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+ return Builder->CreateICmpULT(Add, AddCST);
+ }
+
break; // (X == 13 | X == 15) -> no change
case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
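
The block moved above is the classic adjacent-equality fold: X == C || X == C+1 becomes the unsigned range test X - C <u 2, now tried only after the differ-by-one-bit case. A small check of the arithmetic with C = 13:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int64_t X = -5; X <= 30; ++X) {
        bool Direct = (X == 13 || X == 14);
        bool Folded = (uint32_t)((int32_t)X - 13) < 2;   // X-13 <u 2
        assert(Direct == Folded);
      }
      return 0;
    }
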
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 64cd1bd..78b4a2c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1372,7 +1372,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
NestF->getType() == PointerType::getUnqual(NewFTy) ?
NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
- const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs);
+ const AttributeSet &NewPAL =
+ AttributeSet::get(FTy->getContext(), NewAttrs);
Instruction *NewCaller;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index a96e754..4c252c0 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -232,7 +232,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return 0;
-
+
uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays.
@@ -2487,6 +2487,55 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(Pred, Y, Z);
}
+ // icmp slt (X + -1), Y -> icmp sle X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
+ match(B, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
+
+ // icmp sge (X + -1), Y -> icmp sgt X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
+ match(B, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
+
+ // icmp sle (X + 1), Y -> icmp slt X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE &&
+ match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
+
+ // icmp sgt (X + 1), Y -> icmp sge X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT &&
+ match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
+
+ // if C1 has greater magnitude than C2:
+ // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
+ // s.t. C3 = C1 - C2
+ //
+ // if C2 has greater magnitude than C1:
+ // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
+ // s.t. C3 = C2 - C1
+ if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
+ (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
+ if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
+ if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
+ const APInt &AP1 = C1->getValue();
+ const APInt &AP2 = C2->getValue();
+ if (AP1.isNegative() == AP2.isNegative()) {
+ APInt AP1Abs = C1->getValue().abs();
+ APInt AP2Abs = C2->getValue().abs();
+ if (AP1Abs.uge(AP2Abs)) {
+ ConstantInt *C3 = Builder->getInt(AP1 - AP2);
+ Value *NewAdd = Builder->CreateNSWAdd(A, C3);
+ return new ICmpInst(Pred, NewAdd, C);
+ } else {
+ ConstantInt *C3 = Builder->getInt(AP2 - AP1);
+ Value *NewAdd = Builder->CreateNSWAdd(C, C3);
+ return new ICmpInst(Pred, A, NewAdd);
+ }
+ }
+ }
+
+
// Analyze the case when either Op0 or Op1 is a sub instruction.
// Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
A = 0; B = 0; C = 0; D = 0;
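
The added icmp cases are only valid because NoOp0WrapProblem/NoOp1WrapProblem guarantee the adds cannot wrap; under that assumption, for example, (X + -1) s< Y is the same as X s<= Y. A spot check of that particular fold over a small, non-wrapping range:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int64_t X = -3; X <= 3; ++X)
        for (int64_t Y = -3; Y <= 3; ++Y)
          assert(((X + -1) < Y) == (X <= Y));   // icmp slt (X + -1), Y -> icmp sle X, Y
      return 0;
    }
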
@@ -2620,6 +2669,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
{ Value *A, *B;
+ // Transform (A & ~B) == 0 --> (A & B) != 0
+ // and (A & ~B) != 0 --> (A & B) == 0
+ // if A is a power of 2.
+ if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A) && I.isEquality())
+ return new ICmpInst(I.getInversePredicate(),
+ Builder->CreateAnd(A, B),
+ Op1);
+
// ~x < ~y --> y < x
// ~x < cst --> ~cst < x
if (match(Op0, m_Not(m_Value(A)))) {
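
The new equality fold uses the fact that when A is a power of two, A & ~B is zero exactly when A's single bit is set in B, i.e. exactly when A & B is non-zero. A small exhaustive check for one power of two:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t A = 1u << 4;               // a power of two
      for (uint32_t B = 0; B < 64; ++B)
        assert(((A & ~B) == 0) == ((A & B) != 0));
      return 0;
    }
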
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 337cfe3..e2d7966 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -69,8 +69,8 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
// If the GEP has all zero indices, it doesn't offset the pointer. If it
// doesn't, it does.
- if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
- IsOffset || !GEP->hasAllZeroIndices()))
+ if (!isOnlyCopiedFromConstantGlobal(
+ GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices()))
return false;
continue;
}
@@ -166,7 +166,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
if (AI.isArrayAllocation()) { // Check C != 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- Type *NewTy =
+ Type *NewTy =
ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
New->setAlignment(AI.getAlignment());
@@ -294,7 +294,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
Type *SrcPTy = SrcTy->getElementType();
- if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
+ if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
DestPTy->isVectorTy()) {
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
@@ -311,7 +311,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
}
if (IC.getDataLayout() &&
- (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
+ (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
SrcPTy->isVectorTy()) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
@@ -322,7 +322,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
// Okay, we are casting from one integer or pointer type to another of
// the same size. Instead of casting the pointer before the load, cast
// the result of the loaded value.
- LoadInst *NewLoad =
+ LoadInst *NewLoad =
IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
NewLoad->setAlignment(LI.getAlignment());
NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
@@ -359,7 +359,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// None of the following transforms are legal for volatile/atomic loads.
// FIXME: Some of it is okay for atomic loads; needs refactoring.
if (!LI.isSimple()) return 0;
-
+
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
@@ -380,7 +380,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Constant::getNullValue(Op->getType()), &LI);
return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
}
- }
+ }
// load null/undef -> unreachable
// TODO: Consider a target hook for valid address spaces for this xform.
@@ -399,7 +399,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (CE->isCast())
if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
return Res;
-
+
if (Op->hasOneUse()) {
// Change select and PHI nodes to select values instead of addresses: this
// helps alias analysis out a lot, allows many others simplifications, and
@@ -453,18 +453,18 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
if (SrcTy == 0) return 0;
-
+
Type *SrcPTy = SrcTy->getElementType();
if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
return 0;
-
+
/// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
/// to its first element. This allows us to handle things like:
/// store i32 xxx, (bitcast {foo*, float}* %P to i32*)
/// on 32-bit hosts.
SmallVector<Value*, 4> NewGEPIndices;
-
+
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
// constants.
@@ -472,7 +472,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// Index through pointer.
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
NewGEPIndices.push_back(Zero);
-
+
while (1) {
if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
if (!STy->getNumElements()) /* Struct can be empty {} */
@@ -486,24 +486,24 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
break;
}
}
-
+
SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
}
if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
return 0;
-
+
// If the pointers point into different address spaces or if they point to
// values with different sizes, we can't do the transformation.
if (!IC.getDataLayout() ||
- SrcTy->getAddressSpace() !=
+ SrcTy->getAddressSpace() !=
cast<PointerType>(CI->getType())->getAddressSpace() ||
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
IC.getDataLayout()->getTypeSizeInBits(DestPTy))
return 0;
// Okay, we are casting from one integer or pointer type to another of
- // the same size. Instead of casting the pointer before
+ // the same size. Instead of casting the pointer before
// the store, cast the value to be stored.
Value *NewCast;
Value *SIOp0 = SI.getOperand(0);
@@ -517,12 +517,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
if (SIOp0->getType()->isPointerTy())
opcode = Instruction::PtrToInt;
}
-
+
// SIOp0 is a pointer to aggregate and this is a store to the first field,
// emit a GEP to index into its first field.
if (!NewGEPIndices.empty())
CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
-
+
NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
SIOp0->getName()+".c");
SI.setOperand(0, NewCast);
@@ -541,7 +541,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
static bool equivalentAddressValues(Value *A, Value *B) {
// Test if the values are trivially equivalent.
if (A == B) return true;
-
+
// Test if the values come form identical arithmetic instructions.
// This uses isIdenticalToWhenDefined instead of isIdenticalTo because
// its only used to compare two uses within the same basic block, which
@@ -554,7 +554,7 @@ static bool equivalentAddressValues(Value *A, Value *B) {
if (Instruction *BI = dyn_cast<Instruction>(B))
if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
return true;
-
+
// Otherwise they may not be equivalent.
return false;
}
@@ -585,7 +585,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
if (Ptr->hasOneUse()) {
- if (isa<AllocaInst>(Ptr))
+ if (isa<AllocaInst>(Ptr))
return EraseInstFromFunction(SI);
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
if (isa<AllocaInst>(GEP->getOperand(0))) {
@@ -608,8 +608,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
ScanInsts++;
continue;
- }
-
+ }
+
if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
// Prev store isn't volatile, and stores to the same location?
if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
@@ -621,7 +621,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
}
break;
}
-
+
// If this is a load, we have to stop. However, if the loaded value is from
// the pointer we're loading and is producing the pointer we're storing,
// then *this* store is dead (X = load P; store X -> P).
@@ -629,12 +629,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
LI->isSimple())
return EraseInstFromFunction(SI);
-
+
// Otherwise, this is a load from some other location. Stores before it
// may not be dead.
break;
}
-
+
// Don't skip over loads or things that can modify memory.
if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
break;
@@ -664,11 +664,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (Instruction *Res = InstCombineStoreToCast(*this, SI))
return Res;
-
+
// If this store is the last instruction in the basic block (possibly
// excepting debug info instructions), and if the block ends with an
// unconditional branch, try to move it to the successor block.
- BBI = &SI;
+ BBI = &SI;
do {
++BBI;
} while (isa<DbgInfoIntrinsic>(BBI) ||
@@ -677,7 +677,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (BI->isUnconditional())
if (SimplifyStoreAtEndOfBlock(SI))
return 0; // xform done!
-
+
return 0;
}
@@ -691,12 +691,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BasicBlock *StoreBB = SI.getParent();
-
+
// Check to see if the successor block has exactly two incoming edges. If
// so, see if the other predecessor contains a store to the same location.
// if so, insert a PHI node (if needed) and move the stores down.
BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
-
+
// Determine whether Dest has exactly two predecessors and, if so, compute
// the other predecessor.
pred_iterator PI = pred_begin(DestBB);
@@ -708,7 +708,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
if (++PI == pred_end(DestBB))
return false;
-
+
P = *PI;
if (P != StoreBB) {
if (OtherBB)
@@ -728,7 +728,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
if (!OtherBr || BBI == OtherBB->begin())
return false;
-
+
// If the other block ends in an unconditional branch, check for the 'if then
// else' case. there is an instruction before the branch.
StoreInst *OtherStore = 0;
@@ -750,10 +750,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
} else {
// Otherwise, the other block ended with a conditional branch. If one of the
// destinations is StoreBB, then we have the if/then case.
- if (OtherBr->getSuccessor(0) != StoreBB &&
+ if (OtherBr->getSuccessor(0) != StoreBB &&
OtherBr->getSuccessor(1) != StoreBB)
return false;
-
+
// Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
// if/then triangle. See if there is a store to the same ptr as SI that
// lives in OtherBB.
@@ -771,7 +771,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
BBI == OtherBB->begin())
return false;
}
-
+
// In order to eliminate the store in OtherBr, we have to
// make sure nothing reads or overwrites the stored value in
// StoreBB.
@@ -781,7 +781,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
return false;
}
}
-
+
// Insert a PHI node now if we need it.
Value *MergedVal = OtherStore->getOperand(0);
if (MergedVal != SI.getOperand(0)) {
@@ -790,7 +790,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
PN->addIncoming(OtherStore->getOperand(0), OtherBB);
MergedVal = InsertNewInstBefore(PN, DestBB->front());
}
-
+
// Advance to a place where it is safe to insert the new store and
// insert it.
BBI = DestBB->getFirstInsertionPt();
@@ -800,7 +800,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
SI.getOrdering(),
SI.getSynchScope());
InsertNewInstBefore(NewSI, *BBI);
- NewSI->setDebugLoc(OtherStore->getDebugLoc());
+ NewSI->setDebugLoc(OtherStore->getDebugLoc());
// If the two stores had the same TBAA tag, preserve it.
if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
@@ -808,7 +808,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
OtherStore->getMetadata(LLVMContext::MD_tbaa))))
NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
-
+
// Nuke the old stores.
EraseInstFromFunction(SI);
EraseInstFromFunction(*OtherStore);
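For illustration only (not part of the patch): most of the hunks above are whitespace cleanups inside SimplifyStoreAtEndOfBlock, which sinks the stores of an if/then or if/then/else into the common successor behind a PHI node so only one store remains. A rough source-level analogue, assuming both branches store to the same location and nothing in between reads or clobbers it:

#include <cassert>

void before(bool c, int a, int b, int *p) {
  if (c) { *p = a; } else { *p = b; }   // one store per predecessor
}

void after(bool c, int a, int b, int *p) {
  *p = c ? a : b;                       // merged value, single store
}

int main() {
  int x = 0, y = 0;
  before(true, 1, 2, &x);  after(true, 1, 2, &y);  assert(x == y);
  before(false, 1, 2, &x); after(false, 1, 2, &y); assert(x == y);
  return 0;
}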
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 173f2bf..ecc9fc3 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -28,7 +28,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// if this is safe. For example, the use could be in dynamically unreached
// code.
if (!V->hasOneUse()) return 0;
-
+
bool MadeChange = false;
// ((1 << A) >>u B) --> (1 << (A-B))
@@ -41,7 +41,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
A = IC.Builder->CreateSub(A, B);
return IC.Builder->CreateShl(PowerOf2, A);
}
-
+
// (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
// inexact. Similarly for <<.
if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
@@ -52,12 +52,12 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
I->setOperand(0, V2);
MadeChange = true;
}
-
+
if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
I->setIsExact();
MadeChange = true;
}
-
+
if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
I->setHasNoUnsignedWrap();
MadeChange = true;
@@ -67,7 +67,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// TODO: Lots more we could do here:
// If V is a phi node, we can call this on each of its operands.
// "select cond, X, 0" can simplify to "X".
-
+
return MadeChange ? V : 0;
}
@@ -84,12 +84,12 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
LHSExt = LHSExt.zext(W * 2);
RHSExt = RHSExt.zext(W * 2);
}
-
+
APInt MulExt = LHSExt * RHSExt;
-
+
if (!sign)
return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
-
+
APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
return MulExt.slt(Min) || MulExt.sgt(Max);
@@ -107,16 +107,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(Op1, m_AllOnes())) // X * -1 == 0 - X
return BinaryOperator::CreateNeg(Op0, I.getName());
-
+
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-
+
// ((X << C1)*C2) == (X * (C2 << C1))
if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
if (SI->getOpcode() == Instruction::Shl)
if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
return BinaryOperator::CreateMul(SI->getOperand(0),
ConstantExpr::getShl(CI, ShOp));
-
+
const APInt &Val = CI->getValue();
if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C
Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
@@ -125,7 +125,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
return Shl;
}
-
+
// Canonicalize (X+C1)*CI -> X*CI+C1*CI.
{ Value *X; ConstantInt *C1;
if (Op0->hasOneUse() &&
@@ -158,9 +158,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
}
-
+
// Simplify mul instructions with a constant RHS.
- if (isa<Constant>(Op1)) {
+ if (isa<Constant>(Op1)) {
// Try to fold constant mul into select arguments.
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -181,7 +181,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
Value *Op1C = Op1;
BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
if (!BO ||
- (BO->getOpcode() != Instruction::UDiv &&
+ (BO->getOpcode() != Instruction::UDiv &&
BO->getOpcode() != Instruction::SDiv)) {
Op1C = Op0;
BO = dyn_cast<BinaryOperator>(Op1);
@@ -227,14 +227,14 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(Op1, m_Shl(m_One(), m_Value(Y))))
return BinaryOperator::CreateShl(Op0, Y);
}
-
+
// If one of the operands of the multiply is a cast from a boolean value, then
// we know the bool is either zero or one, so this is a 'masking' multiply.
// X * Y (where Y is 0 or 1) -> X & (0-Y)
if (!I.getType()->isVectorTy()) {
// -2 is "-1 << 1" so it is all bits set except the low one.
APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
-
+
Value *BoolCast = 0, *OtherOp = 0;
if (MaskedValueIsZero(Op0, Negative2))
BoolCast = Op0, OtherOp = Op1;
@@ -280,7 +280,7 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
return;
if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
return;
-
+
ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0));
if (CFP && CFP->isExactlyValue(0.5)) {
Y = I->getOperand(1);
@@ -289,14 +289,14 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
CFP = dyn_cast<ConstantFP>(I->getOperand(1));
if (CFP && CFP->isExactlyValue(0.5))
Y = I->getOperand(0);
-}
+}
/// Helper function of InstCombiner::visitFMul(BinaryOperator(). It returns
/// true iff the given value is FMul or FDiv with one and only one operand
/// being a normal constant (i.e. not Zero/NaN/Infinity).
static bool isFMulOrFDivWithConstant(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
- if (!I || (I->getOpcode() != Instruction::FMul &&
+ if (!I || (I->getOpcode() != Instruction::FMul &&
I->getOpcode() != Instruction::FDiv))
return false;
@@ -318,10 +318,10 @@ static bool isNormalFp(const ConstantFP *C) {
/// foldFMulConst() is a helper routine of InstCombiner::visitFMul().
/// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand
/// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true).
-/// This function is to simplify "FMulOrDiv * C" and returns the
+/// This function is to simplify "FMulOrDiv * C" and returns the
/// resulting expression. Note that this function could return NULL in
/// case the constants cannot be folded into a normal floating-point.
-///
+///
Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
Instruction *InsertBefore) {
assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid");
@@ -351,7 +351,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
if (isNormalFp(F)) {
R = BinaryOperator::CreateFMul(Opnd0, F);
} else {
- // (X / C1) * C => X / (C1/C)
+ // (X / C1) * C => X / (C1/C)
Constant *F = ConstantExpr::getFDiv(C1, C);
if (isNormalFp(cast<ConstantFP>(F)))
R = BinaryOperator::CreateFDiv(Opnd0, F);
@@ -415,13 +415,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (C0) {
std::swap(C0, C1);
std::swap(Opnd0, Opnd1);
- Swap = true;
+ Swap = true;
}
if (C1 && C1->getValueAPF().isNormal() &&
isFMulOrFDivWithConstant(Opnd0)) {
Value *M1 = ConstantExpr::getFMul(C1, C);
- Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
+ Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
0;
if (M0 && M1) {
@@ -495,7 +495,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
// (X*Y) * X => (X*X) * Y where Y != X
- // The purpose is two-fold:
+ // The purpose is two-fold:
// 1) to form a power expression (of X).
// 2) potentially shorten the critical path: After transformation, the
// latency of the instruction Y is amortized by the expression of X*X,
@@ -524,6 +524,35 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}
+ // B * (uitofp i1 C) -> select C, B, 0
+ if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
+ Value *LHS = Op0, *RHS = Op1;
+ Value *B, *C;
+ if (!match(RHS, m_UIToFp(m_Value(C))))
+ std::swap(LHS, RHS);
+
+ if (match(RHS, m_UIToFp(m_Value(C))) && C->getType()->isIntegerTy(1)) {
+ B = LHS;
+ Value *Zero = ConstantFP::getNegativeZero(B->getType());
+ return SelectInst::Create(C, B, Zero);
+ }
+ }
+
+ // A * (1 - uitofp i1 C) -> select C, 0, A
+ if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
+ Value *LHS = Op0, *RHS = Op1;
+ Value *A, *C;
+ if (!match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))))
+ std::swap(LHS, RHS);
+
+ if (match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))) &&
+ C->getType()->isIntegerTy(1)) {
+ A = LHS;
+ Value *Zero = ConstantFP::getNegativeZero(A->getType());
+ return SelectInst::Create(C, Zero, A);
+ }
+ }
+
if (!isa<Constant>(Op1))
std::swap(Opnd0, Opnd1);
else
@@ -537,7 +566,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
/// instruction.
bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
SelectInst *SI = cast<SelectInst>(I.getOperand(1));
-
+
// div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
int NonNullOperand = -1;
if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
@@ -547,36 +576,36 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
if (ST->isNullValue())
NonNullOperand = 1;
-
+
if (NonNullOperand == -1)
return false;
-
+
Value *SelectCond = SI->getOperand(0);
-
+
// Change the div/rem to use 'Y' instead of the select.
I.setOperand(1, SI->getOperand(NonNullOperand));
-
+
// Okay, we know we replace the operand of the div/rem with 'Y' with no
// problem. However, the select, or the condition of the select may have
// multiple uses. Based on our knowledge that the operand must be non-zero,
// propagate the known value for the select into other uses of it, and
// propagate a known value of the condition into its other users.
-
+
// If the select and condition only have a single use, don't bother with this,
// early exit.
if (SI->use_empty() && SelectCond->hasOneUse())
return true;
-
+
// Scan the current block backward, looking for other uses of SI.
BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
-
+
while (BBI != BBFront) {
--BBI;
// If we found a call to a function, we can't assume it will return, so
// information from below it cannot be propagated above it.
if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
break;
-
+
// Replace uses of the select or its condition with the known values.
for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
I != E; ++I) {
@@ -589,17 +618,17 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
Worklist.Add(BBI);
}
}
-
+
// If we past the instruction, quit looking for it.
if (&*BBI == SI)
SI = 0;
if (&*BBI == SelectCond)
SelectCond = 0;
-
+
// If we ran out of things to eliminate, break out of the loop.
if (SelectCond == 0 && SI == 0)
break;
-
+
}
return true;
}
@@ -617,7 +646,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
I.setOperand(1, V);
return &I;
}
-
+
// Handle cases involving: [su]div X, (select Cond, Y, Z)
// This does not apply for fdiv.
if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
@@ -683,16 +712,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// Handle the integer div common cases
if (Instruction *Common = commonIDivTransforms(I))
return Common;
-
- {
+
+ {
// X udiv 2^C -> X >> C
// Check to see if this is an unsigned division with an exact power of 2,
// if so, convert to a right shift.
const APInt *C;
if (match(Op1, m_Power2(C))) {
BinaryOperator *LShr =
- BinaryOperator::CreateLShr(Op0,
- ConstantInt::get(Op0->getType(),
+ BinaryOperator::CreateLShr(Op0,
+ ConstantInt::get(Op0->getType(),
C->logBase2()));
if (I.isExact()) LShr->setIsExact();
return LShr;
@@ -732,7 +761,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
return BinaryOperator::CreateLShr(Op0, N);
}
}
-
+
// udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
// where C1&C2 are powers of two.
{ Value *Cond; const APInt *C1, *C2;
@@ -740,11 +769,11 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// Construct the "on true" case of the select
Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
I.isExact());
-
+
// Construct the "on false" case of the select
Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
I.isExact());
-
+
// construct the select instruction and return it.
return SelectInst::Create(Cond, TSI, FSI);
}
@@ -799,7 +828,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
// X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
}
-
+
if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
@@ -809,13 +838,13 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
}
}
}
-
+
return 0;
}
/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special
/// FP value and:
-/// 1) 1/C is exact, or
+/// 1) 1/C is exact, or
/// 2) reciprocal is allowed.
/// If the convertion was successful, the simplified expression "X * 1/C" is
/// returned; otherwise, NULL is returned.
@@ -826,7 +855,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
const APFloat &FpVal = Divisor->getValueAPF();
APFloat Reciprocal(FpVal.getSemantics());
bool Cvt = FpVal.getExactInverse(&Reciprocal);
-
+
if (!Cvt && AllowReciprocal && FpVal.isNormal()) {
Reciprocal = APFloat(FpVal.getSemantics(), 1.0f);
(void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven);
@@ -870,10 +899,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
Constant *C = ConstantExpr::getFMul(C1, C2);
const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
if (F.isNormal() && !F.isDenormal()) {
- Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
+ Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
AllowReciprocal);
if (!Res)
- Res = BinaryOperator::CreateFDiv(X, C);
+ Res = BinaryOperator::CreateFDiv(X, C);
}
}
@@ -911,7 +940,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
if (Fold) {
const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF();
if (FoldC.isNormal() && !FoldC.isDenormal()) {
- Instruction *R = CreateDiv ?
+ Instruction *R = CreateDiv ?
BinaryOperator::CreateFDiv(Fold, X) :
BinaryOperator::CreateFMul(X, Fold);
R->setFastMathFlags(I.getFastMathFlags());
@@ -997,7 +1026,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
if (Instruction *common = commonIRemTransforms(I))
return common;
-
+
// X urem C^2 -> X and C-1
{ const APInt *C;
if (match(Op1, m_Power2(C)))
@@ -1005,7 +1034,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
ConstantInt::get(I.getType(), *C-1));
}
- // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
+ // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
Value *Add = Builder->CreateAdd(Op1, N1);
@@ -1041,7 +1070,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
// Handle the integer rem common cases
if (Instruction *Common = commonIRemTransforms(I))
return Common;
-
+
if (Value *RHSNeg = dyn_castNegVal(Op1))
if (!isa<Constant>(RHSNeg) ||
(isa<ConstantInt>(RHSNeg) &&
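For illustration only (not part of the patch): the new visitFMul cases rewrite "B * (uitofp i1 C)" into "select C, B, -0.0" (and the "1 - uitofp" variant into the mirrored select), which is why they are guarded by the nnan/ninf/nsz fast-math flags. A small C++ check of the underlying identity for C in {0, 1}; comparing with == already treats +0.0 and -0.0 as equal, which is exactly the slack the no-signed-zeros flag grants:

#include <cassert>

int main() {
  const double vals[] = {-2.5, -0.0, 0.0, 1.0, 42.0};  // finite, non-NaN inputs
  for (double b : vals) {
    for (int c = 0; c <= 1; ++c) {
      double mul = b * static_cast<double>(c);  // B * (uitofp i1 C)
      double sel = c ? b : -0.0;                // select C, B, -0.0
      assert(mul == sel);
    }
  }
  return 0;
}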
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index b0a998c..bd14e81 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -27,10 +27,10 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
unsigned Opc = FirstInst->getOpcode();
Value *LHSVal = FirstInst->getOperand(0);
Value *RHSVal = FirstInst->getOperand(1);
-
+
Type *LHSType = LHSVal->getType();
Type *RHSType = RHSVal->getType();
-
+
bool isNUW = false, isNSW = false, isExact = false;
if (OverflowingBinaryOperator *BO =
dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
@@ -39,7 +39,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
} else if (PossiblyExactOperator *PEO =
dyn_cast<PossiblyExactOperator>(FirstInst))
isExact = PEO->isExact();
-
+
// Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
@@ -54,14 +54,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
if (CmpInst *CI = dyn_cast<CmpInst>(I))
if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
return 0;
-
+
if (isNUW)
isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
if (isNSW)
isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
if (isExact)
isExact = cast<PossiblyExactOperator>(I)->isExact();
-
+
// Keep track of which operand needs a phi node.
if (I->getOperand(0) != LHSVal) LHSVal = 0;
if (I->getOperand(1) != RHSVal) RHSVal = 0;
@@ -73,9 +73,9 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
// bad when the PHIs are in the header of a loop.
if (!LHSVal && !RHSVal)
return 0;
-
+
// Otherwise, this is safe to transform!
-
+
Value *InLHS = FirstInst->getOperand(0);
Value *InRHS = FirstInst->getOperand(1);
PHINode *NewLHS = 0, *NewRHS = 0;
@@ -86,7 +86,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
InsertNewInstBefore(NewLHS, PN);
LHSVal = NewLHS;
}
-
+
if (RHSVal == 0) {
NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
FirstInst->getOperand(1)->getName() + ".pn");
@@ -94,7 +94,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
InsertNewInstBefore(NewRHS, PN);
RHSVal = NewRHS;
}
-
+
// Add all operands to the new PHIs.
if (NewLHS || NewRHS) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
@@ -109,7 +109,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
}
}
}
-
+
if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) {
CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
LHSVal, RHSVal);
@@ -129,8 +129,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
-
- SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
+
+ SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
FirstInst->op_end());
// This is true if all GEP bases are allocas and if all indices into them are
// constants.
@@ -140,9 +140,9 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// more than one phi, which leads to higher register pressure. This is
// especially bad when the PHIs are in the header of a loop.
bool NeededPhi = false;
-
+
bool AllInBounds = true;
-
+
// Scan to see if all operands are the same opcode, and all have one use.
for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
@@ -151,18 +151,18 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
return 0;
AllInBounds &= GEP->isInBounds();
-
+
// Keep track of whether or not all GEPs are of alloca pointers.
if (AllBasePointersAreAllocas &&
(!isa<AllocaInst>(GEP->getOperand(0)) ||
!GEP->hasAllConstantIndices()))
AllBasePointersAreAllocas = false;
-
+
// Compare the operand lists.
for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
if (FirstInst->getOperand(op) == GEP->getOperand(op))
continue;
-
+
// Don't merge two GEPs when two operands differ (introducing phi nodes)
// if one of the PHIs has a constant for the index. The index may be
// substantially cheaper to compute for the constants, so making it a
@@ -171,7 +171,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
isa<ConstantInt>(GEP->getOperand(op)))
return 0;
-
+
if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
return 0;
@@ -186,7 +186,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
NeededPhi = true;
}
}
-
+
// If all of the base pointers of the PHI'd GEPs are from allocas, don't
// bother doing this transformation. At best, this will just save a bit of
// offset calculation, but all the predecessors will have to materialize the
@@ -195,11 +195,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// which can usually all be folded into the load.
if (AllBasePointersAreAllocas)
return 0;
-
+
// Otherwise, this is safe to transform. Insert PHI nodes for each operand
// that is variable.
SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
-
+
bool HasAnyPHIs = false;
for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
if (FixedOperands[i]) continue; // operand doesn't need a phi.
@@ -207,28 +207,28 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
FirstOp->getName()+".pn");
InsertNewInstBefore(NewPN, PN);
-
+
NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
OperandPhis[i] = NewPN;
FixedOperands[i] = NewPN;
HasAnyPHIs = true;
}
-
+
// Add all operands to the new PHIs.
if (HasAnyPHIs) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
BasicBlock *InBB = PN.getIncomingBlock(i);
-
+
for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
if (PHINode *OpPhi = OperandPhis[op])
OpPhi->addIncoming(InGEP->getOperand(op), InBB);
}
}
-
+
Value *Base = FixedOperands[0];
- GetElementPtrInst *NewGEP =
+ GetElementPtrInst *NewGEP =
GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
if (AllInBounds) NewGEP->setIsInBounds();
NewGEP->setDebugLoc(FirstInst->getDebugLoc());
@@ -246,11 +246,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
/// to a register.
static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
BasicBlock::iterator BBI = L, E = L->getParent()->end();
-
+
for (++BBI; BBI != E; ++BBI)
if (BBI->mayWriteToMemory())
return false;
-
+
// Check for non-address taken alloca. If not address-taken already, it isn't
// profitable to do this xform.
if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
@@ -266,11 +266,11 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
isAddressTaken = true;
break;
}
-
+
if (!isAddressTaken && AI->isStaticAlloca())
return false;
}
-
+
// If this load is a load from a GEP with a constant offset from an alloca,
// then we don't want to sink it. In its present form, it will be
// load [constant stack offset]. Sinking it will cause us to have to
@@ -280,7 +280,7 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
return false;
-
+
return true;
}
@@ -300,41 +300,41 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
bool isVolatile = FirstLI->isVolatile();
unsigned LoadAlignment = FirstLI->getAlignment();
unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
-
+
// We can't sink the load if the loaded value could be modified between the
// load and the PHI.
if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
!isSafeAndProfitableToSinkLoad(FirstLI))
return 0;
-
+
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
- if (isVolatile &&
+ if (isVolatile &&
FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
return 0;
-
+
// Check to see if all arguments are the same operation.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
if (!LI || !LI->hasOneUse())
return 0;
-
- // We can't sink the load if the loaded value could be modified between
+
+ // We can't sink the load if the loaded value could be modified between
// the load and the PHI.
if (LI->isVolatile() != isVolatile ||
LI->getParent() != PN.getIncomingBlock(i) ||
LI->getPointerAddressSpace() != LoadAddrSpace ||
!isSafeAndProfitableToSinkLoad(LI))
return 0;
-
+
// If some of the loads have an alignment specified but not all of them,
// we can't do the transformation.
if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
return 0;
-
+
LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
-
+
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
@@ -342,16 +342,16 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
LI->getParent()->getTerminator()->getNumSuccessors() != 1)
return 0;
}
-
+
// Okay, they are all the same operation. Create a new PHI node of the
// correct type, and PHI together all of the LHS's of the instructions.
PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
PN.getNumIncomingValues(),
PN.getName()+".in");
-
+
Value *InVal = FirstLI->getOperand(0);
NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
-
+
// Add all operands to the new PHI.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
@@ -359,7 +359,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
InVal = 0;
NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
}
-
+
Value *PhiVal;
if (InVal) {
// The new PHI unions all of the same values together. This is really
@@ -370,14 +370,14 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
InsertNewInstBefore(NewPN, PN);
PhiVal = NewPN;
}
-
+
// If this was a volatile load that we are merging, make sure to loop through
// and mark all the input loads as non-volatile. If we don't do this, we will
// insert a new volatile load and the old ones will not be deletable.
if (isVolatile)
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
-
+
LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
NewLI->setDebugLoc(FirstLI->getDebugLoc());
return NewLI;
@@ -395,7 +395,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
return FoldPHIArgGEPIntoPHI(PN);
if (isa<LoadInst>(FirstInst))
return FoldPHIArgLoadIntoPHI(PN);
-
+
// Scan the instruction, looking for input operations that can be folded away.
// If all input operands to the phi are the same instruction (e.g. a cast from
// the same type or "+42") we can pull the operation through the PHI, reducing
@@ -403,7 +403,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
Constant *ConstantOp = 0;
Type *CastSrcTy = 0;
bool isNUW = false, isNSW = false, isExact = false;
-
+
if (isa<CastInst>(FirstInst)) {
CastSrcTy = FirstInst->getOperand(0)->getType();
@@ -414,12 +414,12 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
return 0;
}
} else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
- // Can fold binop, compare or shift here if the RHS is a constant,
+ // Can fold binop, compare or shift here if the RHS is a constant,
// otherwise call FoldPHIArgBinOpIntoPHI.
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
if (ConstantOp == 0)
return FoldPHIArgBinOpIntoPHI(PN);
-
+
if (OverflowingBinaryOperator *BO =
dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
isNUW = BO->hasNoUnsignedWrap();
@@ -442,7 +442,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
} else if (I->getOperand(1) != ConstantOp) {
return 0;
}
-
+
if (isNUW)
isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
if (isNSW)
@@ -486,7 +486,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
NewCI->setDebugLoc(FirstInst->getDebugLoc());
return NewCI;
}
-
+
if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
if (isNUW) BinOp->setHasNoUnsignedWrap();
@@ -495,7 +495,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
BinOp->setDebugLoc(FirstInst->getDebugLoc());
return BinOp;
}
-
+
CmpInst *CIOp = cast<CmpInst>(FirstInst);
CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
PhiVal, ConstantOp);
@@ -513,7 +513,7 @@ static bool DeadPHICycle(PHINode *PN,
// Remember this node, and if we find the cycle, return.
if (!PotentiallyDeadPHIs.insert(PN))
return true;
-
+
// Don't scan crazily complex things.
if (PotentiallyDeadPHIs.size() == 16)
return false;
@@ -527,16 +527,16 @@ static bool DeadPHICycle(PHINode *PN,
/// PHIsEqualValue - Return true if this phi node is always equal to
/// NonPhiInVal. This happens with mutually cyclic phi nodes like:
/// z = some value; x = phi (y, z); y = phi (x, z)
-static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
// See if we already saw this PHI node.
if (!ValueEqualPHIs.insert(PN))
return true;
-
+
// Don't scan crazily complex things.
if (ValueEqualPHIs.size() == 16)
return false;
-
+
// Scan the operands to see if they are either phi nodes or are equal to
// the value.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -547,7 +547,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
} else if (Op != NonPhiInVal)
return false;
}
-
+
return true;
}
@@ -557,10 +557,10 @@ struct PHIUsageRecord {
unsigned PHIId; // The ID # of the PHI (something determinstic to sort on)
unsigned Shift; // The amount shifted.
Instruction *Inst; // The trunc instruction.
-
+
PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
: PHIId(pn), Shift(Sh), Inst(User) {}
-
+
bool operator<(const PHIUsageRecord &RHS) const {
if (PHIId < RHS.PHIId) return true;
if (PHIId > RHS.PHIId) return false;
@@ -570,15 +570,15 @@ struct PHIUsageRecord {
RHS.Inst->getType()->getPrimitiveSizeInBits();
}
};
-
+
struct LoweredPHIRecord {
PHINode *PN; // The PHI that was lowered.
unsigned Shift; // The amount shifted.
unsigned Width; // The width extracted.
-
+
LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty)
: PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
-
+
// Ctor form used by DenseMap.
LoweredPHIRecord(PHINode *pn, unsigned Sh)
: PN(pn), Shift(Sh), Width(0) {}
@@ -621,20 +621,20 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHIUsers - Keep track of all of the truncated values extracted from a set
// of PHIs, along with their offset. These are the things we want to rewrite.
SmallVector<PHIUsageRecord, 16> PHIUsers;
-
+
// PHIs are often mutually cyclic, so we keep track of a whole set of PHI
// nodes which are extracted from. PHIsToSlice is a set we use to avoid
// revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to
// check the uses of (to ensure they are all extracts).
SmallVector<PHINode*, 8> PHIsToSlice;
SmallPtrSet<PHINode*, 8> PHIsInspected;
-
+
PHIsToSlice.push_back(&FirstPhi);
PHIsInspected.insert(&FirstPhi);
-
+
for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
PHINode *PN = PHIsToSlice[PHIId];
-
+
// Scan the input list of the PHI. If any input is an invoke, and if the
// input is defined in the predecessor, then we won't be split the critical
// edge which is required to insert a truncate. Because of this, we have to
@@ -644,85 +644,85 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
if (II == 0) continue;
if (II->getParent() != PN->getIncomingBlock(i))
continue;
-
+
// If we have a phi, and if it's directly in the predecessor, then we have
// a critical edge where we need to put the truncate. Since we can't
// split the edge in instcombine, we have to bail out.
return 0;
}
-
-
+
+
for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
-
+
// If the user is a PHI, inspect its uses recursively.
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (PHIsInspected.insert(UserPN))
PHIsToSlice.push_back(UserPN);
continue;
}
-
+
// Truncates are always ok.
if (isa<TruncInst>(User)) {
PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
continue;
}
-
+
// Otherwise it must be a lshr which can only be used by one trunc.
if (User->getOpcode() != Instruction::LShr ||
!User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
!isa<ConstantInt>(User->getOperand(1)))
return 0;
-
+
unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
}
}
-
+
// If we have no users, they must be all self uses, just nuke the PHI.
if (PHIUsers.empty())
return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
-
+
// If this phi node is transformable, create new PHIs for all the pieces
// extracted out of it. First, sort the users by their offset and size.
array_pod_sort(PHIUsers.begin(), PHIUsers.end());
-
+
DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
);
-
+
// PredValues - This is a temporary used when rewriting PHI nodes. It is
// hoisted out here to avoid construction/destruction thrashing.
DenseMap<BasicBlock*, Value*> PredValues;
-
+
// ExtractedVals - Each new PHI we introduce is saved here so we don't
// introduce redundant PHIs.
DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
-
+
for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
unsigned PHIId = PHIUsers[UserI].PHIId;
PHINode *PN = PHIsToSlice[PHIId];
unsigned Offset = PHIUsers[UserI].Shift;
Type *Ty = PHIUsers[UserI].Inst->getType();
-
+
PHINode *EltPHI;
-
+
// If we've already lowered a user like this, reuse the previously lowered
// value.
if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
-
+
// Otherwise, Create the new PHI node for this user.
EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
PN->getName()+".off"+Twine(Offset), PN);
assert(EltPHI->getType() != PN->getType() &&
"Truncate didn't shrink phi?");
-
+
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
Value *&PredVal = PredValues[Pred];
-
+
// If we already have a value for this predecessor, reuse it.
if (PredVal) {
EltPHI->addIncoming(PredVal, Pred);
@@ -736,7 +736,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
EltPHI->addIncoming(PredVal, Pred);
continue;
}
-
+
if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {
// If the incoming value was a PHI, and if it was one of the PHIs we
// already rewrote it, just use the lowered value.
@@ -746,7 +746,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
continue;
}
}
-
+
// Otherwise, do an extract in the predecessor.
Builder->SetInsertPoint(Pred, Pred->getTerminator());
Value *Res = InVal;
@@ -756,7 +756,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
Res = Builder->CreateTrunc(Res, Ty, "extract.t");
PredVal = Res;
EltPHI->addIncoming(Res, Pred);
-
+
// If the incoming value was a PHI, and if it was one of the PHIs we are
// rewriting, we will ultimately delete the code we inserted. This
// means we need to revisit that PHI to make sure we extract out the
@@ -765,22 +765,22 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
if (PHIsInspected.count(OldInVal)) {
unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
OldInVal)-PHIsToSlice.begin();
- PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
cast<Instruction>(Res)));
++UserE;
}
}
PredValues.clear();
-
+
DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
<< *EltPHI << '\n');
ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
}
-
+
// Replace the use of this piece with the PHI node.
ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
}
-
+
// Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
// with undefs.
Value *Undef = UndefValue::get(FirstPhi.getType());
@@ -818,7 +818,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (DeadPHICycle(PU, PotentiallyDeadPHIs))
return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
}
-
+
// If this phi has a single use, and if that use just computes a value for
// the next iteration of a loop, delete the phi. This occurs with unused
// induction variables, e.g. "for (int j = 0; ; ++j);". Detecting this
@@ -847,7 +847,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (InValNo != NumIncomingVals) {
Value *NonPhiInVal = PN.getIncomingValue(InValNo);
-
+
// Scan the rest of the operands to see if there are any conflicts, if so
// there is no need to recursively scan other phis.
for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
@@ -855,7 +855,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
break;
}
-
+
// If we scanned over all operands, then we have one unique value plus
// phi values. Scan PHI nodes to see if they all merge in each other or
// the value.
@@ -899,6 +899,6 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
!TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
-
+
return 0;
}
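For illustration only (not part of the patch): the PHI hunks above are whitespace-only, but the FoldPHIArgBinOpIntoPHI/FoldPHIArgOpIntoPHI code they pass through pulls a common operation out of a PHI when every incoming value computes it with the same constant operand. A source-level analogue of that rewrite:

#include <cassert>

int before(bool c, int a, int b) {
  return c ? (a + 42) : (b + 42);   // every incoming value is "x + 42"
}

int after(bool c, int a, int b) {
  return (c ? a : b) + 42;          // merge the operands, apply the op once
}

int main() {
  assert(before(true, 1, 2) == after(true, 1, 2));
  assert(before(false, 1, 2) == after(false, 1, 2));
  return 0;
}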
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 121aa1f..59502fb 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -350,6 +350,68 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return 0;
}
+/// foldSelectICmpAndOr - We want to turn:
+/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
+/// into:
+/// (or (shl (and X, C1), C3), y)
+/// iff:
+/// C1 and C2 are both powers of 2
+/// where:
+/// C3 = Log(C2) - Log(C1)
+///
+/// This transform handles cases where:
+/// 1. The icmp predicate is inverted
+/// 2. The select operands are reversed
+/// 3. The magnitude of C2 and C1 are flipped
+static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
+ Value *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
+ if (!IC || !IC->isEquality())
+ return 0;
+
+ Value *CmpLHS = IC->getOperand(0);
+ Value *CmpRHS = IC->getOperand(1);
+
+ if (!match(CmpRHS, m_Zero()))
+ return 0;
+
+ Value *X;
+ const APInt *C1;
+ if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1))))
+ return 0;
+
+ const APInt *C2;
+ bool OrOnTrueVal = false;
+ bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2)));
+ if (!OrOnFalseVal)
+ OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2)));
+
+ if (!OrOnFalseVal && !OrOnTrueVal)
+ return 0;
+
+ Value *V = CmpLHS;
+ Value *Y = OrOnFalseVal ? TrueVal : FalseVal;
+
+ unsigned C1Log = C1->logBase2();
+ unsigned C2Log = C2->logBase2();
+ if (C2Log > C1Log) {
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+ V = Builder->CreateShl(V, C2Log - C1Log);
+ } else if (C1Log > C2Log) {
+ V = Builder->CreateLShr(V, C1Log - C2Log);
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+ } else
+ V = Builder->CreateZExtOrTrunc(V, Y->getType());
+
+ ICmpInst::Predicate Pred = IC->getPredicate();
+ if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) ||
+ (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal))
+ V = Builder->CreateXor(V, *C2);
+
+ return Builder->CreateOr(V, Y);
+}
+
/// visitSelectInstWithICmp - Visit a SelectInst that has an
/// ICmpInst as its first operand.
///
@@ -521,6 +583,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
}
}
+ if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder))
+ return ReplaceInstUsesWith(SI, V);
+
return Changed ? &SI : 0;
}
@@ -676,7 +741,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// Change: A = select B, false, C --> A = and !B, C
Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
return BinaryOperator::CreateAnd(NotCond, FalseVal);
- } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
+ }
+ if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
if (C->getZExtValue() == false) {
// Change: A = select B, C, false --> A = and B, C
return BinaryOperator::CreateAnd(CondVal, TrueVal);
@@ -690,14 +756,14 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// select a, a, b -> a|b
if (CondVal == TrueVal)
return BinaryOperator::CreateOr(CondVal, FalseVal);
- else if (CondVal == FalseVal)
+ if (CondVal == FalseVal)
return BinaryOperator::CreateAnd(CondVal, TrueVal);
// select a, ~a, b -> (~a)&b
// select a, b, ~a -> (~a)|b
if (match(TrueVal, m_Not(m_Specific(CondVal))))
return BinaryOperator::CreateAnd(TrueVal, FalseVal);
- else if (match(FalseVal, m_Not(m_Specific(CondVal))))
+ if (match(FalseVal, m_Not(m_Specific(CondVal))))
return BinaryOperator::CreateOr(TrueVal, FalseVal);
}
@@ -838,7 +904,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *NewFalseOp = NegVal;
if (AddOp != TI)
std::swap(NewTrueOp, NewFalseOp);
- Value *NewSel =
+ Value *NewSel =
Builder->CreateSelect(CondVal, NewTrueOp,
NewFalseOp, SI.getName() + ".p");
@@ -862,7 +928,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *LHS, *RHS, *LHS2, *RHS2;
if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
- if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
+ if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
SI, SPF, RHS))
return R;
if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
@@ -908,7 +974,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) {
+ if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) {
unsigned VWidth = VecTy->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
@@ -918,24 +984,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) {
- // Form a shufflevector instruction.
- SmallVector<Constant *, 8> Mask(VWidth);
- Type *Int32Ty = Type::getInt32Ty(CV->getContext());
- for (unsigned i = 0; i != VWidth; ++i) {
- Constant *Elem = cast<Constant>(CV->getOperand(i));
- if (ConstantInt *E = dyn_cast<ConstantInt>(Elem))
- Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? VWidth : 0));
- else if (isa<UndefValue>(Elem))
- Mask[i] = UndefValue::get(Int32Ty);
- else
- return 0;
- }
- Constant *MaskVal = ConstantVector::get(Mask);
- Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal);
- return ReplaceInstUsesWith(SI, V);
- }
-
if (isa<ConstantAggregateZero>(CondVal)) {
return ReplaceInstUsesWith(SI, FalseVal);
}
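For illustration only (not part of the patch): foldSelectICmpAndOr, added above, replaces a select between Y and Y | C2, keyed on a single power-of-two bit C1 of X, with a shift of that bit followed by an or. A brute-force C++ check for one choice of constants (C1 = 4, C2 = 32, so C3 = log2(32) - log2(4) = 3):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C1 = 4, C2 = 32;  // both powers of two, as the fold requires
  const unsigned C3 = 5 - 2;       // log2(C2) - log2(C1)
  for (uint32_t x = 0; x < 256; ++x) {
    for (uint32_t y = 0; y < 256; ++y) {
      uint32_t sel = ((x & C1) == 0) ? y : (y | C2);  // select (icmp eq (and X,C1),0), Y, (or Y,C2)
      uint32_t alt = ((x & C1) << C3) | y;            // or (shl (and X,C1), C3), Y
      assert(sel == alt);
    }
  }
  return 0;
}

The inverted-predicate and swapped-operand cases listed in the function's comment reduce to the same identity; the final xor with C2 in the code flips which arm of the select picks up the or'd bit.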
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 4f71db1..4301ddb 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -105,6 +105,75 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
return 0;
}
+// If we have a PHI node with a vector type that has only 2 uses: feed
+// itself and be an operand of extractelement at a constant location,
+// try to replace the PHI of the vector type with a PHI of a scalar type
+Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
+ // Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
+ if (!PN->hasNUses(2))
+ return NULL;
+
+ // If so, it's known at this point that one operand is PHI and the other is
+ // an extractelement node. Find the PHI user that is not the extractelement
+ // node.
+ Value::use_iterator iu = PN->use_begin();
+ Instruction *PHIUser = dyn_cast<Instruction>(*iu);
+ if (PHIUser == cast<Instruction>(&EI))
+ PHIUser = cast<Instruction>(*(++iu));
+
+ // Verify that this PHI user has one use, which is the PHI itself,
+ // and that it is a binary operation which is cheap to scalarize.
+ // otherwise return NULL.
+ if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) ||
+ !(isa<BinaryOperator>(PHIUser)) ||
+ !CheapToScalarize(PHIUser, true))
+ return NULL;
+
+ // Create a scalar PHI node that will replace the vector PHI node
+ // just before the current PHI node.
+ PHINode * scalarPHI = cast<PHINode>(
+ InsertNewInstWith(PHINode::Create(EI.getType(),
+ PN->getNumIncomingValues(), ""), *PN));
+ // Scalarize each PHI operand.
+ for (unsigned i=0; i < PN->getNumIncomingValues(); i++) {
+ Value *PHIInVal = PN->getIncomingValue(i);
+ BasicBlock *inBB = PN->getIncomingBlock(i);
+ Value *Elt = EI.getIndexOperand();
+ // If the operand is the PHI induction variable:
+ if (PHIInVal == PHIUser) {
+ // Scalarize the binary operation. Its first operand is the
+ // scalar PHI and the second operand is extracted from the other
+ // vector operand.
+ BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
+ unsigned opId = (B0->getOperand(0) == PN) ? 1: 0;
+ Value *Op = Builder->CreateExtractElement(
+ B0->getOperand(opId), Elt, B0->getOperand(opId)->getName()+".Elt");
+ Value *newPHIUser = InsertNewInstWith(
+ BinaryOperator::Create(B0->getOpcode(), scalarPHI,Op),
+ *B0);
+ scalarPHI->addIncoming(newPHIUser, inBB);
+ } else {
+ // Scalarize PHI input:
+ Instruction *newEI =
+ ExtractElementInst::Create(PHIInVal, Elt, "");
+ // Insert the new instruction into the predecessor basic block.
+ Instruction *pos = dyn_cast<Instruction>(PHIInVal);
+ BasicBlock::iterator InsertPos;
+ if (pos && !isa<PHINode>(pos)) {
+ InsertPos = pos;
+ ++InsertPos;
+ } else {
+ InsertPos = inBB->getFirstInsertionPt();
+ }
+
+ InsertNewInstWith(newEI, *InsertPos);
+
+ scalarPHI->addIncoming(newEI, inBB);
+ }
+ }
+ return ReplaceInstUsesWith(EI, scalarPHI);
+}
+
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If vector val is constant with all elements the same, replace EI with
// that element. We handle a known element # below.
@@ -149,6 +218,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
return new BitCastInst(Elt, EI.getType());
}
+
+ // If there's a vector PHI feeding a scalar use through this extractelement
+ // instruction, try to scalarize the PHI.
+ if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
+ Instruction *scalarPHI = scalarizePHI(EI, PN);
+ if (scalarPHI)
+ return (scalarPHI);
+ }
}
if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
@@ -201,10 +278,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
// Canonicalize extractelement(cast) -> cast(extractelement)
// bitcasts can change the number of vector elements and they cost nothing
- if (CI->hasOneUse() && EI.hasOneUse() &&
- (CI->getOpcode() != Instruction::BitCast)) {
+ if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
EI.getIndexOperand());
+ Worklist.AddValue(EE);
return CastInst::Create(CI->getOpcode(), EE, EI.getType());
}
}
@@ -336,6 +413,10 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
if (VecOp == RHS) {
Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+ // Update Mask to reflect that `ScalarOp' has been inserted at
+ // position `InsertedIdx' within the vector returned by IEI.
+ Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx];
+
// Everything but the extracted element is replaced with the RHS.
for (unsigned i = 0; i != NumElts; ++i) {
if (i != InsertedIdx)
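
One way to picture the scalarizePHI combine added above (a hedged illustration of mine, not taken from the commit): source like the loop below produces a loop-carried vector value whose only consumers are the loop's own add and a single constant-lane extract, which is the shape the new code looks for.

#include <cstdio>

typedef float v4sf __attribute__((vector_size(16)));  // Clang/GCC vector extension

// Only lane 0 of the accumulator is ever read, so the loop-carried vector PHI
// can be narrowed to a scalar PHI plus a per-iteration extract of Data[i].
float sumLaneZero(const v4sf *Data, int N) {
  v4sf Acc = {0, 0, 0, 0};
  for (int i = 0; i < N; ++i)
    Acc = Acc + Data[i];
  return Acc[0];
}

int main() {
  v4sf D[2] = {{1, 2, 3, 4}, {5, 6, 7, 8}};
  std::printf("%f\n", sumLaneZero(D, 2));  // prints 6.000000
  return 0;
}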
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index c6115e3..ec10751 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1483,7 +1483,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
Module *M = II->getParent()->getParent()->getParent();
Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
- ArrayRef<Value *>(), "", II->getParent());
+ None, "", II->getParent());
}
return EraseInstFromFunction(MI);
}
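
The one-liner above swaps an explicitly constructed empty ArrayRef for llvm::None. A minimal sketch of the conversion it relies on, assuming ArrayRef.h in this tree already provides the implicit NoneType constructor (which the change itself depends on):

#include "llvm/ADT/ArrayRef.h"  // assumption: also makes llvm::None visible
#include <cassert>

static unsigned numArgs(llvm::ArrayRef<int> Args) { return Args.size(); }

int main() {
  // Both spellings produce an empty list; None avoids naming the element type
  // at the call site.
  assert(numArgs(llvm::None) == 0);
  assert(numArgs(llvm::ArrayRef<int>()) == 0);
  return 0;
}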
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp
index 927982d..39de4b0 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp
@@ -110,7 +110,8 @@ static StringRef GetGVTypeString(const GlobalVariable &G) {
bool BlackList::isInInit(const GlobalVariable &G) const {
return (isIn(*G.getParent()) ||
inSection("global-init", G.getName()) ||
- inSection("global-init-type", GetGVTypeString(G)));
+ inSection("global-init-type", GetGVTypeString(G)) ||
+ inSection("global-init-src", G.getParent()->getModuleIdentifier()));
}
bool BlackList::inSection(const StringRef Section,
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index 8ba1025..9f35396 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
index 53a31b0..373168e 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -30,6 +30,7 @@ using namespace llvm::objcarc;
bool llvm::objcarc::EnableARCOpts;
static cl::opt<bool, true>
EnableARCOptimizations("enable-objc-arc-opts",
+ cl::desc("enable/disable all ARC Optimizations"),
cl::location(EnableARCOpts),
cl::init(true));
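
For context, a self-contained sketch (flag and variable names invented) of the cl::opt pattern being touched here: a flag backed by external storage via cl::location, with the newly added cl::desc supplying the -help text.

#include "llvm/Support/CommandLine.h"

bool EnableExampleOpts;  // external storage the flag writes through to

// The second template argument 'true' selects external storage; cl::desc is
// the string shown by -help, which is what the change above adds.
static llvm::cl::opt<bool, true>
EnableExampleOptimizations("enable-example-opts",
                           llvm::cl::desc("enable/disable the example passes"),
                           llvm::cl::location(EnableExampleOpts),
                           llvm::cl::init(true));

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  return EnableExampleOpts ? 0 : 1;
}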
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index b96c64f..c43f4f4 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -66,6 +66,8 @@ namespace {
Constant *RetainAutoreleaseCallee;
/// Declaration for objc_retainAutoreleaseReturnValue().
Constant *RetainAutoreleaseRVCallee;
+ /// Declaration for objc_retainAutoreleasedReturnValue().
+ Constant *RetainRVCallee;
/// The inline asm string to insert between calls and RetainRV calls to make
/// the optimization work on targets which need it.
@@ -77,9 +79,12 @@ namespace {
SmallPtrSet<CallInst *, 8> StoreStrongCalls;
Constant *getStoreStrongCallee(Module *M);
+ Constant *getRetainRVCallee(Module *M);
Constant *getRetainAutoreleaseCallee(Module *M);
Constant *getRetainAutoreleaseRVCallee(Module *M);
+ bool OptimizeRetainCall(Function &F, Instruction *Retain);
+
bool ContractAutorelease(Function &F, Instruction *Autorelease,
InstructionClass Class,
SmallPtrSet<Instruction *, 4>
@@ -172,6 +177,57 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
return RetainAutoreleaseRVCallee;
}
+Constant *ObjCARCContract::getRetainRVCallee(Module *M) {
+ if (!RetainRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *Params[] = { I8X };
+ FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttributeSet Attribute =
+ AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
+ Attribute::NoUnwind);
+ RetainRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
+ Attribute);
+ }
+ return RetainRVCallee;
+}
+
+/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
+/// return value. We do this late so we do not disrupt the dataflow analysis in
+/// ObjCARCOpt.
+bool
+ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) {
+ ImmutableCallSite CS(GetObjCArg(Retain));
+ const Instruction *Call = CS.getInstruction();
+ if (!Call)
+ return false;
+ if (Call->getParent() != Retain->getParent())
+ return false;
+
+ // Check that the call is next to the retain.
+ BasicBlock::const_iterator I = Call;
+ ++I;
+ while (IsNoopInstruction(I)) ++I;
+ if (&*I != Retain)
+ return false;
+
+ // Turn it to an objc_retainAutoreleasedReturnValue.
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "Transforming objc_retain => "
+ "objc_retainAutoreleasedReturnValue since the operand is a "
+ "return value.\nOld: "<< *Retain << "\n");
+
+ // We do not have to worry about tail calls or the nounwind attribute since
+ // retain/retainRV have the same properties.
+ cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+
+ DEBUG(dbgs() << "New: " << *Retain << "\n");
+ return true;
+}
+
/// Merge an autorelease with a retain into a fused call.
bool
ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
@@ -329,6 +385,7 @@ bool ObjCARCContract::doInitialization(Module &M) {
StoreStrongCallee = 0;
RetainAutoreleaseCallee = 0;
RetainAutoreleaseRVCallee = 0;
+ RetainRVCallee = 0;
// Initialize RetainRVMarker.
RetainRVMarker = 0;
@@ -380,7 +437,6 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// objc_retainBlock does not necessarily return its argument.
InstructionClass Class = GetBasicInstructionClass(Inst);
switch (Class) {
- case IC_Retain:
case IC_FusedRetainAutorelease:
case IC_FusedRetainAutoreleaseRV:
break;
@@ -389,6 +445,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
continue;
break;
+ case IC_Retain:
+ // Attempt to convert retains to retainrvs if they are next to function
+ // calls.
+ if (!OptimizeRetainCall(F, Inst))
+ break;
+ // If we succeed in our optimization, fall through.
+ // FALLTHROUGH
case IC_RetainRV: {
// If we're compiling for a target which needs a special inline-asm
// marker to do the retainAutoreleasedReturnValue optimization,
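
A simplified standalone sketch (data types and names are mine) of the adjacency test that OptimizeRetainCall, now hosted in the contract pass, performs above: starting just after the call that produced the retained value, skip no-op instructions and only transform if the next real instruction is the retain itself.

#include <iostream>
#include <string>
#include <vector>

struct Inst { std::string Name; bool IsNoop; };

// Walk forward from the instruction right after the call, skipping no-ops such
// as bitcasts of the returned value; the transform is only safe if the very
// next real instruction is the retain.
static bool retainImmediatelyFollowsCall(const std::vector<Inst> &BB,
                                         size_t CallIdx, size_t RetainIdx) {
  size_t I = CallIdx + 1;
  while (I < BB.size() && BB[I].IsNoop)
    ++I;
  return I == RetainIdx;
}

int main() {
  std::vector<Inst> BB = {{"call", false}, {"bitcast", true}, {"retain", false}};
  std::cout << retainImmediatelyFollowsCall(BB, 0, 2) << "\n";  // prints 1
  return 0;
}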
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 92d6fc4..43e2e20 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -191,13 +191,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
do {
const Value *V = Worklist.pop_back_val();
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Visiting: " << *V << "\n");
+ DEBUG(dbgs() << "Visiting: " << *V << "\n");
for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
const User *UUser = *UI;
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User: " << *UUser << "\n");
+ DEBUG(dbgs() << "User: " << *UUser << "\n");
// Special - Use by a call (callee or argument) is not considered
// to be an escape.
@@ -207,8 +207,7 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
case IC_StoreStrong:
case IC_Autorelease:
case IC_AutoreleaseRV: {
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer "
- "arguments. Pointer Escapes!\n");
+ DEBUG(dbgs() << "User copies pointer arguments. Pointer Escapes!\n");
// These special functions make copies of their pointer arguments.
return true;
}
@@ -223,12 +222,11 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
if (VisitedSet.insert(UUser)) {
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. "
- "Ptr escapes if result escapes. Adding to list.\n");
+ DEBUG(dbgs() << "User copies value. Ptr escapes if result escapes."
+ " Adding to list.\n");
Worklist.push_back(UUser);
} else {
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node."
- "\n");
+ DEBUG(dbgs() << "Already visited node.\n");
}
continue;
}
@@ -245,13 +243,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
continue;
}
// Otherwise, conservatively assume an escape.
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming ptr escapes.\n");
+ DEBUG(dbgs() << "Assuming ptr escapes.\n");
return true;
}
} while (!Worklist.empty());
// No escapes found.
- DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Ptr does not escape.\n");
+ DEBUG(dbgs() << "Ptr does not escape.\n");
return false;
}
@@ -305,6 +303,16 @@ STATISTIC(NumRets, "Number of return value forwarding "
"retain+autoreleaes eliminated");
STATISTIC(NumRRs, "Number of retain+release paths eliminated");
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+STATISTIC(NumRetainsBeforeOpt,
+ "Number of retains before optimization.");
+STATISTIC(NumReleasesBeforeOpt,
+ "Number of releases before optimization.");
+#ifndef NDEBUG
+STATISTIC(NumRetainsAfterOpt,
+ "Number of retains after optimization.");
+STATISTIC(NumReleasesAfterOpt,
+ "Number of releases after optimization.");
+#endif
namespace {
/// \enum Sequence
@@ -375,7 +383,7 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
namespace {
/// \brief Unidirectional information about either a
/// retain-decrement-use-release sequence or release-use-decrement-retain
- /// reverese sequence.
+ /// reverse sequence.
struct RRInfo {
/// After an objc_retain, the reference count of the referenced
/// object is known to be positive. Similarly, before an objc_release, the
@@ -410,6 +418,10 @@ namespace {
KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {}
void clear();
+
+ bool IsTrackingImpreciseReleases() {
+ return ReleaseMetadata != 0;
+ }
};
}
@@ -428,7 +440,7 @@ namespace {
/// True if the reference count is known to be incremented.
bool KnownPositiveRefCount;
- /// True of we've seen an opportunity for partial RR elimination, such as
+ /// True if we've seen an opportunity for partial RR elimination, such as
/// pushing calls into a CFG triangle or into one side of a CFG diamond.
bool Partial;
@@ -457,6 +469,7 @@ namespace {
}
void SetSeq(Sequence NewSeq) {
+ DEBUG(dbgs() << "Old: " << Seq << "; New: " << NewSeq << "\n");
Seq = NewSeq;
}
@@ -469,7 +482,8 @@ namespace {
}
void ResetSequenceProgress(Sequence NewSeq) {
- Seq = NewSeq;
+ DEBUG(dbgs() << "Resetting sequence progress.\n");
+ SetSeq(NewSeq);
Partial = false;
RRI.clear();
}
@@ -706,7 +720,19 @@ void BBState::MergeSucc(const BBState &Other) {
/// Enable/disable ARC sequence annotations.
static cl::opt<bool>
-EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false));
+EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false),
+ cl::desc("Enable emission of arc data flow analysis "
+ "annotations"));
+static cl::opt<bool>
+DisableCheckForCFGHazards("disable-objc-arc-checkforcfghazards", cl::init(false),
+ cl::desc("Disable check for cfg hazards when "
+ "annotating"));
+static cl::opt<std::string>
+ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier",
+ cl::init(""),
+ cl::desc("filter out all data flow annotations "
+ "but those that apply to the given "
+ "target llvm identifier."));
/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an
/// instruction so that we can track backwards when post processing via the llvm
@@ -791,6 +817,12 @@ static void AppendMDNodeToInstForPtr(unsigned NodeId,
/// state of a pointer at the entrance to a basic block.
static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
Value *Ptr, Sequence Seq) {
+ // If we have a target identifier, make sure that we match it before
+ // continuing.
+ if (!ARCAnnotationTargetIdentifier.empty() &&
+ !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+ return;
+
Module *M = BB->getParent()->getParent();
LLVMContext &C = M->getContext();
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
@@ -828,6 +860,12 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
/// of the pointer at the bottom of the basic block.
static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
Value *Ptr, Sequence Seq) {
+ // If we have a target identifier, make sure that we match it before emitting
+ // an annotation.
+ if (!ARCAnnotationTargetIdentifier.empty() &&
+ !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+ return;
+
Module *M = BB->getParent()->getParent();
LLVMContext &C = M->getContext();
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
@@ -869,6 +907,12 @@ static void GenerateARCAnnotation(unsigned InstMDId,
Sequence OldSeq,
Sequence NewSeq) {
if (EnableARCAnnotations) {
+ // If we have a target identifier, make sure that we match it before
+ // emitting an annotation.
+ if (!ARCAnnotationTargetIdentifier.empty() &&
+ !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+ return;
+
// First generate the source annotation on our pointer. This will return an
// MDString* if Ptr actually comes from an instruction implying we can put
// in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL),
@@ -909,27 +953,27 @@ static void GenerateARCAnnotation(unsigned InstMDId,
#define ANNOTATE_BB(_states, _bb, _name, _type, _direction) \
do { \
- if (EnableARCAnnotations) { \
- for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \
+ if (EnableARCAnnotations) { \
+ for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \
E = (_states)._direction##_ptr_end(); I != E; ++I) { \
- Value *Ptr = const_cast<Value*>(I->first); \
- Sequence Seq = I->second.GetSeq(); \
- GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \
+ Value *Ptr = const_cast<Value*>(I->first); \
+ Sequence Seq = I->second.GetSeq(); \
+ GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \
+ } \
} \
- } \
-} while (0)
+ } while (0)
-#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \
+#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \
ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \
Entrance, bottom_up)
-#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \
+#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \
Terminator, bottom_up)
-#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \
+#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \
Entrance, top_down)
-#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \
+#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \
+ ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \
Terminator, top_down)
#else // !ARC_ANNOTATION
@@ -955,9 +999,6 @@ namespace {
/// them. These are initialized lazily to avoid cluttering up the Module
/// with unused declarations.
- /// Declaration for ObjC runtime function
- /// objc_retainAutoreleasedReturnValue.
- Constant *RetainRVCallee;
/// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
Constant *AutoreleaseRVCallee;
/// Declaration for ObjC runtime function objc_release.
@@ -991,7 +1032,6 @@ namespace {
unsigned ARCAnnotationProvenanceSourceMDKind;
#endif // ARC_ANNOATIONS
- Constant *getRetainRVCallee(Module *M);
Constant *getAutoreleaseRVCallee(Module *M);
Constant *getReleaseCallee(Module *M);
Constant *getRetainCallee(Module *M);
@@ -1000,7 +1040,6 @@ namespace {
bool IsRetainBlockOptimizable(const Instruction *Inst);
- void OptimizeRetainCall(Function &F, Instruction *Retain);
bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
InstructionClass &Class);
@@ -1059,6 +1098,10 @@ namespace {
void OptimizeReturns(Function &F);
+#ifndef NDEBUG
+ void GatherStatistics(Function &F, bool AfterOptimization = false);
+#endif
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual bool doInitialization(Module &M);
virtual bool runOnFunction(Function &F);
@@ -1106,22 +1149,6 @@ bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
return true;
}
-Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
- if (!RetainRVCallee) {
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *Params[] = { I8X };
- FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttributeSet Attribute =
- AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
- RetainRVCallee =
- M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
- Attribute);
- }
- return RetainRVCallee;
-}
-
Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
if (!AutoreleaseRVCallee) {
LLVMContext &C = M->getContext();
@@ -1201,38 +1228,6 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
return AutoreleaseCallee;
}
-/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
-/// return value.
-void
-ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
- ImmutableCallSite CS(GetObjCArg(Retain));
- const Instruction *Call = CS.getInstruction();
- if (!Call) return;
- if (Call->getParent() != Retain->getParent()) return;
-
- // Check that the call is next to the retain.
- BasicBlock::const_iterator I = Call;
- ++I;
- while (IsNoopInstruction(I)) ++I;
- if (&*I != Retain)
- return;
-
- // Turn it to an objc_retainAutoreleasedReturnValue..
- Changed = true;
- ++NumPeeps;
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming "
- "objc_retain => objc_retainAutoreleasedReturnValue"
- " since the operand is a return value.\n"
- " Old: "
- << *Retain << "\n");
-
- cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
-
- DEBUG(dbgs() << " New: "
- << *Retain << "\n");
-}
-
/// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
/// not a return value. Or, if it can be paired with an
/// objc_autoreleaseReturnValue, delete the pair and return true.
@@ -1269,9 +1264,8 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
Changed = true;
++NumPeeps;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n"
- << " Erasing " << *RetainRV
- << "\n");
+ DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n"
+ << "Erasing " << *RetainRV << "\n");
EraseInstruction(I);
EraseInstruction(RetainRV);
@@ -1283,16 +1277,13 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
Changed = true;
++NumPeeps;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming "
- "objc_retainAutoreleasedReturnValue => "
+ DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => "
"objc_retain since the operand is not a return value.\n"
- " Old: "
- << *RetainRV << "\n");
+ "Old = " << *RetainRV << "\n");
cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
- DEBUG(dbgs() << " New: "
- << *RetainRV << "\n");
+ DEBUG(dbgs() << "New = " << *RetainRV << "\n");
return false;
}
@@ -1321,12 +1312,10 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
Changed = true;
++NumPeeps;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming "
- "objc_autoreleaseReturnValue => "
+ DEBUG(dbgs() << "Transforming objc_autoreleaseReturnValue => "
"objc_autorelease since its operand is not used as a return "
"value.\n"
- " Old: "
- << *AutoreleaseRV << "\n");
+ "Old = " << *AutoreleaseRV << "\n");
CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
AutoreleaseRVCI->
@@ -1334,8 +1323,7 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
Class = IC_Autorelease;
- DEBUG(dbgs() << " New: "
- << *AutoreleaseRV << "\n");
+ DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n");
}
@@ -1359,18 +1347,24 @@ ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst,
if (!IsRetainBlockOptimizable(Inst))
return false;
+ Changed = true;
+ ++NumPeeps;
+
+ DEBUG(dbgs() << "Strength reduced retainBlock => retain.\n");
+ DEBUG(dbgs() << "Old: " << *Inst << "\n");
CallInst *RetainBlock = cast<CallInst>(Inst);
RetainBlock->setCalledFunction(getRetainCallee(F.getParent()));
// Remove copy_on_escape metadata.
RetainBlock->setMetadata(CopyOnEscapeMDKind, 0);
Class = IC_Retain;
-
+ DEBUG(dbgs() << "New: " << *Inst << "\n");
return true;
}
/// Visit each call, one at a time, and make simplifications without doing any
/// additional analysis.
void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+ DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n");
// Reset all the flags in preparation for recomputing them.
UsedInThisFunction = 0;
@@ -1380,8 +1374,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
InstructionClass Class = GetBasicInstructionClass(Inst);
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: "
- << Class << "; " << *Inst << "\n");
+ DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
switch (Class) {
default: break;
@@ -1397,8 +1390,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_NoopCast:
Changed = true;
++NumNoops;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:"
- " " << *Inst << "\n");
+ DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
EraseInstruction(Inst);
continue;
@@ -1416,11 +1408,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
Constant::getNullValue(Ty),
CI);
llvm::Value *NewValue = UndefValue::get(CI->getType());
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
- "pointer-to-weak-pointer is undefined behavior.\n"
- " Old = " << *CI <<
- "\n New = " <<
- *NewValue << "\n");
+ DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
+ "\nOld = " << *CI << "\nNew = " << *NewValue << "\n");
CI->replaceAllUsesWith(NewValue);
CI->eraseFromParent();
continue;
@@ -1439,11 +1428,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
CI);
llvm::Value *NewValue = UndefValue::get(CI->getType());
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
- "pointer-to-weak-pointer is undefined behavior.\n"
- " Old = " << *CI <<
- "\n New = " <<
- *NewValue << "\n");
+ DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
+ "\nOld = " << *CI << "\nNew = " << *NewValue << "\n");
CI->replaceAllUsesWith(NewValue);
CI->eraseFromParent();
@@ -1452,13 +1438,13 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
break;
}
case IC_RetainBlock:
- // If we strength reduce an objc_retainBlock to amn objc_retain, continue
+ // If we strength reduce an objc_retainBlock to an objc_retain, continue
// onto the objc_retain peephole optimizations. Otherwise break.
if (!OptimizeRetainBlockCall(F, Inst, Class))
break;
// FALLTHROUGH
case IC_Retain:
- OptimizeRetainCall(F, Inst);
+ ++NumRetainsBeforeOpt;
break;
case IC_RetainRV:
if (OptimizeRetainRVCall(F, Inst))
@@ -1467,6 +1453,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
case IC_AutoreleaseRV:
OptimizeAutoreleaseRVCall(F, Inst, Class);
break;
+ case IC_Release:
+ ++NumReleasesBeforeOpt;
+ break;
}
// objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
@@ -1483,15 +1472,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
CallInst *NewCall =
CallInst::Create(getReleaseCallee(F.getParent()),
Call->getArgOperand(0), "", Call);
- NewCall->setMetadata(ImpreciseReleaseMDKind,
- MDNode::get(C, ArrayRef<Value *>()));
+ NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None));
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing "
- "objc_autorelease(x) with objc_release(x) since x is "
- "otherwise unused.\n"
- " Old: " << *Call <<
- "\n New: " <<
- *NewCall << "\n");
+ DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
+ "since x is otherwise unused.\nOld: " << *Call << "\nNew: "
+ << *NewCall << "\n");
EraseInstruction(Call);
Inst = NewCall;
@@ -1503,9 +1488,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// a tail keyword.
if (IsAlwaysTail(Class)) {
Changed = true;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword"
- " to function since it can never be passed stack args: " << *Inst <<
- "\n");
+ DEBUG(dbgs() << "Adding tail keyword to function since it can never be "
+ "passed stack args: " << *Inst << "\n");
cast<CallInst>(Inst)->setTailCall();
}
@@ -1513,8 +1497,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// semantics of ARC truly do not do so.
if (IsNeverTail(Class)) {
Changed = true;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail "
- "keyword from function: " << *Inst <<
+ DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst <<
"\n");
cast<CallInst>(Inst)->setTailCall(false);
}
@@ -1522,8 +1505,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Set nounwind as needed.
if (IsNoThrow(Class)) {
Changed = true;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw"
- " class. Setting nounwind on: " << *Inst << "\n");
+ DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst
+ << "\n");
cast<CallInst>(Inst)->setDoesNotThrow();
}
@@ -1538,8 +1521,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
if (IsNullOrUndef(Arg)) {
Changed = true;
++NumNoops;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with "
- " null are no-ops. Erasing: " << *Inst << "\n");
+ DEBUG(dbgs() << "ARC calls with null are no-ops. Erasing: " << *Inst
+ << "\n");
EraseInstruction(Inst);
continue;
}
@@ -1633,10 +1616,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
Clone->setArgOperand(0, Op);
Clone->insertBefore(InsertPos);
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning "
+ DEBUG(dbgs() << "Cloning "
<< *CInst << "\n"
- " And inserting "
- "clone at " << *InsertPos << "\n");
+ "And inserting clone at " << *InsertPos << "\n");
Worklist.push_back(std::make_pair(Clone, Incoming));
}
}
@@ -1648,7 +1630,65 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
}
} while (!Worklist.empty());
}
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n");
+}
+
+/// If we have a top down pointer in the S_Use state, make sure that there are
+/// no CFG hazards by checking the states of various bottom up pointers.
+static void CheckForUseCFGHazard(const Sequence SuccSSeq,
+ const bool SuccSRRIKnownSafe,
+ PtrState &S,
+ bool &SomeSuccHasSame,
+ bool &AllSuccsHaveSame,
+ bool &ShouldContinue) {
+ switch (SuccSSeq) {
+ case S_CanRelease: {
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
+ S.ClearSequenceProgress();
+ break;
+ }
+ ShouldContinue = true;
+ break;
+ }
+ case S_Use:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ case S_None:
+ llvm_unreachable("This should have been handled earlier.");
+ }
+}
+
+/// If we have a Top Down pointer in the S_CanRelease state, make sure that
+/// there are no CFG hazards by checking the states of various bottom up
+/// pointers.
+static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
+ const bool SuccSRRIKnownSafe,
+ PtrState &S,
+ bool &SomeSuccHasSame,
+ bool &AllSuccsHaveSame) {
+ switch (SuccSSeq) {
+ case S_CanRelease:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ case S_None:
+ llvm_unreachable("This should have been handled earlier.");
+ }
}
/// Check for critical edges, loop boundaries, irreducible control flow, or
@@ -1661,106 +1701,82 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// If any top-down local-use or possible-dec has a succ which is earlier in
// the sequence, forget it.
for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
- E = MyStates.top_down_ptr_end(); I != E; ++I)
- switch (I->second.GetSeq()) {
- default: break;
- case S_Use: {
- const Value *Arg = I->first;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- bool SomeSuccHasSame = false;
- bool AllSuccsHaveSame = true;
- PtrState &S = I->second;
- succ_const_iterator SI(TI), SE(TI, false);
-
- for (; SI != SE; ++SI) {
- Sequence SuccSSeq = S_None;
- bool SuccSRRIKnownSafe = false;
- // If VisitBottomUp has pointer information for this successor, take
- // what we know about it.
- DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
- assert(BBI != BBStates.end());
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- switch (SuccSSeq) {
- case S_None:
- case S_CanRelease: {
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
- S.ClearSequenceProgress();
- break;
- }
- continue;
- }
- case S_Use:
- SomeSuccHasSame = true;
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
- AllSuccsHaveSame = false;
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
- // If the state at the other end of any of the successor edges
- // matches the current state, require all edges to match. This
- // guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
+ E = MyStates.top_down_ptr_end(); I != E; ++I) {
+ PtrState &S = I->second;
+ const Sequence Seq = I->second.GetSeq();
+
+ // We only care about S_Retain, S_CanRelease, and S_Use.
+ if (Seq == S_None)
+ continue;
+
+ // Make sure that if extra top-down states are added in the future, this
+ // code is updated to handle them.
+ assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) &&
+ "Unknown top down sequence state.");
+
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+
+ succ_const_iterator SI(TI), SE(TI, false);
+
+ for (; SI != SE; ++SI) {
+ // If VisitBottomUp has pointer information for this successor, take
+ // what we know about it.
+ const DenseMap<const BasicBlock *, BBState>::iterator BBI =
+ BBStates.find(*SI);
+ assert(BBI != BBStates.end());
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ const Sequence SuccSSeq = SuccS.GetSeq();
+
+ // If the pointer is in an S_None state in the bottom-up direction, clear the
+ // sequence progress: the bottom-up sequence has finished, suggesting a
+ // mismatch between retains and releases. This is true for all three cases
+ // that we are handling here: S_Retain, S_Use, and S_CanRelease.
+ if (SuccSSeq == S_None) {
S.ClearSequenceProgress();
- break;
- }
- case S_CanRelease: {
- const Value *Arg = I->first;
- const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
- bool SomeSuccHasSame = false;
- bool AllSuccsHaveSame = true;
- PtrState &S = I->second;
- succ_const_iterator SI(TI), SE(TI, false);
-
- for (; SI != SE; ++SI) {
- Sequence SuccSSeq = S_None;
- bool SuccSRRIKnownSafe = false;
- // If VisitBottomUp has pointer information for this successor, take
- // what we know about it.
- DenseMap<const BasicBlock *, BBState>::iterator BBI =
- BBStates.find(*SI);
- assert(BBI != BBStates.end());
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
- SuccSSeq = SuccS.GetSeq();
- SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
- switch (SuccSSeq) {
- case S_None: {
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
- S.ClearSequenceProgress();
- break;
- }
+ continue;
+ }
+
+ // If we have S_Use or S_CanRelease, perform our CFG hazard checks.
+ const bool SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+
+ // *NOTE* We do not use Seq from above here since we are allowing for
+ // S.GetSeq() to change while we are visiting basic blocks.
+ switch(S.GetSeq()) {
+ case S_Use: {
+ bool ShouldContinue = false;
+ CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
+ SomeSuccHasSame, AllSuccsHaveSame,
+ ShouldContinue);
+ if (ShouldContinue)
continue;
- }
- case S_CanRelease:
- SomeSuccHasSame = true;
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- case S_Use:
- if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
- AllSuccsHaveSame = false;
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
+ break;
+ }
+ case S_CanRelease: {
+ CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe,
+ S, SomeSuccHasSame,
+ AllSuccsHaveSame);
+ break;
+ }
+ case S_Retain:
+ case S_None:
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ break;
}
- // If the state at the other end of any of the successor edges
- // matches the current state, require all edges to match. This
- // guards against loops in the middle of a sequence.
- if (SomeSuccHasSame && !AllSuccsHaveSame)
- S.ClearSequenceProgress();
- break;
- }
}
+
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ S.ClearSequenceProgress();
+ }
}
bool
@@ -1772,6 +1788,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
InstructionClass Class = GetInstructionClass(Inst);
const Value *Arg = 0;
+ DEBUG(dbgs() << "Class: " << Class << "\n");
+
switch (Class) {
case IC_Release: {
Arg = GetObjCArg(Inst);
@@ -1786,8 +1804,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// pairs by making PtrState hold a stack of states, but this is
// simple and avoids adding overhead for the non-nested case.
if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
- DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested "
- "releases (i.e. a release pair)\n");
+ DEBUG(dbgs() << "Found nested releases (i.e. a release pair)\n");
NestingDetected = true;
}
@@ -1820,7 +1837,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
case S_Release:
case S_MovableRelease:
case S_Use:
- S.RRI.ReverseInsertPts.clear();
+ // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an
+ // imprecise release, clear our reverse insertion points.
+ if (OldSeq != S_Use || S.RRI.IsTrackingImpreciseReleases())
+ S.RRI.ReverseInsertPts.clear();
// FALL THROUGH
case S_CanRelease:
// Don't do retain+release tracking for IC_RetainRV, because it's
@@ -1835,7 +1855,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
llvm_unreachable("bottom-up pointer in retain state!");
}
ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq());
- return NestingDetected;
+ // A retain moving bottom up can be a use.
+ break;
}
case IC_AutoreleasepoolPop:
// Conservatively, clear MyStates for all known pointers.
@@ -1861,6 +1882,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// Check for possible releases.
if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
+ << "\n");
S.ClearKnownPositiveRefCount();
switch (Seq) {
case S_Use:
@@ -1883,6 +1906,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
case S_Release:
case S_MovableRelease:
if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
+ << "\n");
assert(S.RRI.ReverseInsertPts.empty());
// If this is an invoke instruction, we're scanning it as part of
// one of its successor blocks, since we can't insert code after it
@@ -1894,6 +1919,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
S.SetSeq(S_Use);
ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
} else if (Seq == S_Release && IsUser(Class)) {
+ DEBUG(dbgs() << "PreciseReleaseUse: Seq: " << Seq << "; " << *Ptr
+ << "\n");
// Non-movable releases depend on any possible objc pointer use.
S.SetSeq(S_Stop);
ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop);
@@ -1907,6 +1934,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
break;
case S_Stop:
if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << "PreciseStopUse: Seq: " << Seq << "; " << *Ptr
+ << "\n");
S.SetSeq(S_Use);
ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
}
@@ -1927,6 +1956,9 @@ bool
ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains) {
+
+ DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n");
+
bool NestingDetected = false;
BBState &MyStates = BBStates[BB];
@@ -1960,7 +1992,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
if (isa<InvokeInst>(Inst))
continue;
- DEBUG(dbgs() << "ObjCARCOpt::VisitButtonUp: Visiting " << *Inst << "\n");
+ DEBUG(dbgs() << "Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
}
@@ -2033,13 +2065,18 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
PtrState &S = MyStates.getPtrTopDownState(Arg);
S.ClearKnownPositiveRefCount();
- switch (S.GetSeq()) {
+ Sequence OldSeq = S.GetSeq();
+
+ MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+
+ switch (OldSeq) {
case S_Retain:
case S_CanRelease:
- S.RRI.ReverseInsertPts.clear();
+ if (OldSeq == S_Retain || ReleaseMetadata != 0)
+ S.RRI.ReverseInsertPts.clear();
// FALL THROUGH
case S_Use:
- S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.ReleaseMetadata = ReleaseMetadata;
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
Releases[Inst] = S.RRI;
ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None);
@@ -2078,6 +2115,8 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
// Check for possible releases.
if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
+ << "\n");
S.ClearKnownPositiveRefCount();
switch (Seq) {
case S_Retain:
@@ -2105,6 +2144,8 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
switch (Seq) {
case S_CanRelease:
if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
+ << "\n");
S.SetSeq(S_Use);
ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use);
}
@@ -2127,6 +2168,7 @@ bool
ObjCARCOpt::VisitTopDown(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
DenseMap<Value *, RRInfo> &Releases) {
+ DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n");
bool NestingDetected = false;
BBState &MyStates = BBStates[BB];
@@ -2156,7 +2198,7 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
Instruction *Inst = I;
- DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n");
+ DEBUG(dbgs() << "Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
}
@@ -2165,6 +2207,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
// bottom of the basic block.
ANNOTATE_TOPDOWN_BBEND(MyStates, BB);
+#ifdef ARC_ANNOTATIONS
+ if (!(EnableARCAnnotations && DisableCheckForCFGHazards))
+#endif
CheckForCFGHazards(BB, BBStates, MyStates);
return NestingDetected;
}
@@ -2296,6 +2341,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Type *ArgTy = Arg->getType();
Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
+ DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n");
+
// Insert the new retain and release calls.
for (SmallPtrSet<Instruction *, 2>::const_iterator
PI = ReleasesToMove.ReverseInsertPts.begin(),
@@ -2308,10 +2355,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Call->setDoesNotThrow();
Call->setTailCall();
- DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call
- << "\n"
- " At insertion point: " << *InsertPt
- << "\n");
+ DEBUG(dbgs() << "Inserting new Retain: " << *Call << "\n"
+ "At insertion point: " << *InsertPt << "\n");
}
for (SmallPtrSet<Instruction *, 2>::const_iterator
PI = RetainsToMove.ReverseInsertPts.begin(),
@@ -2328,10 +2373,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
if (ReleasesToMove.IsTailCallRelease)
Call->setTailCall();
- DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call
- << "\n"
- " At insertion point: " << *InsertPt
- << "\n");
+ DEBUG(dbgs() << "Inserting new Release: " << *Call << "\n"
+ "At insertion point: " << *InsertPt << "\n");
}
// Delete the original retain and release calls.
@@ -2341,8 +2384,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Instruction *OrigRetain = *AI;
Retains.blot(OrigRetain);
DeadInsts.push_back(OrigRetain);
- DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain <<
- "\n");
+ DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n");
}
for (SmallPtrSet<Instruction *, 2>::const_iterator
AI = ReleasesToMove.Calls.begin(),
@@ -2350,9 +2392,9 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
Instruction *OrigRelease = *AI;
Releases.erase(OrigRelease);
DeadInsts.push_back(OrigRelease);
- DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease
- << "\n");
+ DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n");
}
+
}
bool
@@ -2506,6 +2548,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (OldDelta != 0)
return false;
+#ifdef ARC_ANNOTATIONS
+ // Do not move calls if ARC annotations are requested.
+ if (EnableARCAnnotations)
+ return false;
+#endif // ARC_ANNOTATIONS
+
Changed = true;
assert(OldCount != 0 && "Unreachable code?");
NumRRs += OldCount - NewCount;
@@ -2524,6 +2572,8 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
Module *M) {
+ DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");
+
bool AnyPairsCompletelyEliminated = false;
RRInfo RetainsToMove;
RRInfo ReleasesToMove;
@@ -2539,8 +2589,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
Instruction *Retain = cast<Instruction>(V);
- DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain
- << "\n");
+ DEBUG(dbgs() << "Visiting: " << *Retain << "\n");
Value *Arg = GetObjCArg(Retain);
@@ -2567,12 +2616,6 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
ReleasesToMove, Arg, KnownSafe,
AnyPairsCompletelyEliminated);
-#ifdef ARC_ANNOTATIONS
- // Do not move calls if ARC annotations are requested. If we were to move
- // calls in this case, we would not be able
- PerformMoveCalls = PerformMoveCalls && !EnableARCAnnotations;
-#endif // ARC_ANNOTATIONS
-
if (PerformMoveCalls) {
// Ok, everything checks out and we're all set. Let's move/delete some
// code!
@@ -2597,14 +2640,15 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
/// Weak pointer optimizations.
void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
+ DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n");
+
// First, do memdep-style RLE and S2L optimizations. We can't use memdep
// itself because it uses AliasAnalysis and we need to do provenance
// queries instead.
for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
Instruction *Inst = &*I++;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst <<
- "\n");
+ DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
InstructionClass Class = GetBasicInstructionClass(Inst);
if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
@@ -2752,9 +2796,6 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
done:;
}
}
-
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n");
-
}
/// Identify program paths which execute sequences of retains and releases which
@@ -2820,17 +2861,17 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
BB, Autorelease, DepInsts, Visited, PA);
if (DepInsts.size() != 1)
return 0;
-
+
CallInst *Retain =
dyn_cast_or_null<CallInst>(*DepInsts.begin());
-
+
// Check that we found a retain with the same argument.
if (!Retain ||
!IsRetain(GetBasicInstructionClass(Retain)) ||
GetObjCArg(Retain) != Arg) {
return 0;
}
-
+
return Retain;
}
@@ -2847,7 +2888,7 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
BB, Ret, DepInsts, V, PA);
if (DepInsts.size() != 1)
return 0;
-
+
CallInst *Autorelease =
dyn_cast_or_null<CallInst>(*DepInsts.begin());
if (!Autorelease)
@@ -2857,7 +2898,7 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
return 0;
if (GetObjCArg(Autorelease) != Arg)
return 0;
-
+
return Autorelease;
}
@@ -2873,60 +2914,87 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
if (!F.getReturnType()->isPointerTy())
return;
+ DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n");
+
SmallPtrSet<Instruction *, 4> DependingInstructions;
SmallPtrSet<const BasicBlock *, 4> Visited;
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
BasicBlock *BB = FI;
ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n");
+ DEBUG(dbgs() << "Visiting: " << *Ret << "\n");
if (!Ret)
continue;
-
+
const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
-
- // Look for an ``autorelease'' instruction that is a predecssor of Ret and
+
+ // Look for an ``autorelease'' instruction that is a predecessor of Ret and
// dependent on Arg such that there are no instructions dependent on Arg
// that need a positive ref count in between the autorelease and Ret.
CallInst *Autorelease =
FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret,
DependingInstructions, Visited,
PA);
- if (Autorelease) {
- DependingInstructions.clear();
- Visited.clear();
-
- CallInst *Retain =
- FindPredecessorRetainWithSafePath(Arg, BB, Autorelease,
- DependingInstructions, Visited, PA);
- if (Retain) {
- DependingInstructions.clear();
- Visited.clear();
-
- // Check that there is nothing that can affect the reference count
- // between the retain and the call. Note that Retain need not be in BB.
- if (HasSafePathToPredecessorCall(Arg, Retain, DependingInstructions,
- Visited, PA)) {
- // If so, we can zap the retain and autorelease.
- Changed = true;
- ++NumRets;
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain
- << "\n Erasing: "
- << *Autorelease << "\n");
- EraseInstruction(Retain);
- EraseInstruction(Autorelease);
- }
- }
- }
-
DependingInstructions.clear();
Visited.clear();
+
+ if (!Autorelease)
+ continue;
+
+ CallInst *Retain =
+ FindPredecessorRetainWithSafePath(Arg, BB, Autorelease,
+ DependingInstructions, Visited, PA);
+ DependingInstructions.clear();
+ Visited.clear();
+
+ if (!Retain)
+ continue;
+
+ // Check that there is nothing that can affect the reference count
+ // between the retain and the call. Note that Retain need not be in BB.
+ bool HasSafePathToCall = HasSafePathToPredecessorCall(Arg, Retain,
+ DependingInstructions,
+ Visited, PA);
+ DependingInstructions.clear();
+ Visited.clear();
+
+ if (!HasSafePathToCall)
+ continue;
+
+ // If so, we can zap the retain and autorelease.
+ Changed = true;
+ ++NumRets;
+ DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: "
+ << *Autorelease << "\n");
+ EraseInstruction(Retain);
+ EraseInstruction(Autorelease);
}
+}
- DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n");
+#ifndef NDEBUG
+void
+ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) {
+ llvm::Statistic &NumRetains =
+ AfterOptimization? NumRetainsAfterOpt : NumRetainsBeforeOpt;
+ llvm::Statistic &NumReleases =
+ AfterOptimization? NumReleasesAfterOpt : NumReleasesBeforeOpt;
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ switch (GetBasicInstructionClass(Inst)) {
+ default:
+ break;
+ case IC_Retain:
+ ++NumRetains;
+ break;
+ case IC_Release:
+ ++NumReleases;
+ break;
+ }
+ }
}
+#endif
bool ObjCARCOpt::doInitialization(Module &M) {
if (!EnableARCOpts)
@@ -2958,7 +3026,6 @@ bool ObjCARCOpt::doInitialization(Module &M) {
// calls finalizers which can have arbitrary side effects.
// These are initialized lazily.
- RetainRVCallee = 0;
AutoreleaseRVCallee = 0;
ReleaseCallee = 0;
RetainCallee = 0;
@@ -2978,7 +3045,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
Changed = false;
- DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n");
+ DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() << " >>>"
+ "\n");
PA.setAA(&getAnalysis<AliasAnalysis>());
@@ -2986,7 +3054,7 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
// when compiling code that isn't ObjC, skip these if the relevant ObjC
// library functions aren't declared.
- // Preliminary optimizations. This also computs UsedInThisFunction.
+ // Preliminary optimizations. This also computes UsedInThisFunction.
OptimizeIndividualCalls(F);
// Optimizations for weak pointers.
@@ -3013,6 +3081,13 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
(1 << IC_AutoreleaseRV)))
OptimizeReturns(F);
+ // Gather statistics after optimization.
+#ifndef NDEBUG
+ if (AreStatisticsEnabled()) {
+ GatherStatistics(F, true);
+ }
+#endif
+
DEBUG(dbgs() << "\n");
return Changed;
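
A reduced standalone sketch (counter names invented) of the statistics gathering the hunks above introduce: the same classification walk runs once before the optimizer rewrites the function and once after, so the before/after retain and release counts can be compared.

#include <iostream>
#include <string>
#include <vector>

struct Counters { unsigned Retains; unsigned Releases; };

// Classify every "instruction" (here just a call name) once and bump the
// matching counter; the pass runs this both before and after optimization.
static Counters gatherStatistics(const std::vector<std::string> &Calls) {
  Counters C = {0, 0};
  for (const std::string &Name : Calls) {
    if (Name == "objc_retain")
      ++C.Retains;
    else if (Name == "objc_release")
      ++C.Releases;
  }
  return C;
}

int main() {
  std::vector<std::string> Before = {"objc_retain", "objc_release", "objc_retain"};
  Counters B = gatherStatistics(Before);
  std::cout << B.Retains << " retains, " << B.Releases << " releases\n";
  return 0;
}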
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 015fd2e..f0d29c8 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -88,7 +89,7 @@ namespace {
/// Keeps track of non-local addresses that have been sunk into a block.
/// This allows us to avoid inserting duplicate code for blocks with
/// multiple load/stores of the same address.
- DenseMap<Value*, Value*> SunkAddrs;
+ ValueMap<Value*, Value*> SunkAddrs;
/// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
/// be updated.
@@ -1653,10 +1654,6 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// start of the block.
CurInstIterator = BB->begin();
SunkAddrs.clear();
- } else {
- // This address is now available for reassignment, so erase the table
- // entry; we don't want to match some completely different instruction.
- SunkAddrs[Addr] = 0;
}
}
++NumMemoryInsts;
@@ -1761,7 +1758,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
if (!DefIsLiveOut)
return false;
- // Make sure non of the uses are PHI nodes.
+ // Make sure none of the uses are PHI nodes.
for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
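
The SunkAddrs change above swaps DenseMap for ValueMap and drops the manual entry reset. A standalone sketch (plain C++, not LLVM) of the hazard a raw-pointer-keyed map leaves behind, which ValueMap's tracking of Value deletion and RAUW avoids:

#include <iostream>
#include <map>

int main() {
  std::map<void *, int> Sunk;     // stand-in for the old DenseMap<Value*, Value*>
  int *A = new int(1);
  Sunk[A] = 42;                   // remember something keyed by A's address
  delete A;                       // A dies, but the map entry survives
  int *B = new int(2);            // an unrelated object may reuse A's address
  if (Sunk.count(B))              // ...and would then be mistaken for A
    std::cout << "stale entry matched a new object\n";
  delete B;
  return 0;
}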
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index 129af8d..f350b9b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -498,6 +498,75 @@ void ValueTable::verifyRemoved(const Value *V) const {
//===----------------------------------------------------------------------===//
namespace {
+ class GVN;
+ struct AvailableValueInBlock {
+ /// BB - The basic block in question.
+ BasicBlock *BB;
+ enum ValType {
+ SimpleVal, // A simple offsetted value that is accessed.
+ LoadVal, // A value produced by a load.
+ MemIntrin // A memory intrinsic which is loaded from.
+ };
+
+ /// V - The value that is live out of the block.
+ PointerIntPair<Value *, 2, ValType> Val;
+
+ /// Offset - The byte offset in Val that is interesting for the load query.
+ unsigned Offset;
+
+ static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(V);
+ Res.Val.setInt(SimpleVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(MI);
+ Res.Val.setInt(MemIntrin);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(LI);
+ Res.Val.setInt(LoadVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+ bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
+ Value *getSimpleValue() const {
+ assert(isSimpleValue() && "Wrong accessor");
+ return Val.getPointer();
+ }
+
+ LoadInst *getCoercedLoadValue() const {
+ assert(isCoercedLoadValue() && "Wrong accessor");
+ return cast<LoadInst>(Val.getPointer());
+ }
+
+ MemIntrinsic *getMemIntrinValue() const {
+ assert(isMemIntrinValue() && "Wrong accessor");
+ return cast<MemIntrinsic>(Val.getPointer());
+ }
+
+ /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+ /// defined here to the specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const;
+ };
class GVN : public FunctionPass {
bool NoLoads;
@@ -519,6 +588,11 @@ namespace {
BumpPtrAllocator TableAllocator;
SmallVector<Instruction*, 8> InstrsToErase;
+
+ typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
+ typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect;
+ typedef SmallVector<BasicBlock*, 64> UnavailBlkVect;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
@@ -599,11 +673,17 @@ namespace {
}
- // Helper fuctions
- // FIXME: eliminate or document these better
+ // Helper functions for redundant load elimination
bool processLoad(LoadInst *L);
- bool processInstruction(Instruction *I);
bool processNonLocalLoad(LoadInst *L);
+ void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks);
+ bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks);
+
+ // Other helper routines
+ bool processInstruction(Instruction *I);
bool processBlock(BasicBlock *BB);
void dump(DenseMap<uint32_t, Value*> &d);
bool iterateOnFunction(Function &F);
@@ -1159,114 +1239,6 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
-namespace {
-
-struct AvailableValueInBlock {
- /// BB - The basic block in question.
- BasicBlock *BB;
- enum ValType {
- SimpleVal, // A simple offsetted value that is accessed.
- LoadVal, // A value produced by a load.
- MemIntrin // A memory intrinsic which is loaded from.
- };
-
- /// V - The value that is live out of the block.
- PointerIntPair<Value *, 2, ValType> Val;
-
- /// Offset - The byte offset in Val that is interesting for the load query.
- unsigned Offset;
-
- static AvailableValueInBlock get(BasicBlock *BB, Value *V,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(V);
- Res.Val.setInt(SimpleVal);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(MI);
- Res.Val.setInt(MemIntrin);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(LI);
- Res.Val.setInt(LoadVal);
- Res.Offset = Offset;
- return Res;
- }
-
- bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
- bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
- bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
-
- Value *getSimpleValue() const {
- assert(isSimpleValue() && "Wrong accessor");
- return Val.getPointer();
- }
-
- LoadInst *getCoercedLoadValue() const {
- assert(isCoercedLoadValue() && "Wrong accessor");
- return cast<LoadInst>(Val.getPointer());
- }
-
- MemIntrinsic *getMemIntrinValue() const {
- assert(isMemIntrinValue() && "Wrong accessor");
- return cast<MemIntrinsic>(Val.getPointer());
- }
-
- /// MaterializeAdjustedValue - Emit code into this block to adjust the value
- /// defined here to the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
- Value *Res;
- if (isSimpleValue()) {
- Res = getSimpleValue();
- if (Res->getType() != LoadTy) {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
- Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
- *TD);
-
- DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
- << *getSimpleValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- } else if (isCoercedLoadValue()) {
- LoadInst *Load = getCoercedLoadValue();
- if (Load->getType() == LoadTy && Offset == 0) {
- Res = Load;
- } else {
- Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
- gvn);
-
- DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
- << *getCoercedLoadValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- } else {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
- Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
- LoadTy, BB->getTerminator(), *TD);
- DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
- << " " << *getMemIntrinValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- return Res;
- }
-};
-
-} // end anonymous namespace
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
@@ -1323,48 +1295,59 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
return V;
}
+Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
+ Value *Res;
+ if (isSimpleValue()) {
+ Res = getSimpleValue();
+ if (Res->getType() != LoadTy) {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+ *TD);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ << *getSimpleValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else if (isCoercedLoadValue()) {
+ LoadInst *Load = getCoercedLoadValue();
+ if (Load->getType() == LoadTy && Offset == 0) {
+ Res = Load;
+ } else {
+ Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+ gvn);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
+ << *getCoercedLoadValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+ LoadTy, BB->getTerminator(), *TD);
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ << " " << *getMemIntrinValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ return Res;
+}
+
static bool isLifetimeStart(const Instruction *Inst) {
if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
return II->getIntrinsicID() == Intrinsic::lifetime_start;
return false;
}
-/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
-/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *LI) {
- // Find the non-local dependencies of the load.
- SmallVector<NonLocalDepResult, 64> Deps;
- AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
- MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
- //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
- // << Deps.size() << *LI << '\n');
-
- // If we had to process more than one hundred blocks to find the
- // dependencies, this load isn't worth worrying about. Optimizing
- // it will be too expensive.
- unsigned NumDeps = Deps.size();
- if (NumDeps > 100)
- return false;
-
- // If we had a phi translation failure, we'll have a single entry which is a
- // clobber in the current block. Reject this early.
- if (NumDeps == 1 &&
- !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
- DEBUG(
- dbgs() << "GVN: non-local load ";
- WriteAsOperand(dbgs(), LI);
- dbgs() << " has unknown dependencies\n";
- );
- return false;
- }
+void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
- SmallVector<AvailableValueInBlock, 64> ValuesPerBlock;
- SmallVector<BasicBlock*, 64> UnavailableBlocks;
-
+ unsigned NumDeps = Deps.size();
for (unsigned i = 0, e = NumDeps; i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1480,35 +1463,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
}
UnavailableBlocks.push_back(DepBB);
- continue;
}
+}
- // If we have no predecessors that produce a known value for this load, exit
- // early.
- if (ValuesPerBlock.empty()) return false;
-
- // If all of the instructions we depend on produce a known value for this
- // load, then it is fully redundant and we can use PHI insertion to compute
- // its value. Insert PHIs and remove the fully redundant value now.
- if (UnavailableBlocks.empty()) {
- DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
-
- // Perform PHI construction.
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
- LI->replaceAllUsesWith(V);
-
- if (isa<PHINode>(V))
- V->takeName(LI);
- if (V->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(V);
- markInstructionForDeletion(LI);
- ++NumGVNLoad;
- return true;
- }
-
- if (!EnablePRE || !EnableLoadPRE)
- return false;
-
+bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Okay, we have *some* definitions of the value. This means that the value
// is available in some of our (transitive) predecessors. Lets think about
// doing PRE of this load. This will involve inserting a new load into the
@@ -1526,7 +1485,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
- bool allSingleSucc = true;
while (TmpBB->getSinglePredecessor()) {
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1615,13 +1573,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// pointer if it is not available.
PHITransAddr Address(LI->getPointerOperand(), TD);
Value *LoadPtr = 0;
- if (allSingleSucc) {
- LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
- *DT, NewInsts);
- } else {
- Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
- LoadPtr = Address.getAddr();
- }
+ LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+ *DT, NewInsts);
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
@@ -1632,24 +1585,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
break;
}
- // Make sure it is valid to move this load here. We have to watch out for:
- // @1 = getelementptr (i8* p, ...
- // test p and branch if == 0
- // load @1
- // It is valid to have the getelementptr before the test, even if p can
- // be 0, as getelementptr only does address arithmetic.
- // If we are not pushing the value through any multiple-successor blocks
- // we do not have this case. Otherwise, check that the load is safe to
- // put anywhere; this can be improved, but should be conservatively safe.
- if (!allSingleSucc &&
- // FIXME: REEVALUTE THIS.
- !isSafeToLoadUnconditionally(LoadPtr,
- UnavailablePred->getTerminator(),
- LI->getAlignment(), TD)) {
- CanDoPRE = false;
- break;
- }
-
I->second = LoadPtr;
}
@@ -1714,6 +1649,72 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
+bool GVN::processNonLocalLoad(LoadInst *LI) {
+ // Step 1: Find the non-local dependencies of the load.
+ LoadDepVect Deps;
+ AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+ MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
+
+ // If we had to process more than one hundred blocks to find the
+ // dependencies, this load isn't worth worrying about. Optimizing
+ // it will be too expensive.
+ unsigned NumDeps = Deps.size();
+ if (NumDeps > 100)
+ return false;
+
+ // If we had a phi translation failure, we'll have a single entry which is a
+ // clobber in the current block. Reject this early.
+ if (NumDeps == 1 &&
+ !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
+ DEBUG(
+ dbgs() << "GVN: non-local load ";
+ WriteAsOperand(dbgs(), LI);
+ dbgs() << " has unknown dependencies\n";
+ );
+ return false;
+ }
+
+ // Step 2: Analyze the availability of the load
+ AvailValInBlkVect ValuesPerBlock;
+ UnavailBlkVect UnavailableBlocks;
+ AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);
+
+ // If we have no predecessors that produce a known value for this load, exit
+ // early.
+ if (ValuesPerBlock.empty())
+ return false;
+
+ // Step 3: Eliminate full redundancy.
+ //
+ // If all of the instructions we depend on produce a known value for this
+ // load, then it is fully redundant and we can use PHI insertion to compute
+ // its value. Insert PHIs and remove the fully redundant value now.
+ if (UnavailableBlocks.empty()) {
+ DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+ LI->replaceAllUsesWith(V);
+
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(LI);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ // Step 4: Eliminate partial redundancy.
+ if (!EnablePRE || !EnableLoadPRE)
+ return false;
+
+ return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
+}
+
+
static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
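The AvailableValueInBlock record that this change moves toward the top of GVN.cpp stores the block, a value pointer, and a two-bit kind tag (SimpleVal, LoadVal, or MemIntrin) in a single PointerIntPair, so all three flavors of available value share one compact, copyable struct. The sketch below only illustrates that low-bit tagging idea; the class name and layout are assumptions for illustration, not LLVM's actual PointerIntPair implementation.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative tagged pointer: a payload aligned to at least 4 bytes leaves
// its two low address bits free, so a small kind tag can live there without
// growing the struct.
template <typename T> class TaggedPtr {
  uintptr_t Bits = 0;
  enum : uintptr_t { TagMask = 0x3 };
public:
  void set(T *P, unsigned Tag) {
    uintptr_t Raw = reinterpret_cast<uintptr_t>(P);
    assert((Raw & TagMask) == 0 && Tag <= TagMask && "need 4-byte alignment");
    Bits = Raw | Tag;
  }
  T *getPointer() const { return reinterpret_cast<T *>(Bits & ~uintptr_t(TagMask)); }
  unsigned getTag() const { return unsigned(Bits & TagMask); }
};

enum ValType { SimpleVal, LoadVal, MemIntrin }; // mirrors the enum in the patch

int main() {
  alignas(4) int Payload = 42;
  TaggedPtr<int> Val;
  Val.set(&Payload, LoadVal);
  std::printf("value=%d kind=%u\n", *Val.getPointer(), Val.getTag());
  return 0;
}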
diff --git a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
index 5d02c68..4796eb2 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -200,9 +200,8 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) {
if (!GV || !GV->hasInitializer()) return;
// Should be an array of 'i8*'.
- const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
- if (InitList == 0) return;
-
+ const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
+
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
if (const GlobalVariable *G =
dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index e98ae95..14c5655 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -56,8 +56,8 @@ namespace {
}
bool runOnLoop(Loop *L, LPPassManager &LPM);
- void simplifyLoopLatch(Loop *L);
- bool rotateLoop(Loop *L);
+ bool simplifyLoopLatch(Loop *L);
+ bool rotateLoop(Loop *L, bool SimplifiedLatch);
private:
LoopInfo *LI;
@@ -84,13 +84,14 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
// loop exit.
- simplifyLoopLatch(L);
+ bool SimplifiedLatch = simplifyLoopLatch(L);
// One loop can be rotated multiple times.
bool MadeChange = false;
- while (rotateLoop(L))
+ while (rotateLoop(L, SimplifiedLatch)) {
MadeChange = true;
-
+ SimplifiedLatch = false;
+ }
return MadeChange;
}
@@ -212,25 +213,25 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
/// canonical form so downstream passes can handle it.
///
/// I don't believe this invalidates SCEV.
-void LoopRotate::simplifyLoopLatch(Loop *L) {
+bool LoopRotate::simplifyLoopLatch(Loop *L) {
BasicBlock *Latch = L->getLoopLatch();
if (!Latch || Latch->hasAddressTaken())
- return;
+ return false;
BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
if (!Jmp || !Jmp->isUnconditional())
- return;
+ return false;
BasicBlock *LastExit = Latch->getSinglePredecessor();
if (!LastExit || !L->isLoopExiting(LastExit))
- return;
+ return false;
BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
if (!BI)
- return;
+ return false;
if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
- return;
+ return false;
DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
<< LastExit->getName() << "\n");
@@ -253,10 +254,20 @@ void LoopRotate::simplifyLoopLatch(Loop *L) {
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
DT->eraseNode(Latch);
Latch->eraseFromParent();
+ return true;
}
/// Rotate loop LP. Return true if the loop is rotated.
-bool LoopRotate::rotateLoop(Loop *L) {
+///
+/// \param SimplifiedLatch is true if the latch was just folded into the final
+/// loop exit. In this case we may want to rotate even though the new latch is
+/// now an exiting branch. This rotation would have happened had the latch not
+/// been simplified. However, if SimplifiedLatch is false, then we avoid
+/// rotating loops in which the latch exits to avoid excessive or endless
+/// rotation. LoopRotate should be repeatable and converge to a canonical
+/// form. This property is satisfied because simplifying the loop latch can only
+/// happen once across multiple invocations of the LoopRotate pass.
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
@@ -276,7 +287,12 @@ bool LoopRotate::rotateLoop(Loop *L) {
// If the loop latch already contains a branch that leaves the loop then the
// loop is already rotated.
- if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
+ if (OrigLatch == 0)
+ return false;
+
+ // Rotate if either the loop latch does *not* exit the loop, or if the loop
+ // latch was just simplified.
+ if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)
return false;
// Check size of original header and reject loop if it is very big or we can't
@@ -505,4 +521,3 @@ bool LoopRotate::rotateLoop(Loop *L) {
++NumRotated;
return true;
}
-
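The new SimplifiedLatch flag interacts with the shape rotateLoop is trying to reach. As a purely hypothetical C-level picture (no LLVM code, names invented), rotation turns a header-exiting while loop into a guarded do-while whose latch carries the exiting branch, which is the canonical form later passes expect:

#include <cstdio>

int sum_while(const int *A, int N) {   // header exits: the un-rotated shape
  int S = 0, I = 0;
  while (I < N) { S += A[I]; ++I; }
  return S;
}

int sum_rotated(const int *A, int N) { // latch exits: the rotated shape
  int S = 0, I = 0;
  if (I < N) {                         // guard hoisted out of the loop
    do { S += A[I]; ++I; } while (I < N);
  }
  return S;
}

int main() {
  int A[] = {1, 2, 3, 4};
  std::printf("%d %d\n", sum_while(A, 4), sum_rotated(A, 4)); // 10 10
  return 0;
}

Once the latch itself is the exiting branch, another rotation would only churn, which is why rotateLoop now refuses to rotate a loop whose latch exits unless simplifyLoopLatch just created that latch.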
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 7ee4027..a3c241d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -143,13 +143,9 @@ namespace {
    // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
    // than Y, which is defined earlier than Z. Permuting "x | 1", "Y & 2",
    // "z" into the order X-Y-Z is better than any other order.
- class PtrSortFunctor {
- ArrayRef<XorOpnd> A;
-
- public:
- PtrSortFunctor(ArrayRef<XorOpnd> Array) : A(Array) {}
- bool operator()(unsigned LHSIndex, unsigned RHSIndex) {
- return A[LHSIndex].getSymbolicRank() < A[RHSIndex].getSymbolicRank();
+ struct PtrSortFunctor {
+ bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) {
+ return LHS->getSymbolicRank() < RHS->getSymbolicRank();
}
};
private:
@@ -1199,9 +1195,6 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
if (X != Opnd2->getSymbolicPart())
return false;
- const APInt &C1 = Opnd1->getConstPart();
- const APInt &C2 = Opnd2->getConstPart();
-
  // This many instructions become dead. (At least "Opnd1 ^ Opnd2" will die.)
int DeadInstNum = 1;
if (Opnd1->getValue()->hasOneUse())
@@ -1219,6 +1212,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
if (Opnd2->isOrExpr())
std::swap(Opnd1, Opnd2);
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
APInt C3((~C1) ^ C2);
// Do not increase code size!
@@ -1234,6 +1229,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
} else if (Opnd1->isOrExpr()) {
// Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
//
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
APInt C3 = C1 ^ C2;
// Do not increase code size
@@ -1248,6 +1245,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
} else {
// Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
//
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
APInt C3 = C1 ^ C2;
Res = createAndInstr(I, X, C3);
}
@@ -1274,7 +1273,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
return 0;
SmallVector<XorOpnd, 8> Opnds;
- SmallVector<unsigned, 8> OpndIndices;
+ SmallVector<XorOpnd*, 8> OpndPtrs;
Type *Ty = Ops[0].Op->getType();
APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);
@@ -1285,23 +1284,29 @@ Value *Reassociate::OptimizeXor(Instruction *I,
XorOpnd O(V);
O.setSymbolicRank(getRank(O.getSymbolicPart()));
Opnds.push_back(O);
- OpndIndices.push_back(Opnds.size() - 1);
} else
ConstOpnd ^= cast<ConstantInt>(V)->getValue();
}
+ // NOTE: From this point on, do *NOT* add/delete elements to/from "Opnds".
+ // Doing so would invalidate the iterators into "Opnds", and hence invalidate
+ // "OpndPtrs" as well. For the same reason, do not fuse this loop
+ // with the previous loop --- the iterators of "Opnds" may be invalidated
+ // when new elements are added to the vector.
+ for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
+ OpndPtrs.push_back(&Opnds[i]);
+
// Step 2: Sort the Xor-Operands in a way such that the operands containing
// the same symbolic value cluster together. For instance, the input operand
// sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
// ("x | 123", "x & 789", "y & 456").
- std::sort(OpndIndices.begin(), OpndIndices.end(),
- XorOpnd::PtrSortFunctor(Opnds));
+ std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
// Step 3: Combine adjacent operands
XorOpnd *PrevOpnd = 0;
bool Changed = false;
for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
- XorOpnd *CurrOpnd = &Opnds[OpndIndices[i]];
+ XorOpnd *CurrOpnd = OpndPtrs[i];
// The combined value
Value *CV;
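The Xor-Rule comments in CombineXorOpnd state algebraic identities without proof. The tiny standalone program below (not part of LLVM, added here only as a sanity check) brute-forces Rule 3 and Rule 4 over every 8-bit combination of x, c1 and c2:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C1 = 0; C1 < 256; ++C1)
      for (unsigned C2 = 0; C2 < 256; ++C2) {
        uint8_t x = X, c1 = C1, c2 = C2, c3 = c1 ^ c2;
        // Xor-Rule 3: (x | c1) ^ (x | c2) == (x & c3) ^ c3
        assert(uint8_t((x | c1) ^ (x | c2)) == uint8_t((x & c3) ^ c3));
        // Xor-Rule 4: (x & c1) ^ (x & c2) == x & c3
        assert(uint8_t((x & c1) ^ (x & c2)) == uint8_t(x & c3));
      }
  std::puts("both xor rules hold for all 8-bit operands");
  return 0;
}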
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index f6bb365..d073e78 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2322,17 +2322,15 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
ConstantVector::get(Mask),
Name + ".expand");
- DEBUG(dbgs() << " shuffle1: " << *V << "\n");
+ DEBUG(dbgs() << " shuffle: " << *V << "\n");
Mask.clear();
for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
- if (i >= BeginIndex && i < EndIndex)
- Mask.push_back(IRB.getInt32(i));
- else
- Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
- V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask),
- Name + "insert");
- DEBUG(dbgs() << " shuffle2: " << *V << "\n");
+ Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
+
+ V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend");
+
+ DEBUG(dbgs() << " blend: " << *V << "\n");
return V;
}
@@ -2671,6 +2669,7 @@ private:
StoreInst *NewSI;
if (BeginOffset == NewAllocaBeginOffset &&
+ EndOffset == NewAllocaEndOffset &&
canConvertValue(TD, V->getType(), NewAllocaTy)) {
V = convertValue(TD, IRB, V, NewAllocaTy);
NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
@@ -3050,16 +3049,16 @@ private:
bool visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
-
- // Find the operand we need to rewrite here.
- bool IsTrueVal = SI.getTrueValue() == OldPtr;
- if (IsTrueVal)
- assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!");
- else
- assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!");
+ assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
+ "Pointer isn't an operand!");
Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType());
- SI.setOperand(IsTrueVal ? 1 : 2, NewPtr);
+ // Replace the operands which were using the old pointer.
+ if (SI.getOperand(1) == OldPtr)
+ SI.setOperand(1, NewPtr);
+ if (SI.getOperand(2) == OldPtr)
+ SI.setOperand(2, NewPtr);
+
DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
return false;
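Earlier in this file, the rewritten insertVector builds an i1 mask that is true exactly on [BeginIndex, EndIndex) and emits one select instead of a second shufflevector. A scalar model of that blend follows; the array sizes and names are made up for illustration:

#include <cstdio>

int main() {
  const unsigned N = 8, BeginIndex = 2, EndIndex = 5;
  int Old[N] = {0, 1, 2, 3, 4, 5, 6, 7};       // the existing vector value
  int V[N]   = {0, 0, 90, 91, 92, 0, 0, 0};    // new lanes, already widened
  int Out[N];
  for (unsigned i = 0; i != N; ++i) {
    bool Mask = (i >= BeginIndex && i < EndIndex); // IRB.getInt1(...)
    Out[i] = Mask ? V[i] : Old[i];                 // CreateSelect(Mask, V, Old)
  }
  for (unsigned i = 0; i != N; ++i)
    std::printf("%d ", Out[i]);                    // 0 1 90 91 92 5 6 7
  std::printf("\n");
  return 0;
}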
diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e590a37..bfde334 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1462,8 +1462,8 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
}
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
-// which runs on all of the alloca instructions in the function, removing them
-// if they are only used by getelementptr instructions.
+// which runs on all of the alloca instructions in the entry block, removing
+// them if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
std::vector<AllocaInst*> WorkList;
@@ -1724,17 +1724,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
continue;
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
- if (!IdxVal) {
- // Non constant GEPs are only a problem on arrays, structs, and pointers
- // Vectors can be dynamically indexed.
- // FIXME: Add support for dynamic indexing on arrays. This should be
- // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0]
- // isn't.
- if (!(*GEPIt)->isVectorTy())
- return MarkUnsafe(Info, GEPI);
- NonConstant = true;
- NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt);
- }
+ if (!IdxVal)
+ return MarkUnsafe(Info, GEPI);
}
// Compute the offset due to this GEP and check if the alloca has a
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 63d7a1d..be8d39e 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -87,29 +87,26 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
assert(VMap.count(I) && "No mapping from source argument specified!");
#endif
- // Clone any attributes.
- if (NewFunc->arg_size() == OldFunc->arg_size())
- NewFunc->copyAttributesFrom(OldFunc);
- else {
- //Some arguments were deleted with the VMap. Copy arguments one by one
- for (Function::const_arg_iterator I = OldFunc->arg_begin(),
- E = OldFunc->arg_end(); I != E; ++I)
- if (Argument* Anew = dyn_cast<Argument>(VMap[I])) {
- AttributeSet attrs = OldFunc->getAttributes()
- .getParamAttributes(I->getArgNo() + 1);
- if (attrs.getNumSlots() > 0)
- Anew->addAttr(attrs);
- }
- NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttributes(NewFunc->getContext(),
- AttributeSet::ReturnIndex,
- OldFunc->getAttributes()));
- NewFunc->setAttributes(NewFunc->getAttributes()
- .addAttributes(NewFunc->getContext(),
- AttributeSet::FunctionIndex,
- OldFunc->getAttributes()));
+ AttributeSet OldAttrs = OldFunc->getAttributes();
+ // Clone any argument attributes that are present in the VMap.
+ for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+ E = OldFunc->arg_end();
+ I != E; ++I)
+ if (Argument *Anew = dyn_cast<Argument>(VMap[I])) {
+ AttributeSet attrs =
+ OldAttrs.getParamAttributes(I->getArgNo() + 1);
+ if (attrs.getNumSlots() > 0)
+ Anew->addAttr(attrs);
+ }
- }
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttributes(NewFunc->getContext(),
+ AttributeSet::ReturnIndex,
+ OldAttrs.getRetAttributes()));
+ NewFunc->setAttributes(NewFunc->getAttributes()
+ .addAttributes(NewFunc->getContext(),
+ AttributeSet::FunctionIndex,
+ OldAttrs.getFnAttributes()));
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index e9828d6..dabb67b9 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -758,8 +758,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// If the call site was an invoke instruction, add a branch to the normal
// destination.
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
- BranchInst::Create(II->getNormalDest(), TheCall);
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+ NewBr->setDebugLoc(Returns[0]->getDebugLoc());
+ }
// If the return instruction returned a value, replace uses of the call with
// uses of the returned value.
@@ -787,15 +789,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// "starter" and "ender" blocks. How we accomplish this depends on whether
// this is an invoke instruction or a call instruction.
BasicBlock *AfterCallBB;
+ BranchInst *CreatedBranchToNormalDest = NULL;
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
// Add an unconditional branch to make this look like the CallInst case...
- BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+ CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);
// Split the basic block. This guarantees that no PHI nodes will have to be
// updated due to new incoming edges, and make the invoke case more
// symmetric to the call case.
- AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+ AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
CalledFunc->getName()+".exit");
} else { // It's a call
@@ -850,11 +853,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Add a branch to the merge points and remove return instructions.
+ DebugLoc Loc;
for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
ReturnInst *RI = Returns[i];
- BranchInst::Create(AfterCallBB, RI);
+ BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
+ Loc = RI->getDebugLoc();
+ BI->setDebugLoc(Loc);
RI->eraseFromParent();
}
+ // We need to set the debug location to *somewhere* inside the
+ // inlined function. The line number may be nonsensical, but the
+ // instruction will at least be associated with the right
+ // function.
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Loc);
} else if (!Returns.empty()) {
// Otherwise, if there is exactly one return value, just replace anything
// using the return value of the call with the computed value.
@@ -874,6 +886,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
AfterCallBB->getInstList().splice(AfterCallBB->begin(),
ReturnBB->getInstList());
+ if (CreatedBranchToNormalDest)
+ CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
+
// Delete the return instruction now and empty ReturnBB now.
Returns[0]->eraseFromParent();
ReturnBB->eraseFromParent();
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index be80d34..12e5b3e 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -832,7 +832,24 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
/// Dbg Intrinsic utilities
///
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// See if there is a dbg.value intrinsic for DIVar before I.
+static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
+ // Since we can't guarantee that the original dbg.declare intrinsic
+ // is removed by LowerDbgDeclare(), we need to make sure that we are
+ // not inserting the same dbg.value intrinsic over and over.
+ llvm::BasicBlock::InstListType::iterator PrevI(I);
+ if (PrevI != I->getParent()->getInstList().begin()) {
+ --PrevI;
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI))
+ if (DVI->getValue() == I->getOperand(0) &&
+ DVI->getOffset() == 0 &&
+ DVI->getVariable() == DIVar)
+ return true;
+ }
+ return false;
+}
+
+/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.decl intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
StoreInst *SI, DIBuilder &Builder) {
@@ -840,6 +857,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (!DIVar.Verify())
return false;
+ if (LdStHasDebugValue(DIVar, SI))
+ return true;
+
Instruction *DbgVal = NULL;
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
@@ -863,7 +883,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
return true;
}
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
/// that has an associated llvm.dbg.decl intrinsic.
bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
LoadInst *LI, DIBuilder &Builder) {
@@ -871,6 +891,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (!DIVar.Verify())
return false;
+ if (LdStHasDebugValue(DIVar, LI))
+ return true;
+
Instruction *DbgVal =
Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
DIVar, LI);
@@ -902,6 +925,8 @@ bool llvm::LowerDbgDeclare(Function &F) {
E = Dbgs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+ // We only remove the dbg.declare intrinsic if all uses are
+ // converted to dbg.value intrinsics.
bool RemoveDDI = true;
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
UI != E; ++UI)
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 681bf9c..052ad85 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -59,6 +59,10 @@ static cl::opt<bool>
SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
cl::desc("Sink common instructions down to the end block"));
+static cl::opt<bool>
+HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
+ cl::desc("Hoist conditional stores if an unconditional store precedes"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
@@ -1332,6 +1336,66 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
return Changed;
}
+/// \brief Determine if we can hoist a sole store instruction out of a
+/// conditional block.
+///
+/// We are looking for code like the following:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+/// ... // No other stores or function calls (we could be calling a memory
+/// ... // function).
+/// %cmp = icmp ult %x, %y
+/// br i1 %cmp, label %EndBB, label %ThenBB
+/// ThenBB:
+/// store i32 %add5, i32* %arrayidx2
+/// br label EndBB
+/// EndBB:
+/// ...
+/// We are going to transform this into:
+/// BrBB:
+/// store i32 %add, i32* %arrayidx2
+/// ... //
+/// %cmp = icmp ult %x, %y
+/// %add.add5 = select i1 %cmp, i32 %add, i32 %add5
+/// store i32 %add.add5, i32* %arrayidx2
+/// ...
+///
+/// \return The pointer to the value of the previous store if the store can be
+/// hoisted into the predecessor block. 0 otherwise.
+Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
+ BasicBlock *StoreBB, BasicBlock *EndBB) {
+ StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
+ if (!StoreToHoist)
+ return 0;
+
+ // Volatile or atomic.
+ if (!StoreToHoist->isSimple())
+ return 0;
+
+ Value *StorePtr = StoreToHoist->getPointerOperand();
+
+ // Look for a store to the same pointer in BrBB.
+ unsigned MaxNumInstToLookAt = 10;
+ for (BasicBlock::reverse_iterator RI = BrBB->rbegin(),
+ RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) {
+ Instruction *CurI = &*RI;
+
+ // Could be calling a function that affects memory, like free().
+ if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI))
+ return 0;
+
+ StoreInst *SI = dyn_cast<StoreInst>(CurI);
+ // Found the previous store; make sure it stores to the same location.
+ if (SI && SI->getPointerOperand() == StorePtr)
+ // Found the previous store, return its value operand.
+ return SI->getValueOperand();
+ else if (SI)
+ return 0; // Unknown store.
+ }
+
+ return 0;
+}
+
/// \brief Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
@@ -1395,6 +1459,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
unsigned SpeculationCost = 0;
+ Value *SpeculatedStoreValue = 0;
+ StoreInst *SpeculatedStore = 0;
for (BasicBlock::iterator BBI = ThenBB->begin(),
BBE = llvm::prior(ThenBB->end());
BBI != BBE; ++BBI) {
@@ -1410,13 +1476,21 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
return false;
// Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(I))
+ if (!isSafeToSpeculativelyExecute(I) &&
+ !(HoistCondStores &&
+ (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB,
+ EndBB))))
return false;
- if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
+ if (!SpeculatedStoreValue &&
+ ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
return false;
+ // Store the store speculation candidate.
+ if (SpeculatedStoreValue)
+ SpeculatedStore = cast<StoreInst>(I);
+
// Do not hoist the instruction if any of its operands are defined but not
- // used in this BB. The transformation will prevent the operand from
+ // used in BB. The transformation will prevent the operand from
// being sunk into the use block.
for (User::op_iterator i = I->op_begin(), e = I->op_end();
i != e; ++i) {
@@ -1473,12 +1547,24 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
// If there are no PHIs to process, bail early. This helps ensure idempotence
// as well.
- if (!HaveRewritablePHIs)
+ if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
return false;
// If we get here, we can hoist the instruction and if-convert.
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+ // Insert a select of the value of the speculated store.
+ if (SpeculatedStoreValue) {
+ IRBuilder<true, NoFolder> Builder(BI);
+ Value *TrueV = SpeculatedStore->getValueOperand();
+ Value *FalseV = SpeculatedStoreValue;
+ if (Invert)
+ std::swap(TrueV, FalseV);
+ Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() +
+ "." + FalseV->getName());
+ SpeculatedStore->setOperand(0, S);
+ }
+
// Hoist the instructions.
BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
llvm::prior(ThenBB->end()));
@@ -3073,7 +3159,12 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
Value *Sub = SI->getCondition();
if (!Offset->isNullValue())
Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
- Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+ Value *Cmp;
+ // If NumCases overflowed, then all possible values jump to the successor.
+ if (NumCases->isNullValue() && SI->getNumCases() != 0)
+ Cmp = ConstantInt::getTrue(SI->getContext());
+ else
+ Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
BranchInst *NewBI = Builder.CreateCondBr(
Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
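The comment block in isSafeToSpeculateStore shows the IR shape; at the source level the transformation covers code like the following hypothetical pair (invented example, not from LLVM's test suite), where the conditional store can be folded into a select because an unconditional store to the same address already sits before the branch:

#include <cstdio>

void before(int *A, unsigned i, int Add, int Add5, unsigned X, unsigned Y) {
  A[i] = Add;                      // unconditional store in BrBB
  if (X < Y)
    A[i] = Add5;                   // conditional store to the same location
}

void after(int *A, unsigned i, int Add, int Add5, unsigned X, unsigned Y) {
  A[i] = Add;
  A[i] = (X < Y) ? Add5 : Add;     // select of the stored values, no branch
}

int main() {
  int A[1] = {0}, B[1] = {0};
  before(A, 0, 1, 2, 3, 4);
  after(B, 0, 1, 2, 3, 4);
  std::printf("%d %d\n", A[0], B[0]);  // 2 2: both forms store the same result
  return 0;
}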
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index c231704..6bea2dd 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1518,6 +1518,12 @@ struct FPrintFOpt : public LibCallOptimization {
if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
+ // Do not do any of the following transformations if the fprintf return
+ // value is used; in general the fprintf return value is not compatible
+ // with fwrite(), fputc() or fputs().
+ if (!CI->use_empty())
+ return 0;
+
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
if (CI->getNumArgOperands() == 2) {
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
@@ -1527,11 +1533,10 @@ struct FPrintFOpt : public LibCallOptimization {
// These optimizations require DataLayout.
if (!TD) return 0;
- Value *NewCI = EmitFWrite(CI->getArgOperand(1),
- ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()),
- CI->getArgOperand(0), B, TD, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
+ return EmitFWrite(CI->getArgOperand(1),
+ ConstantInt::get(TD->getIntPtrType(*Context),
+ FormatStr.size()),
+ CI->getArgOperand(0), B, TD, TLI);
}
// The remaining optimizations require the format string to be "%s" or "%c"
@@ -1544,14 +1549,12 @@ struct FPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
- Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
- TD, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+ return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
- if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+ if (!CI->getArgOperand(2)->getType()->isPointerTy())
return 0;
return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
}
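The new early exit in FPrintFOpt exists because the candidate replacements do not report the same thing fprintf does. A small standalone demonstration (ordinary libc calls, nothing LLVM-specific):

#include <cstdio>

int main() {
  int a = std::fprintf(stdout, "%c", 'x');     // returns characters printed: 1
  int b = std::fputc('x', stdout);             // returns the character: 120
  size_t c = std::fwrite("foo", 3, 1, stdout); // returns items written: 1, not 3
  std::fprintf(stdout, "\n%d %d %zu\n", a, b, c);
  return 0;
}

Since a caller that inspects the return value would observe different numbers after the rewrite, the optimization is now restricted to call sites whose result is unused.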
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
index 5812d46..c3df215 100644
--- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
#include "llvm-c/Initialization.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index b5941bd..544c5ee 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -57,7 +57,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
return VM[V] = const_cast<Value*>(V);
// Create a dummy node in case we have a metadata cycle.
- MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>());
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), None);
VM[V] = Dummy;
// Check all operands to see if any need to be remapped.
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d26154e..08d3725 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
//
// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
-// and generates target-independent LLVM-IR. Legalization of the IR is done
-// in the codegen. However, the vectorizer uses (will use) the codegen
-// interfaces to generate IR that is likely to result in an optimal binary.
+// and generates target-independent LLVM-IR.
+// The vectorizer uses the TargetTransformInfo analysis to estimate the costs
+// of instructions in order to estimate the profitability of vectorization.
//
// The loop vectorizer combines consecutive loop iterations into a single
// 'wide' iteration. After this transformation the index is incremented
@@ -78,7 +78,9 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -87,6 +89,7 @@
#include <map>
using namespace llvm;
+using namespace llvm::PatternMatch;
static cl::opt<unsigned>
VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
@@ -112,9 +115,9 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
/// We don't unroll loops with a known constant trip count below this number.
static const unsigned TinyTripCountUnrollThreshold = 128;
-/// When performing a runtime memory check, do not check more than this
-/// number of pointers. Notice that the check is quadratic!
-static const unsigned RuntimeMemoryCheckThreshold = 4;
+/// When performing memory disambiguation checks at runtime do not make more
+/// than this number of comparisons.
+static const unsigned RuntimeMemoryCheckThreshold = 8;
/// We use a metadata with this name to indicate that a scalar loop was
/// vectorized and that we don't need to re-vectorize it if we run into it
@@ -333,7 +336,7 @@ public:
DominatorTree *DT, TargetTransformInfo* TTI,
AliasAnalysis *AA, TargetLibraryInfo *TLI)
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
- Induction(0) {}
+ Induction(0), HasFunNoNaNAttr(false) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -343,8 +346,10 @@ public:
RK_IntegerOr, ///< Bitwise or logical OR of numbers.
RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
+ RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
RK_FloatAdd, ///< Sum of floats.
- RK_FloatMult ///< Product of floats.
+ RK_FloatMult, ///< Product of floats.
+ RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
};
/// This enum represents the kinds of inductions that we support.
@@ -356,21 +361,52 @@ public:
IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem).
};
+ // This enum represents the kind of minmax reduction.
+ enum MinMaxReductionKind {
+ MRK_Invalid,
+ MRK_UIntMin,
+ MRK_UIntMax,
+ MRK_SIntMin,
+ MRK_SIntMax,
+ MRK_FloatMin,
+ MRK_FloatMax
+ };
+
/// This POD struct holds information about reduction variables.
struct ReductionDescriptor {
ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
- Kind(RK_NoReduction) {}
+ Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
- ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K)
- : StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
+ ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K,
+ MinMaxReductionKind MK)
+ : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {}
// The starting value of the reduction.
// It does not have to be zero!
- Value *StartValue;
+ TrackingVH<Value> StartValue;
  // The instruction whose value is used outside the loop.
Instruction *LoopExitInstr;
// The kind of the reduction.
ReductionKind Kind;
+ // If this is a min/max reduction, the kind of reduction.
+ MinMaxReductionKind MinMaxKind;
+ };
+
+ /// This POD struct holds information about a potential reduction operation.
+ struct ReductionInstDesc {
+ ReductionInstDesc(bool IsRedux, Instruction *I) :
+ IsReduction(IsRedux), PatternLastInst(I), MinMaxKind(MRK_Invalid) {}
+
+ ReductionInstDesc(Instruction *I, MinMaxReductionKind K) :
+ IsReduction(true), PatternLastInst(I), MinMaxKind(K) {}
+
+ // Is this instruction a reduction candidate.
+ bool IsReduction;
+ // The last instruction in a min/max pattern (select of the select(icmp())
+ // pattern), or the current reduction instruction otherwise.
+ Instruction *PatternLastInst;
+ // If this is a min/max pattern the comparison predicate.
+ MinMaxReductionKind MinMaxKind;
};
// This POD struct holds information about the memory runtime legality
@@ -387,16 +423,18 @@ public:
}
/// Insert a pointer and calculate the start and end SCEVs.
- void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr);
+ void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr);
/// This flag indicates if we need to add the runtime check.
bool Need;
/// Holds the pointers that we need to check.
- SmallVector<Value*, 2> Pointers;
+ SmallVector<TrackingVH<Value>, 2> Pointers;
/// Holds the pointer value at the beginning of the loop.
SmallVector<const SCEV*, 2> Starts;
/// Holds the pointer value at the end of the loop.
SmallVector<const SCEV*, 2> Ends;
+ /// Holds the information if this pointer is used for writing to memory.
+ SmallVector<bool, 2> IsWritePtr;
};
/// A POD for saving information about induction variables.
@@ -404,7 +442,7 @@ public:
InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
/// Start value.
- Value *StartValue;
+ TrackingVH<Value> StartValue;
/// Induction kind.
InductionKind IK;
};
@@ -461,6 +499,10 @@ public:
/// Returns the information that we collected about runtime memory check.
RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; }
+
+ /// This function returns the identity element (or neutral element) for
+ /// the operation K.
+ static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -487,9 +529,17 @@ private:
/// Returns True, if 'Phi' is the kind of reduction variable for type
/// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
- /// Returns true if the instruction I can be a reduction variable of type
- /// 'Kind'.
- bool isReductionInstr(Instruction *I, ReductionKind Kind);
+ /// Returns a struct describing if the instruction 'I' can be a reduction
+ /// variable of type 'Kind'. If the reduction is a min/max pattern of
+ /// select(icmp()), this function advances the instruction pointer 'I' from the
+ /// compare instruction to the select instruction and stores this pointer in
+ /// the 'PatternLastInst' member of the returned struct.
+ ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind,
+ ReductionInstDesc &Desc);
+ /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
+ /// pattern corresponding to a min(X, Y) or max(X, Y).
+ static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I,
+ ReductionInstDesc &Prev);
/// Returns the induction kind of Phi. This function may return NoInduction
/// if the PHI is not an induction variable.
InductionKind isInductionVariable(PHINode *Phi);
@@ -540,6 +590,8 @@ private:
/// We need to check that all of the pointers in this list are disjoint
/// at runtime.
RuntimePointerCheck PtrRtCheck;
+ /// Can we assume the absence of NaNs.
+ bool HasFunNoNaNAttr;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -662,6 +714,11 @@ struct LoopVectorize : public LoopPass {
AA = getAnalysisIfAvailable<AliasAnalysis>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+ if (DL == NULL) {
+ DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout");
+ return false;
+ }
+
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
L->getHeader()->getParent()->getName() << "\"\n");
@@ -737,7 +794,8 @@ struct LoopVectorize : public LoopPass {
void
LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
- Loop *Lp, Value *Ptr) {
+ Loop *Lp, Value *Ptr,
+ bool WritePtr) {
const SCEV *Sc = SE->getSCEV(Ptr);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
assert(AR && "Invalid addrec expression");
@@ -746,6 +804,7 @@ LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
Pointers.push_back(Ptr);
Starts.push_back(AR->getStart());
Ends.push_back(ScEnd);
+ IsWritePtr.push_back(WritePtr);
}
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
@@ -906,12 +965,18 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
+ unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
+ unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
+
+ if (ScalarAllocatedSize != VectorElementSize)
+ return scalarizeInstruction(Instr);
+
// If the pointer is loop invariant or if it is non consecutive,
// scalarize the load.
- int Stride = Legal->isConsecutivePtr(Ptr);
- bool Reverse = Stride < 0;
+ int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+ bool Reverse = ConsecutiveStride < 0;
bool UniformLoad = LI && Legal->isUniform(Ptr);
- if (Stride == 0 || UniformLoad)
+ if (!ConsecutiveStride || UniformLoad)
return scalarizeInstruction(Instr);
Constant *Zero = Builder.getInt32(0);
@@ -1110,6 +1175,10 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i+1; j < NumPointers; ++j) {
+ // No need to check if two readonly pointers intersect.
+ if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
+ continue;
+
Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc");
Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc");
Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy, "bc");
@@ -1167,7 +1236,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// Mark the old scalar loop with metadata that tells us not to vectorize this
// loop again if we run into it.
- MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>());
+ MDNode *MD = MDNode::get(OldBasicBlock->getContext(), None);
OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD);
// Some loops have a single integer induction variable, while other loops
@@ -1436,24 +1505,24 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
/// This function returns the identity element (or neutral element) for
/// the operation K.
-static Constant*
-getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) {
+Constant*
+LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
switch (K) {
- case LoopVectorizationLegality:: RK_IntegerXor:
- case LoopVectorizationLegality:: RK_IntegerAdd:
- case LoopVectorizationLegality:: RK_IntegerOr:
+ case RK_IntegerXor:
+ case RK_IntegerAdd:
+ case RK_IntegerOr:
// Adding, Xoring, Oring zero to a number does not change it.
return ConstantInt::get(Tp, 0);
- case LoopVectorizationLegality:: RK_IntegerMult:
+ case RK_IntegerMult:
// Multiplying a number by 1 does not change it.
return ConstantInt::get(Tp, 1);
- case LoopVectorizationLegality:: RK_IntegerAnd:
+ case RK_IntegerAnd:
// AND-ing a number with an all-1 value does not change it.
return ConstantInt::get(Tp, -1, true);
- case LoopVectorizationLegality:: RK_FloatMult:
+ case RK_FloatMult:
// Multiplying a number by 1 does not change it.
return ConstantFP::get(Tp, 1.0L);
- case LoopVectorizationLegality:: RK_FloatAdd:
+ case RK_FloatAdd:
// Adding zero to a number does not change it.
return ConstantFP::get(Tp, 0.0L);
default:
@@ -1566,7 +1635,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
}
/// This function translates the reduction kind to an LLVM binary operator.
-static Instruction::BinaryOps
+static unsigned
getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
switch (Kind) {
case LoopVectorizationLegality::RK_IntegerAdd:
@@ -1583,11 +1652,53 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
return Instruction::FMul;
case LoopVectorizationLegality::RK_FloatAdd:
return Instruction::FAdd;
+ case LoopVectorizationLegality::RK_IntegerMinMax:
+ return Instruction::ICmp;
+ case LoopVectorizationLegality::RK_FloatMinMax:
+ return Instruction::FCmp;
default:
llvm_unreachable("Unknown reduction operation");
}
}
+Value *createMinMaxOp(IRBuilder<> &Builder,
+ LoopVectorizationLegality::MinMaxReductionKind RK,
+ Value *Left,
+ Value *Right) {
+ CmpInst::Predicate P = CmpInst::ICMP_NE;
+ switch (RK) {
+ default:
+ llvm_unreachable("Unknown min/max reduction kind");
+ case LoopVectorizationLegality::MRK_UIntMin:
+ P = CmpInst::ICMP_ULT;
+ break;
+ case LoopVectorizationLegality::MRK_UIntMax:
+ P = CmpInst::ICMP_UGT;
+ break;
+ case LoopVectorizationLegality::MRK_SIntMin:
+ P = CmpInst::ICMP_SLT;
+ break;
+ case LoopVectorizationLegality::MRK_SIntMax:
+ P = CmpInst::ICMP_SGT;
+ break;
+ case LoopVectorizationLegality::MRK_FloatMin:
+ P = CmpInst::FCMP_OLT;
+ break;
+ case LoopVectorizationLegality::MRK_FloatMax:
+ P = CmpInst::FCMP_OGT;
+ break;
+ }
+
+ Value *Cmp;
+ if (RK == LoopVectorizationLegality::MRK_FloatMin || RK == LoopVectorizationLegality::MRK_FloatMax)
+ Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
+ else
+ Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+
+ Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
+ return Select;
+}
+
void
InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
//===------------------------------------------------===//
@@ -1651,13 +1762,24 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Find the reduction identity variable. Zero for addition, or, xor,
// one for multiplication, -1 for And.
- Constant *Iden = getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType());
- Constant *Identity = ConstantVector::getSplat(VF, Iden);
-
- // This vector is the Identity vector where the first element is the
- // incoming scalar reduction.
- Value *VectorStart = Builder.CreateInsertElement(Identity,
- RdxDesc.StartValue, Zero);
+ Value *Identity;
+ Value *VectorStart;
+ if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
+ RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
+ // MinMax reductions have the start value as their identity.
+ VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
+ "minmax.ident");
+ } else {
+ Constant *Iden =
+ LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
+ VecTy->getScalarType());
+ Identity = ConstantVector::getSplat(VF, Iden);
+
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ VectorStart = Builder.CreateInsertElement(Identity,
+ RdxDesc.StartValue, Zero);
+ }
// Fix the vector-loop phi.
// We created the induction variable so we know that the
@@ -1699,10 +1821,15 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
// Reduce all of the unrolled parts into a single vector.
Value *ReducedPartRdx = RdxParts[0];
+ unsigned Op = getReductionBinOp(RdxDesc.Kind);
for (unsigned part = 1; part < UF; ++part) {
- Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
- ReducedPartRdx = Builder.CreateBinOp(Op, RdxParts[part], ReducedPartRdx,
- "bin.rdx");
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+ ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
+ RdxParts[part], ReducedPartRdx,
+ "bin.rdx");
+ else
+ ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind,
+ ReducedPartRdx, RdxParts[part]);
}
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
@@ -1727,8 +1854,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
ConstantVector::get(ShuffleMask),
"rdx.shuf");
- Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
- TmpVec = Builder.CreateBinOp(Op, TmpVec, Shuf, "bin.rdx");
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+ TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+ "bin.rdx");
+ else
+ TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
}
// The result is in the first element of the vector.
@@ -1861,18 +1991,33 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
// We know that all PHIs in non header blocks are converted into
// selects, so we don't have to worry about the insertion order and we
// can just use the builder.
-
// At this point we generate the predication tree. There may be
// duplications since this is a simple recursive scan, but future
// optimizations will clean it up.
- VectorParts Cond = createEdgeMask(P->getIncomingBlock(0),
- P->getParent());
- for (unsigned part = 0; part < UF; ++part) {
- VectorParts &In0 = getVectorValue(P->getIncomingValue(0));
- VectorParts &In1 = getVectorValue(P->getIncomingValue(1));
- Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In1[part],
- "predphi");
+ unsigned NumIncoming = P->getNumIncomingValues();
+ assert(NumIncoming > 1 && "Invalid PHI");
+
+ // Generate a sequence of selects of the form:
+ // SELECT(Mask3, In3,
+ // SELECT(Mask2, In2,
+ // ( ...)))
+ for (unsigned In = 0; In < NumIncoming; In++) {
+ VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
+ P->getParent());
+ VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
+
+ for (unsigned part = 0; part < UF; ++part) {
+ // We don't need to 'select' the first PHI operand because it is
+ // the default value if all of the other masks don't match.
+ if (In == 0)
+ Entry[part] = In0[part];
+ else
+ // Select between the current value and the previous incoming edge
+ // based on the incoming mask.
+ Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
+ Entry[part], "predphi");
+ }
}
continue;
}
@@ -2153,12 +2298,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!isa<BranchInst>(BB->getTerminator()))
return false;
- // We must have at most two predecessors because we need to convert
- // all PHIs to selects.
- unsigned Preds = std::distance(pred_begin(BB), pred_end(BB));
- if (Preds > 2)
- return false;
-
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB) && !blockCanBePredicated(BB))
return false;
@@ -2239,6 +2378,26 @@ bool LoopVectorizationLegality::canVectorize() {
return true;
}
+/// \brief Returns true if the instruction has users outside of the loop and
+/// is not an identified reduction variable.
+static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
+ SmallPtrSet<Value *, 4> &Reductions) {
+ // Reduction instructions are allowed to have exit users. All other
+ // instructions must not have external users.
+ if (!Reductions.count(Inst))
+ // Check that all of the users of the instruction are inside the loop.
+ for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
+ I != E; ++I) {
+ Instruction *U = cast<Instruction>(*I);
+ // This user may be a reduction exit value.
+ if (!TheLoop->contains(U)) {
+ DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+ return true;
+ }
+ }
+ return false;
+}
+
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
BasicBlock *Header = TheLoop->getHeader();
@@ -2250,6 +2409,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return false;
}
+ // Look for the attribute signaling the absence of NaNs.
+ Function &F = *Header->getParent();
+ if (F.hasFnAttribute("no-nans-fp-math"))
+ HasFunNoNaNAttr = F.getAttributes().getAttribute(
+ AttributeSet::FunctionIndex,
+ "no-nans-fp-math").getValueAsString() == "true";
+
// For each block in the loop.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {
@@ -2259,12 +2425,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
++it) {
if (PHINode *Phi = dyn_cast<PHINode>(it)) {
- // This should not happen because the loop should be normalized.
- if (Phi->getNumIncomingValues() != 2) {
- DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
- return false;
- }
-
// Check that this PHI type is allowed.
if (!Phi->getType()->isIntegerTy() &&
!Phi->getType()->isFloatingPointTy() &&
@@ -2276,8 +2436,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// If this PHINode is not in the header block, then we know that we
// can convert it to select during if-conversion. No need to check if
// the PHIs in this block are induction or reduction variables.
- if (*bb != Header)
- continue;
+ if (*bb != Header) {
+ // A non-header PHI must have no users outside the loop, unless it is an
+ // identified reduction value with an out-of-loop user.
+ if (!hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ continue;
+ return false;
+ }
+
+ // PHIs in the loop header must have exactly two incoming values; only
+ // if-converted PHIs outside the header may have more.
+ if (Phi->getNumIncomingValues() != 2) {
+ DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+ return false;
+ }
// This is the value coming from the preheader.
Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
@@ -2319,6 +2490,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
continue;
}
+ if (AddReductionVar(Phi, RK_IntegerMinMax)) {
+ DEBUG(dbgs() << "LV: Found a MINMAX reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
if (AddReductionVar(Phi, RK_FloatMult)) {
DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n");
continue;
@@ -2327,6 +2502,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
continue;
}
+ if (AddReductionVar(Phi, RK_FloatMinMax)) {
+ DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
return false;
@@ -2356,17 +2535,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
- if (!AllowedExit.count(it))
- //Check that all of the users of the loop are inside the BB.
- for (Value::use_iterator I = it->use_begin(), E = it->use_end();
- I != E; ++I) {
- Instruction *U = cast<Instruction>(*I);
- // This user may be a reduction exit value.
- if (!TheLoop->contains(U)) {
- DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
- return false;
- }
- }
+ if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ return false;
+
} // next instr.
}
@@ -2446,13 +2617,6 @@ LoopVectorizationLegality::hasPossibleGlobalWriteReorder(
bool LoopVectorizationLegality::canVectorizeMemory() {
- if (TheLoop->isAnnotatedParallel()) {
- DEBUG(dbgs()
- << "LV: A loop annotated parallel, ignore memory dependency "
- << "checks.\n");
- return true;
- }
-
typedef SmallVector<Value*, 16> ValueVector;
typedef SmallPtrSet<Value*, 16> ValueSet;
// Holds the Load and Store *instructions*.
@@ -2461,6 +2625,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
PtrRtCheck.Pointers.clear();
PtrRtCheck.Need = false;
+ const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
+
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
be = TheLoop->block_end(); bb != be; ++bb) {
@@ -2475,7 +2641,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (it->mayReadFromMemory()) {
LoadInst *Ld = dyn_cast<LoadInst>(it);
if (!Ld) return false;
- if (!Ld->isSimple()) {
+ if (!Ld->isSimple() && !IsAnnotatedParallel) {
DEBUG(dbgs() << "LV: Found a non-simple load.\n");
return false;
}
@@ -2487,7 +2653,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (it->mayWriteToMemory()) {
StoreInst *St = dyn_cast<StoreInst>(it);
if (!St) return false;
- if (!St->isSimple()) {
+ if (!St->isSimple() && !IsAnnotatedParallel) {
DEBUG(dbgs() << "LV: Found a non-simple store.\n");
return false;
}
@@ -2534,6 +2700,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
ReadWrites.insert(std::make_pair(Ptr, ST));
}
+ if (IsAnnotatedParallel) {
+ DEBUG(dbgs()
+ << "LV: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
+ return true;
+ }
+
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
LoadInst *LD = cast<LoadInst>(*I);
Value* Ptr = LD->getPointerOperand();
@@ -2556,6 +2729,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
return true;
}
+ unsigned NumReadPtrs = 0;
+ unsigned NumWritePtrs = 0;
+
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
bool CanDoRT = true;
@@ -2563,7 +2739,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
Value *V = (*MI).first;
if (hasComputableBounds(V)) {
- PtrRtCheck.insert(SE, TheLoop, V);
+ PtrRtCheck.insert(SE, TheLoop, V, true);
+ NumWritePtrs++;
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
} else {
CanDoRT = false;
@@ -2573,7 +2750,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
Value *V = (*MI).first;
if (hasComputableBounds(V)) {
- PtrRtCheck.insert(SE, TheLoop, V);
+ PtrRtCheck.insert(SE, TheLoop, V, false);
+ NumReadPtrs++;
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
} else {
CanDoRT = false;
@@ -2583,7 +2761,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// Check that we did not collect too many pointers or found a
// unsizeable pointer.
- if (!CanDoRT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
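+ // Each pointer that is written to has to be compared against every other
+ // pointer, while pointers that are only read need only be compared against
+ // the written pointers; hence Writes * (Reads + Writes - 1) comparisons.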
+ unsigned NumComparisons = (NumWritePtrs * (NumReadPtrs + NumWritePtrs - 1));
+ DEBUG(dbgs() << "LV: We need to compare " << NumComparisons << " ptrs.\n");
+ if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
PtrRtCheck.reset();
CanDoRT = false;
}
@@ -2646,8 +2826,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Inst,
WriteObjects,
MaxByteWidth)) {
- DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
- << *UI <<"\n");
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **UI
+ << "\n");
return false;
}
@@ -2690,8 +2870,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Inst,
WriteObjects,
MaxByteWidth)) {
- DEBUG(dbgs() << "LV: Found a possible read-write reorder:"
- << *UI <<"\n");
+ DEBUG(dbgs() << "LV: Found a possible read-write reorder:" << **UI
+ << "\n");
return false;
}
}
@@ -2737,7 +2917,18 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// used as reduction variables (such as ADD). We may have a single
// out-of-block user. The cycle must end with the original PHI.
Instruction *Iter = Phi;
- while (true) {
+
+ // To recognize min/max patterns formed by an icmp/select sequence, we count
+ // the instructions we have seen from the recognized pattern. This lets us
+ // accept a phi with two in-loop uses (one by the select and one by the
+ // icmp) while still verifying that exactly those two instructions
+ // participate in the pattern.
+ unsigned NumCmpSelectPatternInst = 0;
+ ReductionInstDesc ReduxDesc(false, 0);
+
+ // Avoid cycles in the chain.
+ SmallPtrSet<Instruction *, 8> VisitedInsts;
+ while (VisitedInsts.insert(Iter)) {
// If the instruction has no users then this is a broken
// chain and can't be a reduction variable.
if (Iter->use_empty())
@@ -2751,9 +2942,6 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// Is this a bin op ?
FoundBinOp |= !isa<PHINode>(Iter);
- // Remember the current instruction.
- Instruction *OldIter = Iter;
-
// For each of the *users* of iter.
for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
it != e; ++it) {
@@ -2782,25 +2970,35 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
Iter->hasNUsesOrMore(2))
continue;
- // We can't have multiple inside users.
- if (FoundInBlockUser)
+ // We can't have multiple inside users except for a combination of
+ // icmp/select both using the phi.
+ if (FoundInBlockUser && !NumCmpSelectPatternInst)
return false;
FoundInBlockUser = true;
// Any reduction instr must be of one of the allowed kinds.
- if (!isReductionInstr(U, Kind))
+ ReduxDesc = isReductionInstr(U, Kind, ReduxDesc);
+ if (!ReduxDesc.IsReduction)
return false;
+ if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) || isa<SelectInst>(U)))
+ ++NumCmpSelectPatternInst;
+ if (Kind == RK_FloatMinMax && (isa<FCmpInst>(U) || isa<SelectInst>(U)))
+ ++NumCmpSelectPatternInst;
+
// Reductions of instructions such as Div, and Sub is only
// possible if the LHS is the reduction variable.
- if (!U->isCommutative() && !isa<PHINode>(U) && U->getOperand(0) != Iter)
+ if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) &&
+ !isa<ICmpInst>(U) && !isa<FCmpInst>(U) && U->getOperand(0) != Iter)
return false;
- Iter = U;
+ Iter = ReduxDesc.PatternLastInst;
}
- // If all uses were skipped this can't be a reduction variable.
- if (Iter == OldIter)
+ // If the count is not exactly two we have seen only one instruction of
+ // the icmp/select pattern, or more than just the select and the cmp.
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
+ NumCmpSelectPatternInst != 2)
return false;
// We found a reduction var if we have reached the original
@@ -2811,47 +3009,107 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
AllowedExit.insert(ExitInstruction);
// Save the description of this reduction variable.
- ReductionDescriptor RD(RdxStart, ExitInstruction, Kind);
+ ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
+ ReduxDesc.MinMaxKind);
Reductions[Phi] = RD;
// We've ended the cycle. This is a reduction variable if we have an
// outside user and it has a binary op.
return FoundBinOp && ExitInstruction;
}
}
+
+ return false;
}
-bool
+/// Returns a ReductionInstDesc describing whether the instruction is a
+/// Select(ICmp(X, Y), X, Y) pattern corresponding to a min(X, Y) or max(X, Y).
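+/// For example (illustrative IR, value names are hypothetical):
+///   %cmp = icmp slt i32 %x, %y
+///   %min = select i1 %cmp, i32 %x, i32 %y
+/// is recognized as a signed-min (MRK_SIntMin) pattern.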
+LoopVectorizationLegality::ReductionInstDesc
+LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
+ ReductionInstDesc &Prev) {
+
+ assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
+ "Expect a select instruction");
+ Instruction *Cmp = 0;
+ SelectInst *Select = 0;
+
+ // We must handle the select(cmp()) as a single instruction. Advance to the
+ // select.
+ if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
+ if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin())))
+ return ReductionInstDesc(false, I);
+ return ReductionInstDesc(Select, Prev.MinMaxKind);
+ }
+
+ // Only handle single use cases for now.
+ if (!(Select = dyn_cast<SelectInst>(I)))
+ return ReductionInstDesc(false, I);
+ if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
+ !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
+ return ReductionInstDesc(false, I);
+ if (!Cmp->hasOneUse())
+ return ReductionInstDesc(false, I);
+
+ Value *CmpLeft;
+ Value *CmpRight;
+
+ // Look for a min/max pattern.
+ if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_UIntMin);
+ else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_UIntMax);
+ else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_SIntMax);
+ else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_SIntMin);
+ else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMin);
+ else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMax);
+ else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMin);
+ else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return ReductionInstDesc(Select, MRK_FloatMax);
+
+ return ReductionInstDesc(false, I);
+}
+
+LoopVectorizationLegality::ReductionInstDesc
LoopVectorizationLegality::isReductionInstr(Instruction *I,
- ReductionKind Kind) {
+ ReductionKind Kind,
+ ReductionInstDesc &Prev) {
bool FP = I->getType()->isFloatingPointTy();
bool FastMath = (FP && I->isCommutative() && I->isAssociative());
-
switch (I->getOpcode()) {
default:
- return false;
+ return ReductionInstDesc(false, I);
case Instruction::PHI:
- if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd))
- return false;
- // possibly.
- return true;
+ if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd &&
+ Kind != RK_FloatMinMax))
+ return ReductionInstDesc(false, I);
+ return ReductionInstDesc(I, Prev.MinMaxKind);
case Instruction::Sub:
case Instruction::Add:
- return Kind == RK_IntegerAdd;
- case Instruction::SDiv:
- case Instruction::UDiv:
+ return ReductionInstDesc(Kind == RK_IntegerAdd, I);
case Instruction::Mul:
- return Kind == RK_IntegerMult;
+ return ReductionInstDesc(Kind == RK_IntegerMult, I);
case Instruction::And:
- return Kind == RK_IntegerAnd;
+ return ReductionInstDesc(Kind == RK_IntegerAnd, I);
case Instruction::Or:
- return Kind == RK_IntegerOr;
+ return ReductionInstDesc(Kind == RK_IntegerOr, I);
case Instruction::Xor:
- return Kind == RK_IntegerXor;
+ return ReductionInstDesc(Kind == RK_IntegerXor, I);
case Instruction::FMul:
- return Kind == RK_FloatMult && FastMath;
+ return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
case Instruction::FAdd:
- return Kind == RK_FloatAdd && FastMath;
- }
+ return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ case Instruction::Select:
+ if (Kind != RK_IntegerMinMax &&
+ (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
+ return ReductionInstDesc(false, I);
+ return isMinMaxSelectCmpPattern(I, Prev);
+ }
}
LoopVectorizationLegality::InductionKind
@@ -3384,9 +3642,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
// Scalarized loads/stores.
- int Stride = Legal->isConsecutivePtr(Ptr);
- bool Reverse = Stride < 0;
- if (0 == Stride) {
+ int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+ bool Reverse = ConsecutiveStride < 0;
+ unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
+ unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
+ if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
unsigned Cost = 0;
// The cost of extracting from the value vector and pointer vector.
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
new file mode 100644
index 0000000..cc30cc9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -0,0 +1,348 @@
+//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
+// stores that can be put together into vector-stores. Next, it attempts to
+// construct a vectorizable tree using the use-def chains. If a profitable tree
+// was found, the SLP vectorizer performs vectorization on the tree.
+//
+// The pass is inspired by the work described in the paper:
+// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
+//
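+// A motivating example (illustrative C, array names are hypothetical):
+//   a[0] = b[0] + c[0];  a[1] = b[1] + c[1];
+//   a[2] = b[2] + c[2];  a[3] = b[3] + c[3];
+// The four consecutive stores seed a tree of scalar adds and loads that can
+// be rewritten as two vector loads, one vector add and one vector store.
+//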
+//===----------------------------------------------------------------------===//
+#define SV_NAME "slp-vectorizer"
+#define DEBUG_TYPE SV_NAME
+
+#include "VecUtils.h"
+#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+using namespace llvm;
+
+static cl::opt<int>
+SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
+ cl::desc("Only vectorize trees if the gain is above this "
+ "number. (gain = -cost of vectorization)"));
+namespace {
+
+/// The SLPVectorizer Pass.
+struct SLPVectorizer : public FunctionPass {
+ typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
+
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ explicit SLPVectorizer() : FunctionPass(ID) {
+ initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ TargetTransformInfo *TTI;
+ AliasAnalysis *AA;
+ LoopInfo *LI;
+
+ virtual bool runOnFunction(Function &F) {
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ TTI = &getAnalysis<TargetTransformInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ LI = &getAnalysis<LoopInfo>();
+
+ StoreRefs.clear();
+ bool Changed = false;
+
+ // Must have DataLayout. We can't require it because some tests run w/o
+ // triple.
+ if (!DL)
+ return false;
+
+ for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
+ BasicBlock *BB = it;
+ bool BBChanged = false;
+
+ // Use the bottom-up SLP vectorizer to construct chains that start with
+ // the store instructions.
+ BoUpSLP R(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));
+
+ // Vectorize trees that end at reductions.
+ BBChanged |= vectorizeReductions(BB, R);
+
+ // Vectorize trees that end at stores.
+ if (unsigned count = collectStores(BB, R)) {
+ (void)count;
+ DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n");
+ BBChanged |= vectorizeStoreChains(R);
+ }
+
+ // Try to hoist some of the scalarization code to the preheader.
+ if (BBChanged) hoistGatherSequence(LI, BB, R);
+
+ Changed |= BBChanged;
+ }
+
+ if (Changed) {
+ DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n");
+ DEBUG(verifyFunction(F));
+ }
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<LoopInfo>();
+ }
+
+private:
+
+ /// \brief Collect memory references and sort them according to their base
+ /// object. We sort the stores to their base objects to reduce the cost of the
+ /// quadratic search on the stores. TODO: We can further reduce this cost
+ /// if we flush the chain creation every time we run into a memory barrier.
+ unsigned collectStores(BasicBlock *BB, BoUpSLP &R);
+
+ /// \brief Try to vectorize a chain that starts at two arithmetic instrs.
+ bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
+
+ /// \brief Try to vectorize a list of operands.
+ bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R);
+
+ /// \brief Try to vectorize a chain that may start at the operands of \p V.
+ bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
+
+ /// \brief Vectorize the stores that were collected in StoreRefs.
+ bool vectorizeStoreChains(BoUpSLP &R);
+
+ /// \brief Try to hoist gather sequences outside of the loop in cases where
+ /// all of the sources are loop invariant.
+ void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);
+
+ /// \brief Scan the basic block and look for reductions that may start a
+ /// vectorization chain.
+ bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R);
+
+private:
+ StoreListMap StoreRefs;
+};
+
+unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
+ unsigned count = 0;
+ StoreRefs.clear();
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ StoreInst *SI = dyn_cast<StoreInst>(it);
+ if (!SI)
+ continue;
+
+ // Check that the pointer points to scalars.
+ Type *Ty = SI->getValueOperand()->getType();
+ if (Ty->isAggregateType() || Ty->isVectorTy())
+ return 0;
+
+ // Find the base of the GEP.
+ Value *Ptr = SI->getPointerOperand();
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ Ptr = GEP->getPointerOperand();
+
+ // Save the store locations.
+ StoreRefs[Ptr].push_back(SI);
+ count++;
+ }
+ return count;
+}
+
+bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
+ if (!A || !B) return false;
+ Value *VL[] = { A, B };
+ return tryToVectorizeList(VL, R);
+}
+
+bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
+ DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n");
+
+ // Check that all of the parts are scalar.
+ for (int i = 0, e = VL.size(); i < e; ++i) {
+ Type *Ty = VL[i]->getType();
+ if (Ty->isAggregateType() || Ty->isVectorTy())
+ return false;
+ }
+
+ int Cost = R.getTreeCost(VL);
+ int ExtrCost = R.getScalarizationCost(VL);
+ DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
+ " Cost of extract:" << ExtrCost << ".\n");
+ if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
+ DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
+ R.vectorizeArith(VL);
+ return true;
+}
+
+bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
+ if (!V) return false;
+ // Try to vectorize V.
+ if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
+ return true;
+
+ BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
+ BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
+ // Try to skip B.
+ if (B && B->hasOneUse()) {
+ BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
+ BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
+ if (tryToVectorizePair(A, B0, R)) {
+ B->moveBefore(V);
+ return true;
+ }
+ if (tryToVectorizePair(A, B1, R)) {
+ B->moveBefore(V);
+ return true;
+ }
+ }
+
+ // Try to skip A.
+ if (A && A->hasOneUse()) {
+ BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
+ BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
+ if (tryToVectorizePair(A0, B, R)) {
+ A->moveBefore(V);
+ return true;
+ }
+ if (tryToVectorizePair(A1, B, R)) {
+ A->moveBefore(V);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool SLPVectorizer::vectorizeReductions(BasicBlock *BB, BoUpSLP &R) {
+ bool Changed = false;
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ if (isa<DbgInfoIntrinsic>(it)) continue;
+
+ // Try to vectorize reductions that use PHINodes.
+ if (PHINode *P = dyn_cast<PHINode>(it)) {
+ // Check that the PHI is a reduction PHI.
+ if (P->getNumIncomingValues() != 2) return Changed;
+ Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) :
+ (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) :
+ 0));
+ // Check if this is a Binary Operator.
+ BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
+ if (!BI)
+ continue;
+
+ Value *Inst = BI->getOperand(0);
+ if (Inst == P) Inst = BI->getOperand(1);
+ Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
+ continue;
+ }
+
+ // Try to vectorize trees that start at compare instructions.
+ if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
+ if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
+ Changed |= true;
+ continue;
+ }
+ for (int i = 0; i < 2; ++i)
+ if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
+ Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R);
+ continue;
+ }
+ }
+
+ return Changed;
+}
+
+bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
+ bool Changed = false;
+ // Attempt to sort and vectorize each of the store-groups.
+ for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
+ it != e; ++it) {
+ if (it->second.size() < 2)
+ continue;
+
+ DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<
+ it->second.size() << ".\n");
+
+ Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
+ }
+ return Changed;
+}
+
+void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
+ BoUpSLP &R) {
+ // Check if this block is inside a loop.
+ Loop *L = LI->getLoopFor(BB);
+ if (!L)
+ return;
+
+ // Check if it has a preheader.
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ if (!PreHeader)
+ return;
+
+ // Mark the insertion point for the block.
+ Instruction *Location = PreHeader->getTerminator();
+
+ BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions();
+ for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end();
+ it != e; ++it) {
+ InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it);
+
+ // The InsertElement sequence can be simplified into a constant.
+ if (!Insert)
+ continue;
+
+ // If the vector or the element that we insert into it are
+ // instructions that are defined in this basic block then we can't
+ // hoist this instruction.
+ Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
+ Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
+ if (CurrVec && L->contains(CurrVec)) continue;
+ if (NewElem && L->contains(NewElem)) continue;
+
+ // We can hoist this instruction. Move it to the pre-header.
+ Insert->moveBefore(Location);
+ }
+}
+
+} // end anonymous namespace
+
+char SLPVectorizer::ID = 0;
+static const char lv_name[] = "SLP Vectorizer";
+INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
+
+namespace llvm {
+ Pass *createSLPVectorizerPass() {
+ return new SLPVectorizer();
+ }
+}
+
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp
new file mode 100644
index 0000000..9b94366
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp
@@ -0,0 +1,730 @@
+//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "SLP"
+
+#include "VecUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <map>
+
+using namespace llvm;
+
+static const unsigned MinVecRegSize = 128;
+
+static const unsigned RecursionMaxDepth = 6;
+
+namespace llvm {
+
+BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
+ TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
+ BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
+ numberInstructions();
+}
+
+void BoUpSLP::numberInstructions() {
+ int Loc = 0;
+ InstrIdx.clear();
+ InstrVec.clear();
+ // Number the instructions in the block.
+ for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
+ InstrIdx[it] = Loc++;
+ InstrVec.push_back(it);
+ assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
+ }
+}
+
+Value *BoUpSLP::getPointerOperand(Value *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
+ return 0;
+}
+
+unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
+ if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
+ if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
+ return -1;
+}
+
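+// Two accesses are consecutive when ScalarEvolution proves that the second
+// pointer is exactly one element past the first, e.g. (illustrative) &A[i]
+// and &A[i+1] where the constant pointer difference equals the store size of
+// the element type.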
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
+ Value *PtrA = getPointerOperand(A);
+ Value *PtrB = getPointerOperand(B);
+ unsigned ASA = getAddressSpaceOperand(A);
+ unsigned ASB = getAddressSpaceOperand(B);
+
+ // Check that the address spaces match and that the pointers are valid.
+ if (!PtrA || !PtrB || (ASA != ASB)) return false;
+
+ // Check that A and B are of the same type.
+ if (PtrA->getType() != PtrB->getType()) return false;
+
+ // Calculate the distance.
+ const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
+ const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
+ const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
+ const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
+
+ // Non constant distance.
+ if (!ConstOffSCEV) return false;
+
+ int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
+ Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+ // The instructions are consecutive if the size of the first load/store is
+ // the same as the offset.
+ int64_t Sz = DL->getTypeStoreSize(Ty);
+ return ((-Offset) == Sz);
+}
+
+bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
+ Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
+ unsigned Sz = DL->getTypeSizeInBits(StoreTy);
+ unsigned VF = MinVecRegSize / Sz;
+
+ if (!isPowerOf2_32(Sz) || VF < 2) return false;
+
+ bool Changed = false;
+ // Look for profitable vectorizable trees at all offsets, starting at zero.
+ for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
+ if (i + VF > e) return Changed;
+ DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
+ ArrayRef<Value *> Operands = Chain.slice(i, VF);
+
+ int Cost = getTreeCost(Operands);
+ DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
+ if (Cost < CostThreshold) {
+ DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
+ vectorizeTree(Operands, VF);
+ i += VF - 1;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
+ ValueSet Heads, Tails;
+ SmallDenseMap<Value*, Value*> ConsecutiveChain;
+
+ // We may run into multiple chains that merge into a single chain. We mark the
+ // stores that we vectorized so that we don't visit the same store twice.
+ ValueSet VectorizedStores;
+ bool Changed = false;
+
+ // Do a quadratic search on all of the given stores and find
+ // all of the pairs of stores that follow each other.
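+ // For example (illustrative), stores to A[0], A[1] and A[2] produce the
+ // links A[0]->A[1] and A[1]->A[2] in ConsecutiveChain; A[0] is the only
+ // head that is not also a tail, so the chain is later walked from A[0].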
+ for (unsigned i = 0, e = Stores.size(); i < e; ++i)
+ for (unsigned j = 0; j < e; ++j) {
+ if (i == j) continue;
+ if (isConsecutiveAccess(Stores[i], Stores[j])) {
+ Tails.insert(Stores[j]);
+ Heads.insert(Stores[i]);
+ ConsecutiveChain[Stores[i]] = Stores[j];
+ }
+ }
+
+ // For stores that start but don't end a link in the chain:
+ for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
+ if (Tails.count(*it)) continue;
+
+ // We found a store instr that starts a chain. Now follow the chain and try
+ // to vectorize it.
+ ValueList Operands;
+ Value *I = *it;
+ // Collect the chain into a list.
+ while (Tails.count(I) || Heads.count(I)) {
+ if (VectorizedStores.count(I)) break;
+ Operands.push_back(I);
+ // Move to the next value in the chain.
+ I = ConsecutiveChain[I];
+ }
+
+ bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
+
+ // Mark the vectorized stores so that we don't vectorize them again.
+ if (Vectorized)
+ VectorizedStores.insert(Operands.begin(), Operands.end());
+ Changed |= Vectorized;
+ }
+
+ return Changed;
+}
+
+int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {
+ // Find the type of the operands in VL.
+ Type *ScalarTy = VL[0]->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+ VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+ // Find the cost of inserting/extracting values from the vector.
+ return getScalarizationCost(VecTy);
+}
+
+int BoUpSLP::getScalarizationCost(Type *Ty) {
+ int Cost = 0;
+ for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
+ Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ return Cost;
+}
+
+AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
+ return AliasAnalysis::Location();
+}
+
+Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
+ assert(Src->getParent() == Dst->getParent() && "Not the same BB");
+ BasicBlock::iterator I = Src, E = Dst;
+ /// Scan all of the instruction from SRC to DST and check if
+ /// the source may alias.
+ for (++I; I != E; ++I) {
+ // Ignore store instructions that are marked as 'ignore'.
+ if (MemBarrierIgnoreList.count(I)) continue;
+ if (Src->mayWriteToMemory()) /* Write */ {
+ if (!I->mayReadOrWriteMemory()) continue;
+ } else /* Read */ {
+ if (!I->mayWriteToMemory()) continue;
+ }
+ AliasAnalysis::Location A = getLocation(&*I);
+ AliasAnalysis::Location B = getLocation(Src);
+
+ if (!A.Ptr || !B.Ptr || AA->alias(A, B))
+ return I;
+ }
+ return 0;
+}
+
+void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
+ Value *Vec = vectorizeTree(Operands, Operands.size());
+ BasicBlock::iterator Loc = cast<Instruction>(Vec);
+ IRBuilder<> Builder(++Loc);
+ // After vectorizing the operands we need to generate extractelement
+ // instructions and replace all of the uses of the scalar values with
+ // the values that we extracted from the vectorized tree.
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
+ Operands[i]->replaceAllUsesWith(S);
+ }
+}
+
+int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
+ // Clear the state that is left over from the previous tree: the list of
+ // stores to ignore as memory barriers, the lane map, and the sets of
+ // multi-user and must-scalarize values.
+ MemBarrierIgnoreList.clear();
+ LaneMap.clear();
+ MultiUserVals.clear();
+ MustScalarize.clear();
+
+ // Scan the tree and find which value is used by which lane, and which values
+ // must be scalarized.
+ getTreeUses_rec(VL, 0);
+
+ // Check that instructions with multiple users can be vectorized. Mark unsafe
+ // instructions.
+ for (ValueSet::iterator it = MultiUserVals.begin(),
+ e = MultiUserVals.end(); it != e; ++it) {
+ // Check that all of the users of this instr are within the tree
+ // and that they are all from the same lane.
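+ // For example (illustrative), a value that is used as lane 0 in one
+ // place and as lane 2 in another cannot live in a single vector element,
+ // so it is marked for scalarization below.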
+ int Lane = -1;
+ for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
+ I != E; ++I) {
+ if (LaneMap.find(*I) == LaneMap.end()) {
+ MustScalarize.insert(*it);
+ DEBUG(dbgs()<<"SLP: Adding " << **it <<
+ " to MustScalarize because of an out of tree usage.\n");
+ break;
+ }
+ if (Lane == -1) Lane = LaneMap[*I];
+ if (Lane != LaneMap[*I]) {
+ MustScalarize.insert(*it);
+ DEBUG(dbgs()<<"Adding " << **it <<
+ " to MustScalarize because multiple lane use it: "
+ << Lane << " and " << LaneMap[*I] << ".\n");
+ break;
+ }
+ }
+ }
+
+ // Now calculate the cost of vectorizing the tree.
+ return getTreeCost_rec(VL, 0);
+}
+
+void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
+ if (Depth == RecursionMaxDepth) return;
+
+ // Don't handle vectors.
+ if (VL[0]->getType()->isVectorTy()) return;
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ if (SI->getValueOperand()->getType()->isVectorTy()) return;
+
+ // Check if all of the operands are constants.
+ bool AllConst = true;
+ bool AllSameScalar = true;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ AllConst &= isa<Constant>(VL[i]);
+ AllSameScalar &= (VL[0] == VL[i]);
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If one of the instructions is out of this BB, we need to scalarize all.
+ if (I && I->getParent() != BB) return;
+ }
+
+ // If all of the operands are identical or constant we have a simple solution.
+ if (AllConst || AllSameScalar) return;
+
+ // Scalarize unknown structures.
+ Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+ if (!VL0) return;
+
+ unsigned Opcode = VL0->getOpcode();
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If not all of the instructions are identical then we have to scalarize.
+ if (!I || Opcode != I->getOpcode()) return;
+ }
+
+ // Mark instructions with multiple users.
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // Remember to check if all of the users of this instr are vectorized
+ // within our tree.
+ if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
+ }
+
+ for (int i = 0, e = VL.size(); i < e; ++i) {
+ // Check that the instruction is only used within
+ // one lane.
+ if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
+ // Mark this instruction as 'seen' and remember the lane.
+ LaneMap[VL[i]] = i;
+ }
+
+ switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+ getTreeUses_rec(Operands, Depth+1);
+ }
+ return;
+ }
+ case Instruction::Store: {
+ ValueList Operands;
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+ getTreeUses_rec(Operands, Depth+1);
+ return;
+ }
+ default:
+ return;
+ }
+}
+
+int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
+ Type *ScalarTy = VL[0]->getType();
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+
+ // Don't mess with vectors.
+ if (ScalarTy->isVectorTy()) return max_cost;
+ VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+
+ if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);
+
+ // Check if all of the operands are constants.
+ bool AllConst = true;
+ bool AllSameScalar = true;
+ bool MustScalarizeFlag = false;
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ AllConst &= isa<Constant>(VL[i]);
+ AllSameScalar &= (VL[0] == VL[i]);
+ // Check whether this value must be scalarized.
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ MustScalarizeFlag |= MustScalarize.count(VL[i]);
+ // This instruction is outside the basic block.
+ if (I && I->getParent() != BB)
+ return getScalarizationCost(VecTy);
+ }
+
+ // Is this a simple vector constant.
+ if (AllConst) return 0;
+
+ // If all of the operands are identical we can broadcast them.
+ Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+ if (AllSameScalar) {
+ // If we are in a loop, and this is not an instruction (e.g. constant or
+ // argument) or the instruction is defined outside the loop then assume
+ // that the cost is zero.
+ if (L && (!VL0 || !L->contains(VL0)))
+ return 0;
+
+ // We need to broadcast the scalar.
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
+ }
+
+ // If one of the values was marked as requiring scalarization (e.g. it is
+ // used outside the tree or by multiple lanes) then scalarize the group.
+ if (MustScalarizeFlag)
+ return getScalarizationCost(VecTy);
+
+ if (!VL0) return getScalarizationCost(VecTy);
+ assert(VL0->getParent() == BB && "Wrong BB");
+
+ unsigned Opcode = VL0->getOpcode();
+ for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If not all of the instructions are identical then we have to scalarize.
+ if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
+ }
+
+ // Check if it is safe to sink the loads or the stores.
+ if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
+ int MaxIdx = InstrIdx[VL0];
+ for (unsigned i = 1, e = VL.size(); i < e; ++i )
+ MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
+
+ Instruction *Last = InstrVec[MaxIdx];
+ for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
+ if (VL[i] == Last) continue;
+ Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
+ if (Barrier) {
+ DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
+ *Last << "\n because of " << *Barrier << "\n");
+ return max_cost;
+ }
+ }
+ }
+
+ switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ int Cost = 0;
+ ValueList Operands;
+ Type *SrcTy = VL0->getOperand(0)->getType();
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+ // Check that the casted type is the same for all users.
+ if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
+ return getScalarizationCost(VecTy);
+ }
+
+ Cost += getTreeCost_rec(Operands, Depth+1);
+ if (Cost >= max_cost) return max_cost;
+
+ // Calculate the cost of this instruction.
+ int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
+ VL0->getType(), SrcTy);
+
+ VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
+ int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
+ Cost += (VecCost - ScalarCost);
+ return Cost;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ int Cost = 0;
+ // Calculate the cost of all of the operands.
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+ Cost += getTreeCost_rec(Operands, Depth+1);
+ if (Cost >= max_cost) return max_cost;
+ }
+
+ // Calculate the cost of this instruction.
+ int ScalarCost = VecTy->getNumElements() *
+ TTI->getArithmeticInstrCost(Opcode, ScalarTy);
+
+ int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
+ Cost += (VecCost - ScalarCost);
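+ // The returned cost is a delta: a negative value means the vector form
+ // is cheaper than the VL.size() scalar instructions it replaces.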
+ return Cost;
+ }
+ case Instruction::Load: {
+ // If we have to scalarize the loads, add the cost of forming the vector.
+ for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
+ if (!isConsecutiveAccess(VL[i], VL[i+1]))
+ return getScalarizationCost(VecTy);
+
+ // Cost of wide load - cost of scalar loads.
+ int ScalarLdCost = VecTy->getNumElements() *
+ TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
+ int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
+ return VecLdCost - ScalarLdCost;
+ }
+ case Instruction::Store: {
+ // We know that we can merge the stores. Calculate the cost.
+ int ScalarStCost = VecTy->getNumElements() *
+ TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
+ int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
+ int StoreCost = VecStCost - ScalarStCost;
+
+ ValueList Operands;
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+ MemBarrierIgnoreList.insert(VL[j]);
+ }
+
+ int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
+ return TotalCost;
+ }
+ default:
+ // Unable to vectorize unknown instructions.
+ return getScalarizationCost(VecTy);
+ }
+}
+
+Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
+ int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
+ for (unsigned i = 0; i < VF; ++i )
+ MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
+ return InstrVec[MaxIdx + 1];
+}
+
+Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
+ IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
+ Value *Vec = UndefValue::get(Ty);
+ for (unsigned i=0; i < Ty->getNumElements(); ++i) {
+ // Generate the 'InsertElement' instruction.
+ Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
+ // Remember that this instruction is used as part of a 'gather' sequence.
+ // The caller of the bottom-up slp vectorizer can try to hoist the sequence
+ // if the users are outside of the basic block.
+ GatherInstructions.push_back(Vec);
+ }
+
+ return Vec;
+}
+
+Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
+ Value *V = vectorizeTree_rec(VL, VF);
+ // We moved some instructions around. We have to number them again
+ // before we can do any analysis.
+ numberInstructions();
+ MustScalarize.clear();
+ return V;
+}
+
+Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
+ Type *ScalarTy = VL[0]->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+ VectorType *VecTy = VectorType::get(ScalarTy, VF);
+
+ // Check if all of the operands are constants or identical.
+ bool AllConst = true;
+ bool AllSameScalar = true;
+ for (unsigned i = 0, e = VF; i < e; ++i) {
+ AllConst &= isa<Constant>(VL[i]);
+ AllSameScalar &= (VL[0] == VL[i]);
+ // The instruction must be in the same BB, and it must be vectorizable.
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
+ return Scalarize(VL, VecTy);
+ }
+
+ // Check that this is a simple vector constant.
+ if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
+
+ // Scalarize unknown structures.
+ Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+ if (!VL0) return Scalarize(VL, VecTy);
+
+ if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];
+
+ unsigned Opcode = VL0->getOpcode();
+ for (unsigned i = 0, e = VF; i < e; ++i) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ // If not all of the instructions are identical then we have to scalarize.
+ if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
+ }
+
+ switch (Opcode) {
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ ValueList INVL;
+ for (int i = 0; i < VF; ++i)
+ INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ Value *InVec = vectorizeTree_rec(INVL, VF);
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ CastInst *CI = dyn_cast<CastInst>(VL0);
+ Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
+ VectorizedValues[VL0] = V;
+ return V;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ ValueList LHSVL, RHSVL;
+ for (int i = 0; i < VF; ++i) {
+ RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+ LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
+ }
+
+ Value *RHS = vectorizeTree_rec(RHSVL, VF);
+ Value *LHS = vectorizeTree_rec(LHSVL, VF);
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
+ Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
+ VectorizedValues[VL0] = V;
+ return V;
+ }
+ case Instruction::Load: {
+ LoadInst *LI = cast<LoadInst>(VL0);
+ unsigned Alignment = LI->getAlignment();
+
+ // Check if all of the loads are consecutive.
+ for (unsigned i = 1, e = VF; i < e; ++i)
+ if (!isConsecutiveAccess(VL[i-1], VL[i]))
+ return Scalarize(VL, VecTy);
+
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
+ VecTy->getPointerTo());
+ LI = Builder.CreateLoad(VecPtr);
+ LI->setAlignment(Alignment);
+ VectorizedValues[VL0] = LI;
+ return LI;
+ }
+ case Instruction::Store: {
+ StoreInst *SI = cast<StoreInst>(VL0);
+ unsigned Alignment = SI->getAlignment();
+
+ ValueList ValueOp;
+ for (int i = 0; i < VF; ++i)
+ ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
+
+ Value *VecValue = vectorizeTree_rec(ValueOp, VF);
+
+ IRBuilder<> Builder(GetLastInstr(VL, VF));
+ Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
+ VecTy->getPointerTo());
+ Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
+
+ for (int i = 0; i < VF; ++i)
+ cast<Instruction>(VL[i])->eraseFromParent();
+ return 0;
+ }
+ default:
+ Value *S = Scalarize(VL, VecTy);
+ VectorizedValues[VL0] = S;
+ return S;
+ }
+}
+
+} // end of namespace
diff --git a/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h
new file mode 100644
index 0000000..5456c6c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Vectorize/VecUtils.h
@@ -0,0 +1,164 @@
+//===- VecUtils.h - Vectorization Utilities -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of classes and functions manipulate vectors and chains of
+// vectors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
+#define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include <vector>
+
+namespace llvm {
+
+class BasicBlock; class Instruction; class Type;
+class VectorType; class StoreInst; class Value;
+class ScalarEvolution; class DataLayout;
+class TargetTransformInfo; class AliasAnalysis;
+class Loop;
+
+/// Bottom Up SLP vectorization utility class.
+struct BoUpSLP {
+ typedef SmallVector<Value*, 8> ValueList;
+ typedef SmallPtrSet<Value*, 16> ValueSet;
+ typedef SmallVector<StoreInst*, 8> StoreList;
+ static const int max_cost = 1<<20;
+
+  /// \brief Constructor.
+ BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl,
+ TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp);
+
+ /// \brief Take the pointer operand from the Load/Store instruction.
+ /// \returns NULL if this is not a valid Load/Store instruction.
+ static Value *getPointerOperand(Value *I);
+
+ /// \brief Take the address space operand from the Load/Store instruction.
+ /// \returns -1 if this is not a valid Load/Store instruction.
+ static unsigned getAddressSpaceOperand(Value *I);
+
+ /// \returns true if the memory operations A and B are consecutive.
+ bool isConsecutiveAccess(Value *A, Value *B);
+
+ /// \brief Vectorize the tree that starts with the elements in \p VL.
+ /// \returns the vectorized value.
+ Value *vectorizeTree(ArrayRef<Value *> VL, int VF);
+
+ /// \returns the vectorization cost of the subtree that starts at \p VL.
+ /// A negative number means that this is profitable.
+ int getTreeCost(ArrayRef<Value *> VL);
+
+ /// \returns the scalarization cost for this list of values. Assuming that
+ /// this subtree gets vectorized, we may need to extract the values from the
+ /// roots. This method calculates the cost of extracting the values.
+ int getScalarizationCost(ArrayRef<Value *> VL);
+
+ /// \brief Attempts to order and vectorize a sequence of stores. This
+ /// function does a quadratic scan of the given stores.
+ /// \returns true if the basic block was modified.
+ bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold);
+
+ /// \brief Vectorize a group of scalars into a vector tree.
+ void vectorizeArith(ArrayRef<Value *> Operands);
+
+ /// \returns the list of new instructions that were added in order to collect
+ /// scalars into vectors. This list can be used to further optimize the gather
+ /// sequences.
+  ValueList &getGatherSeqInstructions() { return GatherInstructions; }
+
+private:
+ /// \brief This method contains the recursive part of getTreeCost.
+ int getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth);
+
+  /// \brief This recursive method looks for vectorization hazards such as
+  /// values that have multiple users and checks that each value is used by
+  /// only one vector lane. It updates the LaneMap and MultiUserVals members.
+ void getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth);
+
+ /// \brief This method contains the recursive part of vectorizeTree.
+ Value *vectorizeTree_rec(ArrayRef<Value *> VL, int VF);
+
+ /// \brief Number all of the instructions in the block.
+ void numberInstructions();
+
+ /// \brief Vectorize a sorted sequence of stores.
+ bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold);
+
+ /// \returns the scalarization cost for this type. Scalarization in this
+ /// context means the creation of vectors from a group of scalars.
+ int getScalarizationCost(Type *Ty);
+
+  /// \returns the AA location that is being accessed by the instruction.
+ AliasAnalysis::Location getLocation(Instruction *I);
+
+ /// \brief Checks if it is possible to sink an instruction from
+ /// \p Src to \p Dst.
+ /// \returns the pointer to the barrier instruction if we can't sink.
+ Value *isUnsafeToSink(Instruction *Src, Instruction *Dst);
+
+ /// \returns the instruction that appears last in the BB from \p VL.
+ /// Only consider the first \p VF elements.
+ Instruction *GetLastInstr(ArrayRef<Value *> VL, unsigned VF);
+
+  /// \returns a vector built by gathering the scalars in \p VL.
+ Value *Scalarize(ArrayRef<Value *> VL, VectorType *Ty);
+
+private:
+  /// Maps instructions to numbers.
+ SmallDenseMap<Value*, int> InstrIdx;
+ /// Maps integers to Instructions.
+ std::vector<Instruction*> InstrVec;
+
+ // -- containers that are used during getTreeCost -- //
+
+ /// Contains values that must be scalarized because they are used
+ /// by multiple lanes, or by users outside the tree.
+ /// NOTICE: The vectorization methods also use this set.
+ ValueSet MustScalarize;
+
+ /// Contains a list of values that are used outside the current tree. This
+ /// set must be reset between runs.
+ ValueSet MultiUserVals;
+  /// Maps values in the tree to the vector lanes that use them. This map
+  /// must be reset between runs of getTreeCost.
+ std::map<Value*, int> LaneMap;
+  /// A list of instructions to ignore while sinking memory instructions.
+  /// This set must be reset between runs of getTreeCost.
+
+ // -- Containers that are used during vectorizeTree -- //
+
+  /// Maps the first scalar of each vectorized bundle to the generated vector
+  /// value. This map must be reset between runs.
+ DenseMap<Value*, Value*> VectorizedValues;
+
+ // -- Containers that are used after vectorization by the caller -- //
+
+ /// A list of instructions that are used when gathering scalars into vectors.
+ /// In many cases these instructions can be hoisted outside of the BB.
+ /// Iterating over this list is faster than calling LICM.
+ ValueList GatherInstructions;
+
+ // Analysis and block reference.
+ BasicBlock *BB;
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ TargetTransformInfo *TTI;
+ AliasAnalysis *AA;
+ Loop *L;
+};
+
+} // end of namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
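
The header above exposes BoUpSLP as a per-basic-block helper: the caller constructs it with the analyses it needs and then either queries getTreeCost for a bundle of scalars or lets vectorizeStores/vectorizeArith drive the transformation. A minimal sketch of a driver follows, assuming the analyses were already obtained (for example via getAnalysis<> in a FunctionPass of this LLVM era); the helper name tryVectorizeBlock and the cost threshold of 0 are assumptions for illustration only.

// Illustrative driver: collect the stores of a block and hand them to BoUpSLP.
#include "VecUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

static bool tryVectorizeBlock(llvm::BasicBlock *BB, llvm::ScalarEvolution *SE,
                              llvm::DataLayout *DL,
                              llvm::TargetTransformInfo *TTI,
                              llvm::AliasAnalysis *AA, llvm::Loop *L) {
  llvm::BoUpSLP R(BB, SE, DL, TTI, AA, L);

  // vectorizeStores does its own quadratic scan over the candidate stores,
  // so we only need to collect them here.
  llvm::SmallVector<llvm::StoreInst *, 16> Stores;
  for (llvm::BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
    if (llvm::StoreInst *SI = llvm::dyn_cast<llvm::StoreInst>(&*I))
      Stores.push_back(SI);

  // A threshold of 0 accepts only trees whose getTreeCost is negative,
  // i.e. trees the cost model considers profitable.
  return R.vectorizeStores(Stores, 0);
}
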
diff --git a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
index 19eefd2..a927fe1 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -1,4 +1,4 @@
- //===-- Vectorize.cpp -----------------------------------------------------===//
+//===-- Vectorize.cpp -----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -28,6 +28,7 @@ using namespace llvm;
void llvm::initializeVectorization(PassRegistry &Registry) {
initializeBBVectorizePass(Registry);
initializeLoopVectorizePass(Registry);
+ initializeSLPVectorizerPass(Registry);
}
void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
@@ -41,3 +42,7 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLoopVectorizePass());
}
+
+void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createSLPVectorizerPass());
+}
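
The Vectorize.cpp hunks register the new pass with the pass registry and add an LLVMAddSLPVectorizePass entry point next to the existing BB and loop vectorizer wrappers. A hedged sketch of how a C-API client might schedule it follows; the header path llvm-c/Transforms/Vectorize.h is an assumption based on where the sibling wrappers are declared.

// Illustrative only: run the new SLP vectorizer over a module via the C API.
#include "llvm-c/Core.h"
#include "llvm-c/Transforms/Vectorize.h"   // assumed location of the wrapper

static void runSLPVectorizer(LLVMModuleRef M) {
  LLVMPassManagerRef PM = LLVMCreatePassManager();
  LLVMAddSLPVectorizePass(PM);   // wrapper added by this patch
  LLVMRunPassManager(PM, M);     // runs the scheduled passes over the module
  LLVMDisposePassManager(PM);
}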